def toFile(self, fileName, report=None):
    """
    Write the string form of self to a file, then download every image
    named in self.imageSet into the directory of that file.

    @param fileName path of the file to write; each image is stored
    next to it, under the name it has in self.imageSet.
    @param report if True, this method will output the quoted href of
    each image on sys.stderr; if report is a callback function, this
    function will be called with that quoted href as its one parameter.
    """
    # "with" closes the handle even when the write fails part-way.
    with open(fileName, "w") as outfile:
        outfile.write("%s" % self)
    path = os.path.dirname(fileName)
    for img in self.imageSet:
        completeUrl = "http://%s/index.php/File:%s" % (self.baseAddress, img)
        page = url.urlopen(completeUrl)
        try:
            soup = BeautifulSoup(page.read())
        finally:
            page.close()
        # Reset for every image: a description page without a usable
        # link must not silently reuse the href of the previous image.
        href = ""
        for div in soup.findAll("div", id="file"):
            a = div.find("a")
            # The div may hold no anchor, or an anchor without href.
            if a is not None and a.get("href"):
                href = a["href"]
        if not href:
            # Nothing to download for this image; go on with the others.
            continue
        message = "'%s'" % href
        # "==" rather than "is": the value 1 keeps being accepted as True.
        if report == True:
            sys.stderr.write(message + "\n")
        elif callable(report):
            report(message)
        # NOTE(review): href is appended after "http://host/"; if the wiki
        # emits hrefs with a leading "/", the URL gets a double slash.
        imgData = url.urlopen("http://%s/%s" % (self.host, href))
        try:
            payload = imgData.read()
        finally:
            imgData.close()
        # Image data is binary: "wb" prevents newline translation from
        # corrupting it on platforms that distinguish text files.
        with open(os.path.join(path, img), "wb") as imgFile:
            imgFile.write(payload)
def getWikiContents(completeUrl):
    """
    Fetch the wiki code of a page by following its "edit" link and
    reading the textarea whose id is "wpTextbox1".

    @param completeUrl the url of a wiki page, encoded in utf-8
    @return a tuple: the base URL of the wiki and the wiki code for the
    page, else two void strings (URL which is not http/https, HTTP
    error, no usable edit link, or no filled textarea)
    """
    result = ("", "")
    # Without a match there is no site root to build on; the former
    # code crashed with AttributeError on .group() in that case.
    siteMatch = re.match(r"^(https?://[^/]+/).*", completeUrl)
    if siteMatch is None:
        return result
    baseUrl = siteMatch.group(1)  # always ends with "/"

    try:
        sPage = url.urlopen(completeUrl)
    except urllib2.HTTPError:
        return result
    if not sPage:
        return result
    try:
        soup = BeautifulSoup(sPage.read())
    finally:
        sPage.close()

    pattern = re.compile(r'/(.*)/index\.php\?title=.*action=edit$')
    editAddress = soup.find(href=pattern)
    if editAddress is None:
        return result
    localUrl = editAddress["href"]
    # The link may have been accepted through a non-anchored search,
    # so an anchored match can still fail (e.g. an absolute href).
    editMatch = pattern.match(localUrl)
    if editMatch is None:
        return result
    base = editMatch.group(1)

    # localUrl starts with "/" (guaranteed by the match) and baseUrl ends
    # with "/": drop one of them to avoid "//" in the request path.
    editUrl = baseUrl + localUrl[1:]
    try:
        bPage = url.urlopen(editUrl)
    except urllib2.HTTPError:
        # Same contract as for the first request.
        return result
    if not bPage:
        return result
    try:
        soup = BeautifulSoup(bPage.read())
    finally:
        bPage.close()

    area = soup.find('textarea', id="wpTextbox1")
    if area is not None and len(area.contents) > 0:
        result = (baseUrl + base, area.contents[0])
    return result
def getImages(self, path, report=None):
    """
    Gets the necessary images from the mediawiki: for every name in
    self.imageSet, reads its "File:" page, takes the href of the link
    found in the div whose id is "file", and downloads that target.

    @param path the path to write images; each one is stored under the
    name it has in self.imageSet.
    @param report if True, this method will output the quoted href of
    each image on sys.stderr; if report is a callback function, this
    function will be called with that quoted href as its one parameter.
    """
    for img in self.imageSet:
        completeUrl = "http://%s/index.php/File:%s" % (self.baseAddress, img)
        page = url.urlopen(completeUrl)
        try:
            soup = BeautifulSoup(page.read())
        finally:
            page.close()
        # Reset for every image: a description page without a usable
        # link must not silently reuse the href of the previous image.
        href = ""
        for div in soup.findAll("div", id="file"):
            a = div.find("a")
            # The div may hold no anchor, or an anchor without href.
            if a is not None and a.get("href"):
                href = a["href"]
        if not href:
            # Nothing to download for this image; go on with the others.
            continue
        message = "'%s'" % href
        # "==" rather than "is": the value 1 keeps being accepted as True.
        if report == True:
            sys.stderr.write(message + "\n")
        elif callable(report):
            report(message)
        # NOTE(review): href is appended after "http://host/"; if the wiki
        # emits hrefs with a leading "/", the URL gets a double slash.
        imgData = url.urlopen("http://%s/%s" % (self.host, href))
        try:
            payload = imgData.read()
        finally:
            imgData.close()
        # Image data is binary: "wb" prevents newline translation from
        # corrupting it on platforms that distinguish text files.
        with open(os.path.join(path, img), "wb") as imgFile:
            imgFile.write(payload)
def wikiTemplates(self, contents, specialPage="Sp%C3%A9cial:ExpandTemplates"):
    """
    Calls the special page ExpandTemplates in the wiki to apply
    templates which must be processed by mediawiki.

    @param contents the code with templates (unicode string)
    @param specialPage the name of the ExpandTemplates special page,
    inserted as is into the URL (so it must already be percent-encoded);
    the default is the name which was formerly hard-coded.
    @result the code with all templates expanded, or a void string when
    the answer has no textarea whose id is "output" or when that
    textarea is empty
    """
    completeUrl = "http://%s/index.php/%s" % (self.baseAddress, specialPage)
    data = url.urlencode({
        "contexttitle": "",
        "input": contents.encode("utf-8"),
        "removecomments": "1",
        "generate_xml": "0",
    })
    # Passing data as second argument sends the form with the request.
    page = url.urlopen(completeUrl, data)
    try:
        soup = BeautifulSoup(page.read())
    finally:
        page.close()
    area = soup.find('textarea', id="output")
    # An empty textarea has no children: contents[0] would raise
    # IndexError, so it is reported as a void string instead.
    if area is not None and len(area.contents) > 0:
        return area.contents[0]
    return ""