Exemple #1
0
 def toFile(self, fileName, report=None):
     """
     Write this object's contents to a file and download its images.

     The text written is the result of formatting self with "%s". Then,
     for every image name in self.imageSet, the wiki page
     "File:<name>" is fetched, the link found inside <div id="file"> is
     followed, and the data is saved next to fileName under the image's
     own name.

     @param fileName path of the file to write; images are stored in the
     same directory.
     @param report if True, each image link found is printed on sys.stderr;
     if report is a callback function, it is called with one parameter
     (the link, surrounded by single quotes).
     """
     with open(fileName, "w") as outfile:
         outfile.write("%s" % self)
     path = os.path.dirname(fileName)
     for img in self.imageSet:
         completeUrl = "http://%s/index.php/File:%s" % (self.baseAddress, img)
         page = url.urlopen(completeUrl)
         soup = BeautifulSoup(page.read())
         # Reset for every image: a description page without a usable link
         # must not reuse the link found for the previous image.
         href = None
         for div in soup.findAll("div", id="file"):
             # href=True restricts the search to anchors that carry a link.
             a = div.find("a", href=True)
             if a is None:
                 continue
             href = a["href"]
             # "==" is kept on purpose so that report=1 still behaves like True.
             if report == True:
                 sys.stderr.write("'%s'\n" % href)
             elif callable(report):
                 report("'%s'" % href)
         if not href:
             # Nothing to download for this image; skip it rather than
             # saving unrelated data under its name.
             continue
         imgData = url.urlopen("http://%s/%s" % (self.host, href))
         # Binary mode: image bytes must be written without any newline
         # translation or text encoding.
         with open(os.path.join(path, img), "wb") as imgFile:
             imgFile.write(imgData.read())
     return
Exemple #2
0
def getWikiContents(completeUrl):
    """
    Fetch the wiki code of a page through the wiki's edit form.

    The page at completeUrl is downloaded and searched for a link whose
    href matches "/<base>/index.php?title=...action=edit". That edit page
    is then downloaded and the first child of its
    <textarea id="wpTextbox1"> is returned.

    @param completeUrl the url of a wiki page, encoded in utf-8
    @return the base URL of the wiki and the wiki code for the page, else two void strings
    """
    result = ("", "")
    # Accept https as well as http, and return the void result instead of
    # crashing when the argument is not an absolute URL with a path.
    hostMatch = re.match(r"^(https?://[^/]+/).*", completeUrl)
    if hostMatch is None:
        return result
    baseUrl = hostMatch.group(1)
    try:
        sPage = url.urlopen(completeUrl)
    except urllib2.HTTPError:
        return result
    if not sPage:
        return result
    soup = BeautifulSoup(sPage.read())
    pattern = re.compile(r'/(.*)/index.php\?title=.*action=edit$')
    editAddress = soup.find(href=pattern)
    if editAddress is None:
        return result
    localUrl = editAddress["href"]
    # The soup search may accept an href that matches only in the middle,
    # whereas match() is anchored at the start, so it can still fail here.
    found = pattern.match(localUrl)
    if found is None:
        return result
    base = found.group(1)
    try:
        # baseUrl ends with "/" and localUrl starts with "/": drop one of
        # them so the request does not contain a double slash.
        bPage = url.urlopen(baseUrl + localUrl.lstrip("/"))
    except urllib2.HTTPError:
        return result
    if not bPage:
        return result
    soup = BeautifulSoup(bPage.read())
    area = soup.find('textarea', id="wpTextbox1")
    if area is not None and len(area.contents) > 0:
        result = (baseUrl + base, area.contents[0])
    return result
Exemple #3
0
 def getImages(self, path, report=None):
     """
     Download the necessary images from the mediawiki.

     For every image name in self.imageSet, the wiki page "File:<name>"
     is fetched, the link found inside <div id="file"> is followed, and
     the data is saved in path under the image's own name.

     @param path the path to write images
     @param report if True, each image link found is printed on sys.stderr;
     if report is a callback function, it is called with one parameter
     (the link, surrounded by single quotes).
     """
     for img in self.imageSet:
         completeUrl = "http://%s/index.php/File:%s" % (self.baseAddress, img)
         page = url.urlopen(completeUrl)
         soup = BeautifulSoup(page.read())
         # Reset for every image: a description page without a usable link
         # must not reuse the link found for the previous image.
         href = None
         for div in soup.findAll("div", id="file"):
             # href=True restricts the search to anchors that carry a link.
             a = div.find("a", href=True)
             if a is None:
                 continue
             href = a["href"]
             # "==" is kept on purpose so that report=1 still behaves like True.
             if report == True:
                 sys.stderr.write("'%s'\n" % href)
             elif callable(report):
                 report("'%s'" % href)
         if not href:
             # Nothing to download for this image; skip it rather than
             # saving unrelated data under its name.
             continue
         imgData = url.urlopen("http://%s/%s" % (self.host, href))
         # Binary mode: image bytes must be written without any newline
         # translation or text encoding.
         with open(os.path.join(path, img), "wb") as imgFile:
             imgFile.write(imgData.read())
     return
Exemple #4
0
 def wikiTemplates(self, contents):
     """
     Expand templates by posting code to the wiki's ExpandTemplates page.

     The code is sent, encoded in utf-8, as the "input" field of a form
     posted to "Sp%C3%A9cial:ExpandTemplates" (the percent-encoded form of
     "Spécial:ExpandTemplates") under self.baseAddress. The answer is read
     from the <textarea id="output"> of the returned page.

     @param contents the code with templates (unicode string)
     @return the code with all templates expanded, or a void string when
     the output area is missing or empty
     """
     completeUrl = "http://%s/index.php/%s" % (self.baseAddress, "Sp%C3%A9cial:ExpandTemplates")
     data = url.urlencode({
         "contexttitle": "",
         "input": contents.encode("utf-8"),
         "removecomments": "1",
         "generate_xml": "0",
     })
     page = url.urlopen(completeUrl, data)
     soup = BeautifulSoup(page.read())
     area = soup.find('textarea', id="output")
     # An empty textarea has no children; indexing it would raise IndexError.
     if area is not None and len(area.contents) > 0:
         return area.contents[0]
     return ""