Esempio n. 1
0
class ReplicsCounter():
    """Count "replics" per section of wiki pages and keep them in a cache.

    A replic is counted as one non-trivial text line of a section that is
    not a template line (see ``_countReplics``). Results are written to the
    ``articles`` table of a ``Storage`` instance held in ``self.cache``.
    """

    def __init__(self):
        """Create the storage object and declare the ``articles`` table."""
        self.cache = Storage()
        self.cache.create("articles", \
                    {"oldid":"INT UNIQUE", "name":"TEXT", "ts":"DATE", "replics": "INT"})

    @staticmethod
    def _countReplics(lines):
        """Return the number of replics in *lines*, not counting the header.

        A line is counted when, after stripping whitespace, it is longer
        than two characters, does not start with ``{{`` and does not end
        with ``}}``. The count starts at -1 because the first counted line
        of a section is expected to be its header.
        """
        replics = -1  # compensates for the section header line
        for line in lines:
            sline = line.strip()
            if len(sline) <= 2:
                continue
            # Skip lines that open or close a template.
            if sline.startswith("{{") or sline.endswith("}}"):
                continue
            replics += 1
        return replics

    def countPage(self, page):
        """Count replics in each section of *page* and store them.

        For every section, the result is reported through
        ``wikipedia.output`` and written to the ``replics`` column of the
        ``articles`` row whose ``name`` equals the section name.

        page -- object providing ``getSections()`` and ``get()``; each item
                of ``getSections()`` is indexed as ``s[0]`` (used as the
                start offset into the page text) and ``s[3]`` (used as the
                section name).
        """
        # Map section name -> (start offset, end offset) in the page text.
        sections = {}
        sect = None
        n = -1
        for s in page.getSections():
            if sect is not None:
                # The previous section ends where this one starts.
                sections[sect] = (n, s[0])
            sect = s[3]
            n = s[0]

        # Fetch the text once; it is needed for every section below.
        text = page.get()
        # NOTE(review): when the page has no sections this stores the key
        # None with a start offset of -1 -- behavior kept from the original;
        # confirm whether such pages should be skipped instead.
        sections[sect] = (n, len(text)) # last one

        for name, bounds in sections.items():
            replics = self._countReplics(text[bounds[0]:bounds[1]].splitlines())
            wikipedia.output( u"%s %s %s" % (name, bounds, replics))
            # NOTE(review): SQL is built by string formatting; safety relies
            # entirely on self.cache.quote() -- verify against Storage.
            self.cache.execute(u'UPDATE articles SET replics = %s WHERE name = "%s";' % (replics, self.cache.quote(name)))

    def countCat(self, catname):
        """Run ``countPage`` on every article of the category *catname*."""
        cat = catlib.Category(wikipedia.getSite(), catname)
        for page in cat.articles():
            # Single-argument parenthesized form prints identically under
            # Python 2 and also parses under Python 3.
            print(page)
            self.countPage(page)

    def replicsPage(self, pagename):
        """Return the stored replics value for *pagename*.

        Returns the string "-" when the cache lookup yields None.
        """
        r = self.cache.findone('articles', {"name":pagename}, what = ["replics"])
        if r is None:
            return "-"
        return r[0]