コード例 #1
0
 def isPeople(self, article):
     """Return True if ``article`` sits in or below a "People" category.

     The article is looked up on the "en" site and a redirect page is
     replaced by its target. The page's own categories are checked first;
     if none of their titles contains the substring "People", the parent
     categories are searched breadth-first for at most two further levels.

     Args:
         article: Page title, either as text or as UTF-8 encoded bytes.

     Returns:
         True as soon as a category title containing "People" is found,
         otherwise False.

     Note:
         The matching category collection and the size of every new search
         frontier are printed as diagnostics.
     """
     # Only bytes need decoding; calling ``.decode`` unconditionally raises
     # AttributeError for text titles on Python 3.
     title = article.decode("utf8") if isinstance(article, bytes) else article
     site = Site("en")
     page = Page(site, title)
     if page.isRedirectPage():
         page = page.getRedirectTarget()

     def mentions_people(categories):
         # Substring match against the full category title.
         return any("People" in category.title() for category in categories)

     cats = set(page.categories())
     if mentions_people(cats):
         print(cats)
         return True

     max_depth = 2  # number of parent levels explored above the page's categories
     frontier = cats
     visited = set()
     depth = 0
     while frontier and depth < max_depth:
         depth += 1
         next_frontier = set()
         for cat in frontier:
             if cat in visited:
                 continue
             visited.add(cat)
             parentcats = set(cat.categories())
             if mentions_people(parentcats):
                 print(parentcats)
                 return True
             # Queue only parents that have not been expanded yet.
             next_frontier.update(
                 parent for parent in parentcats if parent not in visited
             )
         frontier = next_frontier
         print(len(frontier), frontier)
     return False
コード例 #2
0
ファイル: arthur_stein.py プロジェクト: the-it/WS_THEbotIT
    def task(self):
        """Process every lemma from ``self.get_list()`` and publish two reports.

        For each entry a ``Page`` is built from its "title". When
        ``self.is_protected(lemma)`` is true, the title is recorded and
        ``lemma.protect`` is called with "autoconfirmed" edit and move
        protection. Lemmas carrying the category "Kategorie:RE:Platzhalter"
        are recorded separately. Progress is logged after every lemma.

        Finally both title lists are rendered with ``self.join_lists`` and
        saved to their report pages in the user namespace.

        Returns:
            Always True.
        """
        list_platzhalter = []
        list_protected = []
        lemma_list = self.get_list()
        for idx, entry in enumerate(lemma_list):
            lemma = Page(self.wiki, entry["title"])
            if self.is_protected(lemma):
                list_protected.append(lemma.title())
                # A fresh dict per call, exactly as a literal argument would be.
                levels = {"edit": "autoconfirmed", "move": "autoconfirmed"}
                lemma.protect(protections=levels, reason="is now common")
            category_titles = {
                category.title() for category in lemma.categories()
            }
            if "Kategorie:RE:Platzhalter" in category_titles:
                list_platzhalter.append(lemma.title())
            self.logger.info(
                f"{idx}/{len(lemma_list)} prot: {len(list_protected)}, plat: {len(list_platzhalter)} {lemma.title()}"
            )

        # Write the protected report first, then the placeholder report.
        reports = (
            ("Benutzer:THE IT/RE/Arthur Stein/protected", list_protected),
            ("Benutzer:THE IT/RE/Arthur Stein/platzhalter", list_platzhalter),
        )
        for target, titles in reports:
            report_page = Page(self.wiki, target)
            report_page.text = self.join_lists(titles)
            report_page.save()
        return True
コード例 #3
0
ファイル: WikiManager.py プロジェクト: jdc08161063/FunFacts
    def getCategories(self, article):
        """Return the filtered, non-hidden category titles of ``article``.

        Results are cached one file per article under the relative directory
        "articleCategoriesCache/" (created on demand). If a cache file exists
        and still yields a non-empty list after ``self.filterCategories``,
        that list is returned without contacting the wiki.

        Otherwise the article is looked up on the "en" site (a redirect page
        is replaced by its target), the titles of its non-hidden categories
        are sorted and passed through ``self.filterCategories``, appended to
        the cache file one per line, and returned.

        Args:
            article: Page title, as text or UTF-8 encoded bytes. It is also
                used verbatim as the cache file name.

        Returns:
            The list produced by ``self.filterCategories``.
        """
        baseDir = "articleCategoriesCache/"
        if not os.path.exists(baseDir):
            os.makedirs(baseDir)
        fname = baseDir + article
        if os.path.isfile(fname):
            try:
                with codecs.open(fname, encoding='utf-8') as f:
                    lines = [line.strip() for line in f]
            except UnicodeDecodeError:
                # Cache files written without an explicit encoding may not be
                # valid UTF-8; fall back to the default decoding for those.
                with codecs.open(fname) as f:
                    lines = [line.strip() for line in f]
            lines = self.filterCategories(lines)
            if lines:
                return lines

        # Only bytes need decoding; calling ``.decode`` unconditionally raises
        # AttributeError for text titles on Python 3.
        title = article.decode("utf8") if isinstance(article, bytes) else article
        site = Site("en")
        page = Page(site, title)
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        cats = sorted(
            cat.title() for cat in page.categories()
            if not cat.isHiddenCategory()
        )
        cats = self.filterCategories(cats)

        # Always write UTF-8 so the cache matches the reader above; the old
        # fallback wrote encoded bytes into a text-mode file.
        text = "".join(cat + "\n" for cat in cats)
        with codecs.open(fname, "a", encoding='utf-8') as f:
            f.write(text)
        return cats