Code example #1
File: recreation.py Project: Toto-Azero/Wikipedia
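This snippet builds the daily "Journal des recréations" (log of recreated articles): for each page created on the given day that also appears in the deletion log, it inspects the deletion-discussion page ("Discussion:<title>/Suppression"), following a redirect with getRedirectTarget() if needed, formats a report line, and appends the result to the journal page.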
def process(day):
    """
    one day bot processing
     
    arguments:
    day -- python date format
    
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(
            day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n\n== {} ==\n".format(format_date(day))
    comment = []
    for i, page in enumerate(creation_log(start, end), 1):
        gras = ''
        date = ''
        if params.verbose:
            print(i, page["timestamp"])

        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(),
                            "Discussion:" + page["title"] + "/Suppression")
            if page_pas.isRedirectPage():
                page_pas = page_pas.getRedirectTarget()
            if page_pas.exists() and re.search(r'article supprimé',
                                               page_pas.get(), re.I):
                if re.search(
                        r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à',
                        page_pas.get(), re.I):
                    date = u' de %s' % re.search(
                        r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à',
                        page_pas.get(), re.I).group(1)
                comment.append(u'[[%s]] (malgré [[%s|PàS]]%s)' %
                               (page["title"], page_pas.title(), date))
                gras = "'''"
            r = (
                u"* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} puis recréé par {{{{u|{user}}}}}{g} \n"
                .format(title=wiki_param(page["title"]),
                        pas=page_pas.title(),
                        user=wiki_param(page["user"]),
                        date=format_date(from_date(dl["timestamp"])),
                        g=gras))
            if params.verbose:
                print(r)
            result += r

    page = Page(Site(), params.prefix + u'/' + format_date(day, skip_day=True))

    try:
        result = page.get() + result
    except NoPage:
        result = u'{{mise à jour bot|Zérobot}}' + result
    if comment:
        # Leading empty element so the joined edit summary starts with ' - '
        comment.insert(0, '')
    page.put(
        result,
        comment="Journal des recréations ({day}) ".format(day=format_date(day))
        + ' - '.join(comment))
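A minimal usage sketch (the date is illustrative; the module's own globals such as params, Site, creation_log, and format_date are assumed to be configured):

import datetime

process(datetime.date(2021, 3, 14))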
Code example #2
File: app.py Project: 5j9/whichsub
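This function searches a page and the templates it transcludes for a given string, resolving redirects on both the page and each template before matching, and returns the matches deduplicated by title.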
def find_sub_templates(
    lookingfor: str, page: Page, wholeword: bool, matchcase: bool
):
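    """Search page and its templates for lookingfor; return matches deduplicated by title."""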
    found_templates = []
    if page.isRedirectPage():
        page = page.getRedirectTarget()
    pagetext = page.text
    if not matchcase:
        pagetext = pagetext.lower()
        lookingfor = lookingfor.lower()
    if wholeword:
        pattern = re.compile(r'\b' + re.escape(lookingfor) + r'\b')
        if pattern.search(pagetext):
            found_templates.append(page)
    elif lookingfor in pagetext:
        found_templates.append(page)

    for sub_template in page.templates(content=True):
        if sub_template.isRedirectPage():
            sub_template = sub_template.getRedirectTarget()
        text = sub_template.text if matchcase else sub_template.text.lower()
        if wholeword:
            # pattern is always bound here: wholeword was checked above
            if pattern.search(text):
                found_templates.append(sub_template)
        elif lookingfor in text:
            found_templates.append(sub_template)

    # Remove duplicate templates
    return {f.title(): f for f in found_templates}.values()
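A minimal usage sketch (assuming a configured pywikibot installation; the page title and search string are illustrative):

from pywikibot import Page, Site

page = Page(Site('en', 'wikipedia'), 'Example')
for match in find_sub_templates('cite web', page, wholeword=True, matchcase=False):
    print(match.title())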
Code example #3
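This method decides whether an article is about a person by resolving redirects and then walking up the article's category tree, at most two levels, looking for a category whose title contains "People".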
    def isPeople(self, article):
        """Return True if the article falls under a "People" category, checking up to two levels of parent categories."""
        site = Site("en")
        page = Page(site, article.decode("utf8"))
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        # Dicts are used as simple sets of Category objects throughout
        cats = {cat: 1 for cat in page.categories()}
        if any("People" in tcat.title() for tcat in cats):
            print(cats)
            return True
        # Breadth-first walk up the category tree, at most two levels deep
        currcats = cats.copy()
        allcats = {}
        depth = 0
        while currcats != {} and depth < 2:
            depth += 1
            newcats = {}
            for cat in currcats:
                if cat in allcats:
                    continue
                allcats[cat] = 1
                parentcats = {c: 1 for c in cat.categories()}
                if any("People" in tcat.title() for tcat in parentcats):
                    print(parentcats)
                    return True
                for parcat in parentcats:
                    if parcat not in allcats and parcat not in newcats:
                        newcats[parcat] = 1
            currcats = newcats
            print(len(currcats), currcats)
        return False
Code example #4
File: page.py Project: JJMC89/bsiconsbot
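This helper loads a JSON configuration stored on a wiki page, following a redirect if necessary and returning an empty configuration when the page is missing or cannot be read.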
def load_config(page: pywikibot.Page, **kwargs: Any) -> ConfigJSONObject:
    """Load JSON config from the page."""
    if page.isRedirectPage():
        pywikibot.log(f"{page!r} is a redirect.")
        page = page.getRedirectTarget()
    _empty = jsoncfg.loads_config("{}")
    if not page.exists():
        pywikibot.log(f"{page!r} does not exist.")
        return _empty
    try:
        return jsoncfg.loads_config(page.get(**kwargs).strip())
    except pywikibot.exceptions.PageRelatedError:
        return _empty
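A minimal usage sketch (the config page title is hypothetical; assumes a configured pywikibot Site):

config_page = pywikibot.Page(pywikibot.Site(), 'User:ExampleBot/config.json')
config = load_config(config_page)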
Code example #5
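This method maps a wiki page to its Wikidata item: it fetches the page while tolerating redirects (see the referenced Phabricator tasks), resolves the redirect target, and returns the item's Q-identifier, or None if no item exists.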
    def get_wikidata_id(self, page: pywikibot.Page):
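        """Return the Wikidata item ID (e.g. 'Q42') linked to page, or None."""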
        if not page.exists():
            return None

        # T256583, T87345
        page.get(get_redirect=True)
        if page.isRedirectPage():
            page = page.getRedirectTarget()
            page.get()

        item = pywikibot.ItemPage.fromPage(page)
        if not item or not item.exists():
            return None
        return item.title()
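A minimal usage sketch (assuming `bot` is an instance of the class that defines this method):

page = pywikibot.Page(pywikibot.Site('en', 'wikipedia'), 'Douglas Adams')
print(bot.get_wikidata_id(page))  # expected 'Q42'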
Code example #6
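This function looks up a coach's Wikipedia page, resolves redirects, and parses the infobox templates with mwparserfromhell to pair coaching teams with the corresponding years.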
def extract_coach_tenures(name):
    """
    Extract a coaches tenures from Wikipedia.
    
    Arguments:
    - name (name of coach)
    
    Returns:
    - list(dict)
    """
    logging.info('Looking for coach %s' % name)
    page_name = get_page_name_from_coach_name_wiki(name)

    # If we can't find a wikipedia page, return immediately
    if not page_name:
        return []
    else:
        logging.debug('Looking up %s as http://en.wikipedia.org/wiki/%s' %
                      (name, page_name))

    # Extract page content from wikipedia and narrow it down to the templates
    p = Page(Site('en', 'wikipedia'), page_name)
    if p.isRedirectPage():
        p = p.getRedirectTarget()
    content = p.get()

    parsed = mwparserfromhell.parse(content)
    templates = parsed.filter_templates()

    # Extract teams and years from the template parameters
    teams, years = None, None
    for t in templates:
        for param in t.params:
            if "coach_teams" in param.name:
                teams = parse_coach_teams_and_positions_from_wiki(param)
            if "coach_years" in param.name:
                years = parse_coach_years_from_wiki(param)

    # If we were not able to extract information from the page, log & return empty
    if not teams or not years:
        logging.warning(
            'ISSUE DETECTED: %s is a valid page but no information was extracted' %
            name)
        return []

    # Merge each (team, years) pair into one record and tag it with the coach's name
    tenures = [{**team, **year} for team, year in zip(teams, years)]
    for d in tenures:
        d['name'] = name
    return tenures
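A minimal usage sketch (the coach name is illustrative):

for tenure in extract_coach_tenures('Nick Saban'):
    print(tenure)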
Code example #7
File: WikiManager.py Project: jdc08161063/FunFacts
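This method returns an article's visible (non-hidden) categories, serving them from a per-article file cache when available and otherwise fetching them via pywikibot, following redirects, and appending them to the cache.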
    def getCategories(self, article):
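        """Return filtered, non-hidden category titles for article, using a per-article file cache."""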
        baseDir = "articleCategoriesCache/"
        if not os.path.exists(baseDir):
            os.makedirs(baseDir)
        fname = baseDir + article
        if os.path.isfile(fname):
            # Read the cached category list; fall back to the platform default encoding
            try:
                with codecs.open(fname, encoding='utf-8') as f:
                    lines = [line.strip() for line in f.readlines()]
            except UnicodeDecodeError:
                with codecs.open(fname) as f:
                    lines = [line.strip() for line in f.readlines()]
            lines = self.filterCategories(lines)
            if lines:
                return lines

        # Not cached: fetch the article and collect its visible (non-hidden) categories
        site = Site("en")
        page = Page(site, article.decode("utf8"))
        if page.isRedirectPage():
            page = page.getRedirectTarget()
        cats = sorted([
            cat.title() for cat in page.categories()
            if not cat.isHiddenCategory()
        ])
        cats = self.filterCategories(cats)
        # Cache the result, one category title per line
        text = ""
        for cat in cats:
            text += cat + "\n"
        try:
            with codecs.open(fname, "a+") as f:
                f.write(text)
        except UnicodeError:
            # Fall back to appending UTF-8 encoded bytes if the default encoding fails
            with codecs.open(fname, "a+") as f:
                f.write(text.encode('utf-8'))
        return cats