Example 1
    def _add_page_from_template(self, namespace, title, template_name,
                                template_parameters):
        """Add a page by substituting a template.

        Parameters
        ----------
        namespace : str
            Namespace of the page. If None, the default namespace will
            be used.
        title : str
            The title of the page.
        template_name : str
            The name of the template to substitute to create the subpage.
        template_parameters : list or OrderedDict
            Parameters to pass to the template.

        """

        if namespace is None:
            page = Page(self._site, title)
        else:
            page = Page(self._site, title, namespace)
        if page.exists() and not self._overwrite:
            logging.warning(
                "Page '{}' already exists. It will not be created.".format(
                    page.title()))
        else:
            template = Template(template_name, True, template_parameters)
            page.text = template.multiline_string()
            logging.info("Writing to page '{}'.".format(page.title()))
            logging.debug(page.text)
            self._write_page(page)
Example 2
    def userPut(
        self,
        page: pywikibot.Page,
        oldtext: str,
        newtext: str,
        summary: Optional[str] = None,
        minor: bool = True,
        botflag: Optional[bool] = None,
    ) -> None:
        if oldtext == newtext:
            pywikibot.output("No changes were needed on %s" % page.title(as_link=True))
            return

        pywikibot.output("\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title(as_link=True))

        pywikibot.showDiff(oldtext, newtext)
        if summary:
            pywikibot.output("Summary: %s" % summary)

        page.text = newtext
        try:
            page.save(summary=summary, minor=minor, botflag=botflag)
        except pywikibot.EditConflict:
            raise
        except pywikibot.Error as e:
            pywikibot.output("Failed to save %s: %r: %s" % (page.title(as_link=True), e, e))
Example 3
    def task(self):
        list_platzhalter = []
        list_protected = []
        lemma_list = self.get_list()
        for idx, item in enumerate(lemma_list):
            lemma = Page(self.wiki, item["title"])
            if self.is_protected(lemma):
                list_protected.append(lemma.title())
                lemma.protect(protections={
                    "edit": "autoconfirmed",
                    "move": "autoconfirmed"
                },
                              reason="is now common")
            categories = [item.title() for item in lemma.categories()]
            if "Kategorie:RE:Platzhalter" in categories:
                list_platzhalter.append(lemma.title())
            self.logger.info(
                f"{idx}/{len(lemma_list)} prot: {len(list_protected)}, plat: {len(list_platzhalter)} {lemma.title()}"
            )

        page_protected = Page(self.wiki,
                              "Benutzer:THE IT/RE/Arthur Stein/protected")
        page_protected.text = self.join_lists(list_protected)
        page_protected.save()

        page_platzhalter = Page(self.wiki,
                                "Benutzer:THE IT/RE/Arthur Stein/platzhalter")
        page_platzhalter.text = self.join_lists(list_platzhalter)
        page_platzhalter.save()
        return True
Example 4
def process(day):
    """
    one day bot processing
     
    arguments:
    day -- python date format
    
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(
            day=format_date(day)))
    start = to_date(day)
    end = to_date(day + ONE_DAY)
    result = "\n\n== {} ==\n".format(format_date(day))
    comment = []
    for i, page in enumerate(creation_log(start, end), 1):
        gras = ''
        date = ''
        if params.verbose:
            print(i, page["timestamp"])

        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(),
                            "Discussion:" + page["title"] + "/Suppression")
            if page_pas.isRedirectPage():
                page_pas = page_pas.getRedirectTarget()
            if page_pas.exists() and re.search(r'article supprimé',
                                               page_pas.get(), re.I):
                if re.search(
                        r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à',
                        page_pas.get(), re.I):
                    date = u' de %s' % re.search(
                        r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à',
                        page_pas.get(), re.I).group(1)
                comment.append(u'[[%s]] (malgré [[%s|PàS]]%s)' %
                               (page["title"], page_pas.title(), date))
                gras = "'''"
            r = (
                u"* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} puis recréé par {{{{u|{user}}}}}{g} \n"
                .format(title=wiki_param(page["title"]),
                        pas=page_pas.title(),
                        user=wiki_param(page["user"]),
                        date=format_date(from_date(dl["timestamp"])),
                        g=gras))
            if params.verbose:
                print(r)
            result += r

    page = Page(Site(), params.prefix + u'/' + format_date(day, skip_day=True))

    try:
        result = page.get() + result
    except NoPage:
        result = u'{{mise à jour bot|Zérobot}}' + result
    if comment: comment.insert(0, '')
    page.put(
        result,
        comment="Journal des recréations ({day}) ".format(day=format_date(day))
        + ' - '.join(comment))
Example 5
    def _add_category_page(self, title, categories):
        """Add a page with categories.

        Parameters
        ----------
        title : str
            Title of the page.
        categories : list
            The categories to add to the page.

        """

        page = Page(self._site, title, "Category")
        if page.exists() and not self._overwrite:
            logging.warning(
                "Category page '{}' already exists. It will not be created.".
                format(page.title())  # noqa: E501
            )
        else:
            page.text = ""
            for category in categories:
                if category != title:
                    page.text += "[[Kategori:{}]]\n".format(category)
            logging.info("Writing to category page '{}'".format(page.title()))
            logging.debug(page.text)
            self._write_page(page)
Example 6
def process(day):
    """
    one day bot processing
     
    arguments:
    day -- python date format
    
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(day=format_date(day)))
    start = to_date(day)
    end = to_date(day+ONE_DAY)
    result = "\n\n== {} ==\n".format(format_date(day))
    comment = []
    for i,page in enumerate(creation_log(start,end),1):
        gras = ''
        date = ''
        if params.verbose:
            print (i,page["timestamp"])
    
        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(), "Discussion:" + page["title"] + "/Suppression")
            if page_pas.isRedirectPage():
                page_pas = page_pas.getRedirectTarget()
            if page_pas.exists() and re.search(r'article supprimé', page_pas.get(), re.I):
                if re.search(r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à', page_pas.get(), re.I):
                    date = u' de %s' % re.search(r'\{\{ ?article supprimé[^\}]*\d{1,2} (\S* \d{4}) à', page_pas.get(), re.I).group(1)
                comment.append(u'[[%s]] (malgré [[%s|PàS]]%s)' % (page["title"], page_pas.title(), date))
                gras = "'''"
            r = (u"* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} puis recréé par {{{{u|{user}}}}}{g} \n"
                            .format(title = wiki_param(page["title"]),
                            pas =  page_pas.title(),
                            user = wiki_param(page["user"]),
                            date = format_date(from_date(dl["timestamp"])),
                            g = gras))
            if params.verbose:
                print(r)
            result += r
    
    page = Page(Site(), params.prefix + u'/' + format_date(day, skip_day=True))
                                                                                               
    try:
        result = page.get() + result
    except NoPage:
        result = u'{{mise à jour bot|Zérobot}}' + result
    if comment: comment.insert(0, '')
    page.put(result,comment="Journal des recréations ({day}) ".format(day=format_date(day)) + ' - '.join(comment))
Example 7
def get_topic_articles(page):
    # construct the memory datastore:
    topic = {}
    topic['title'] = page.title()
    # I don't want no stubs
    if len(page.text) < 500:
        return
    topic['content'] = page.text
    try:
        edition_links = list(pagegenerators.LanguageLinksPageGenerator(page))
    except Exception:
        # Language-link lookup can fail for some pages; skip them.
        return
    if not edition_links:
        return
    topic['language'] = {}
    for link in edition_links:
        lang_code = str(link.site)[-2:]
        if lang_code in langs.keys():
            # I don't want no stubs
            if len(link.text) < 500:
                continue
            page = Page(link)
            topic['language'][lang_code] = {
                                    'title': page.title(),
                                    'orig_content': page.text,
                                    'translated_content': translate(page.text)
                                    }
            microsoft_char_counter(len(page.text))
    if topic['language'] == {}:
        return
    return topic
Example 8
    def __init__(self, page: pywikibot.Page):

        # general
        self.shorttitle = page.title(without_brackets=True)
        self.norefstext = self._refremove(page.text)
        self.test = False  # set to true for test outputs

        # first paragraph (lead) info
        self.firstpar = self._firstpar(self.norefstext)
        self.leadname = self._leadname(self.firstpar) if self.firstpar else None
        self.leadbday = re.sub(self.cleandayR, '', self._leadbday()) if self._leadbday() else None
        self.leadbyear = self._leadbyear()
        self.leaddday = re.sub(self.cleandayR, '', self._leaddday()) if self._leaddday() else None
        self.leaddyear = self._leaddyear()

        # categories info
        self.catbyear = self._catbyear(self.norefstext)
        self.catdyear = self._catdyear(self.norefstext)

        # infobox info
        self.infoboxtitle, self.infoboxparams = self._listinfoboxes(self.norefstext)
        self.infoboxbday = re.sub(self.cleandayR, '', self._infoboxbday()) if self._infoboxbday() else None
        self.infoboxbyear = self._infoboxbyear() if self.infoboxexists else None
        self.infoboxdday = re.sub(self.cleandayR, '', self._infoboxdday()) if self._infoboxdday() else None
        self.infoboxdyear = self._infoboxdyear() if self.infoboxexists else None
        self.infoboxname = self._infoboxname() if self.infoboxexists else None

        # results
        self.isconflicted = self.nameconflict or self.birthdayconflict or self.deathdayconflict
Example 9
def _get_img_path(img: Page, img_dir: Path) -> Tuple[str, Path, Path]:
    img_name = unquote(img.title(with_ns=False, as_url=True))
    img_name_valid = hashlib.md5(img_name.encode('utf-8')).hexdigest()
    img_path = img_dir / (img_name_valid + ".jpg")
    img_path_orig = Path(
        str(img_path) + "_" + Path(img_name).suffix + ".ORIGINAL")

    return img_name, img_path, img_path_orig
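A minimal usage sketch of the helper above, assuming it is in scope; the Commons site, file title, and target directory are illustrative assumptions, not part of the original code.

from pathlib import Path

import pywikibot
from pywikibot import FilePage

site = pywikibot.Site('commons', 'commons')            # assumed site
img = FilePage(site, 'File:Example.jpg')                # hypothetical file page
name, path, path_orig = _get_img_path(img, Path('/tmp/images'))
print(name, path, path_orig)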
Example 10
        def filter_function(article: pywikibot.Page):
            def strip_accents(s):
                return ''.join((c for c in unicodedata.normalize('NFD', s)
                                if unicodedata.category(c) != 'Mn'))

            word = strip_accents(article.title()).lower().replace('’', ' ')
            return (not word[0].isdigit()
                    and word >= 'il ne faut pas dire, fontaine, je ne boirai pas de ton eau')
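For context, a standalone sketch of the accent-stripping step used in the filter above; the sample strings are illustrative assumptions.

import unicodedata

def strip_accents(s: str) -> str:
    # Decompose characters (NFD) and drop combining marks ('Mn').
    return ''.join(c for c in unicodedata.normalize('NFD', s)
                   if unicodedata.category(c) != 'Mn')

print(strip_accents('Éléphant').lower())    # -> 'elephant'
print(strip_accents('fontaine') >= 'il')    # -> False: sorts before the threshold string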
Example 11
    def check_page(self, pagename):
        """Check one page."""
        pywikibot.output("\nChecking %s" % pagename)
        sys.stdout.flush()
        page1 = Page(self.original, pagename)
        txt1 = page1.text

        if self.options.dest_namespace:
            dest_ns = int(self.options.dest_namespace)
        else:
            dest_ns = None

        for site in self.sites:
            if dest_ns is not None:
                page2 = Page(site, page1.title(withNamespace=False), dest_ns)
                pywikibot.output("\nCross namespace, new title: %s"
                                 % page2.title())
            else:
                page2 = Page(site, pagename)

            if page2.exists():
                txt2 = page2.text
            else:
                txt2 = ''

            if str(site) in config.replicate_replace:
                txt_new = multiple_replace(txt1,
                                           config.replicate_replace[str(site)])
                if txt1 != txt_new:
                    pywikibot.output(
                        'NOTE: text replaced using config.sync_replace')
                    pywikibot.output('%s %s %s' % (txt1, txt_new, txt2))
                    txt1 = txt_new

            if txt1 != txt2:
                pywikibot.output("\n %s DIFFERS" % site)
                self.differences[site].append(pagename)

            if self.options.replace:
                page2.text = txt1
                page2.save(self.put_message(site))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
Example 12
    def check_page(self, pagename):
        """Check one page."""
        pywikibot.output('\nChecking ' + pagename)
        sys.stdout.flush()
        page1 = Page(self.original, pagename)
        txt1 = page1.text

        if self.options.dest_namespace:
            dest_ns = int(self.options.dest_namespace)
        else:
            dest_ns = None

        for site in self.sites:
            if dest_ns is not None:
                page2 = Page(site, page1.title(with_ns=False), dest_ns)
                pywikibot.output('\nCross namespace, new title: ' +
                                 page2.title())
            else:
                page2 = Page(site, pagename)

            if page2.exists():
                txt2 = page2.text
            else:
                txt2 = ''

            if str(site) in config.replicate_replace:
                txt_new = multiple_replace(txt1,
                                           config.replicate_replace[str(site)])
                if txt1 != txt_new:
                    pywikibot.output(
                        'NOTE: text replaced using config.sync_replace')
                    pywikibot.output('{0} {1} {2}'.format(txt1, txt_new, txt2))
                    txt1 = txt_new

            if txt1 != txt2:
                pywikibot.output('\n {0} DIFFERS'.format(site))
                self.differences[site].append(pagename)

            if self.options.replace:
                page2.text = txt1
                page2.save(self.put_message(site))
            else:
                sys.stdout.write('.')
                sys.stdout.flush()
Example 13
def scrapePage(page: pywikibot.Page) -> None:
    """Scrape a page's infoboxes and redirects, save them in the `state`."""
    pageData: Any = {'infoboxes': [], 'redirects': {}}
    # Iterate over {{infobox journal}}s on `page`.
    for infobox in getInfoboxJournals(page):
        print('I', end='', flush=True)
        pageData['infoboxes'].append(infobox)
        if 'title' in infobox and infobox['title'] != '':
            state.saveTitleToAbbrev(infobox['title'])
        checkDBAbbrevs(page.title(), infobox)
    # Iterate over pages that are redirects to `page`.
    for r in getRedirectsToPage(page.title(), namespaces=0,
                                total=100, content=True):
        print('R', end='', flush=True)
        pageData['redirects'][r.title()] = r.text
        # r.getRedirectTarget().title()
    state.savePageData(page.title(), pageData)
    state.saveTitleToAbbrev(abbrevUtils.stripTitle(page.title()))
    print('', flush=True)
Example 14
def process(day):
    """
    one day bot processing
     
    arguments:
    day -- python date format
    
    """
    if params.verbose:
        print("processing Journal des recréations ({day})".format(day=format_date(day)))
    start = to_date(day)
    end = to_date(day+ONE_DAY)
    result = "\n== {} ==\n".format(format_date(day))
    comment = ''
    for i,page in enumerate(creation_log(start,end),1):
        gras = ''
        if params.verbose:
            print (i,page["timestamp"])
    
        dl = deletelog(page["title"])
        if dl:
            page_pas = Page(Site(), "Discussion:"+page["title"]+"/Suppression")
            if page_pas.exists() and re.search(r'\{\{ ?Article supprimé', page_pas.get(), re.I):
                comment += u' - %s (malgré [[%s|PàS]])' % (page["title"], page_pas.title())
                gras = "'''"
            r = ("* {g}{{{{a-court|{title}}}}} <small>([[{pas}|PàS]])</small> supprimé le {date} recréé par {{{{u|{user}}}}}{g} \n"
                    .format(title = wiki_param(page["title"]) ,
                            pas =  page_pas.title()),
                            user = wiki_param(page["user"]),
                            date = format_date(from_date(dl["timestamp"])),
                            g = gras)
            if params.verbose:
                print(r)
            result += r
    
    page = Page(Site(), params.prefix+"/"+format_date(day,skip_day=True))
                                                                                               
    try:
        result = page.get()+result
    except NoPage:
        pass
    page.put(result,comment="Journal des recréations ({day})".format(day=format_date(day)) + comment)
Example 15
def get_page_from_size(page: pywikibot.Page) -> pywikibot.Page:
    """Return a page based on the current page size."""
    i = 1
    title = page.title()
    while True:
        if not page.exists():
            break
        if len(page.text) < 1e6:
            break
        i += 1
        page = Page(page.site, f"{title} ({i:02d})")
    return page
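A hedged usage sketch of get_page_from_size, assuming the function above is in scope: pick the first sub-page still below the size limit before appending to it. The site, base title, and appended text are assumptions.

import pywikibot
from pywikibot import Page

site = pywikibot.Site('en', 'wikipedia')           # assumed site
base = Page(site, 'User:ExampleBot/Log')           # hypothetical base page
target = get_page_from_size(base)                  # e.g. 'User:ExampleBot/Log (02)' once the base grows past 1e6 bytes
target.text += '\nNew log entry'
# target.save(summary='Append log entry')          # uncomment to actually write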
Example 16
def ParseWikiPagePushInfo(page: pywikibot.Page):
    parsed_text = ParsePage(page)
    # If the score of a trivia is higher than this,
    # we'll try to show it only, without leading text.
    triviaSignificance = float(GetConfig("Wiki", "PushedTermsTTL", 180))
    # Distill text
    bareTitle = BareDisambigTitle(page.title())
    distilled = WikiPageDistiller.DistillHtml(parsed_text)
    info = WikiPagePushInfo(page.title(), page.full_url())
    if distilled.trivia is not None:
        # Trivia only
        info.postText = distilled.trivia
        # Leading + trivia
        if distilled.triviaScore < triviaSignificance or bareTitle not in info.postText:
            info.postText = distilled.introduction + info.postText
    else:
        # Leading
        info.postText = distilled.introduction
    #elif len(distilled.introduction) < 50 :
    #info.post
    # Choose cover image
    info.postImageName, info.postImageUrl = GetCoverImage(page)
    return info
Example 17
    def _create_current_projects_template(self):
        """Create a current projects template with the new projects."""
        page_name = self._make_year_title(
            self._config["year_pages"]["current_projects_template"])
        page = Page(self._site, page_name)
        if page.exists() and not self._overwrite:
            logging.warning(
                "Page '{}' already exists. It will not be created.".format(
                    page.title()))
            return

        project_format = "[[{ns}:{{proj}}|{{proj}}]]".format(
            ns=self._config["project_namespace"])
        delimiter = "''' · '''"
        template_data = {}
        for program in self._programs:
            projects = set()
            for strategy in program.get('strategies'):
                # projects sorted by id to get thematic grouping
                projects.update(strategy.get("projects"))
            template_data[program.get('name')] = delimiter.join([
                project_format.format(proj=self._projects[project])
                for project in sorted(projects)
            ])

        template = Template("Aktuella projekt/layout")
        template.add_parameter("år", self._year)
        template.add_parameter("access", template_data["Tillgång"])
        template.add_parameter("use", template_data["Användning"])
        template.add_parameter("community", template_data["Gemenskapen"])
        template.add_parameter("enabling", template_data["Möjliggörande"])

        page.text = template.multiline_string() + \
            "\n<noinclude>{{Dokumentation}}</noinclude>"
        logging.info("Writing to page '{}'.".format(page.title()))
        logging.debug(page.text)
        self._write_page(page)
Example 18
    def feed_archive(self,
                     archive: pywikibot.Page,
                     thread: DiscussionThread,
                     max_archive_size: Size,
                     params=None) -> bool:
        """
        Feed the thread to one of the archives.

        Also check for security violations.

        @return: whether the archive is full
        """
        archive_page = self.get_archive_page(archive.title(with_ns=True),
                                             params)
        return archive_page.feed_thread(thread, max_archive_size)
Example 19
    def get_plain_text(self, page: pywikibot.Page):
        params = {
            'action': 'query',
            'prop': 'extracts',
            'exsentences': 7,
            'explaintext': 1,
            'format': 'json',
            'titles': page.title()
        }
        request = self.site._simple_request(**params)
        response = request.submit()
        try:
            return self.parse_text(next(iter(response['query']['pages'].values()), None)['extract'])
        except (KeyError, TypeError):
            pass
Example 20
def template_title_regex(tpl_page: pywikibot.Page) -> Pattern:
    """
    Return a regex that matches to variations of the template title.

    It supports the transcluding variant as well as localized namespaces and
    case-insensitivity depending on the namespace.

    :param tpl_page: The template page
    :type tpl_page: pywikibot.page.Page
    """
    ns = tpl_page.site.namespaces[tpl_page.namespace()]
    marker = '?' if ns.id == 10 else ''
    title = tpl_page.title(with_ns=False)
    title = case_escape(ns.case, title)

    return re.compile(r'(?:(?:%s):)%s%s' % ('|'.join(ns), marker, title))
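A minimal sketch of applying the returned pattern to page wikitext, assuming template_title_regex above is in scope; the site, template, and article titles are assumptions.

import pywikibot
from pywikibot import Page

site = pywikibot.Site('en', 'wikipedia')               # assumed site
tpl = Page(site, 'Template:Infobox journal')           # hypothetical template
pattern = template_title_regex(tpl)

article = Page(site, 'Example article')                # hypothetical article
for match in pattern.finditer(article.text):
    print(match.group(0))                              # each occurrence of the template name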
Example 21
def delete_page(page: pywikibot.Page, summary: str) -> None:
    """Delete the page and dependent pages."""
    page.delete(reason=summary, prompt=False)
    if page.exists():
        return
    page_link = page.title(as_link=True)
    for redirect in page.backlinks(filter_redirects=True):
        redirect.delete(reason=SUMMARIES['redirect'].format(page_link),
                        prompt=False)
    talk_page = page.toggleTalkPage()
    if talk_page.exists():
        talk_page.delete(reason=SUMMARIES['talk'].format(page_link),
                         prompt=False)
        talk_link = talk_page.title(as_link=True)
        for redirect in talk_page.backlinks(filter_redirects=True):
            redirect.delete(reason=SUMMARIES['redirect'].format(talk_link),
                            prompt=False)
Example 22
def template_title_regex(tpl_page: pywikibot.Page) -> Pattern:
    """
    Return a regex that matches to variations of the template title.

    It supports the transcluding variant as well as localized namespaces and
    case-insensitivity depending on the namespace.

    @param tpl_page: The template page
    @type tpl_page: pywikibot.page.Page
    """
    ns = tpl_page.site.namespaces[tpl_page.namespace()]
    marker = '?' if ns.id == 10 else ''
    title = tpl_page.title(with_ns=False)
    if ns.case != 'case-sensitive':
        title = '[{}{}]{}'.format(re.escape(title[0].upper()),
                                  re.escape(title[0].lower()),
                                  re.escape(title[1:]))
    else:
        title = re.escape(title)

    return re.compile(r'(?:(?:%s):)%s%s' % ('|'.join(ns), marker, title))
Example 23
def GetCoverImage(page: pywikibot.Page):
    '''
    Gets the cover image name and url for a specific Page.
    Returns (None, None) if no cover image is found.
    '''
    try:
        return page.__lmd_cover_image
    except AttributeError:
        # Not cached on this Page object yet.
        pass
    req = page.site._simple_request(action="query",
                                    titles=page.title(),
                                    prop="pageimages",
                                    piprop="thumbnail|name",
                                    pithumbsize=400)
    data = req.submit()
    assert "query" in data, "API request response lacks 'query' key"
    assert "pages" in data["query"], "API request response lacks 'pages' key"
    _, jpage = data["query"]["pages"].popitem()
    if "thumbnail" in jpage:
        page.__lmd_cover_image = (jpage["pageimage"],
                                  jpage["thumbnail"]["source"])
    else:
        page.__lmd_cover_image = (None, None)
    return page.__lmd_cover_image
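A hedged usage sketch of GetCoverImage, assuming the function above is in scope; the site and article title are assumptions and the call needs network access.

import pywikibot

site = pywikibot.Site('en', 'wikipedia')      # assumed site
page = pywikibot.Page(site, 'Example')        # hypothetical article
name, url = GetCoverImage(page)
if url:
    print(f'Cover image {name}: {url}')
else:
    print('No cover image found.')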
Example 24
def handle_maariv_paper_page(paper_page: pw.Page) -> None:
    publish_date = _extract_paper_page_from_title(paper_page.title())
    if publish_date is None:
        return

    parsed_mw_text = mwparserfromhell.parse(paper_page.text)
    paper_template = parsed_mw_text.filter_templates(
        matches=mark_as_paper_template_name)[0]

    if publish_date_param_name in paper_template:
        logger.info(
            f'Page: {paper_page} is already marked with publish date, skipping this paper'
        )
        return

    paper_template.add(publish_date_param_name, publish_date)
    logger.info(
        f'Added publish date: {publish_date} for page: {paper_page.title()}')

    paper_page.text = str(parsed_mw_text)
    if SHOULD_SAVE:
        paper_page.save(
            summary="MaccabiBotAdd publish dates for maariv papers",
            botflag=True)
Example 25
    def add_project_page(self, phab_id, phab_name, parameters, goals,
                         goal_fulfillments):
        """Add the main project page.

        Parameters
        ----------
        name : str
            The project name in Swedish. This will be used as title
            for the page.
        description : str
            Passed to template as parameter "beskrivning".
        partners : str
            Passed to template as parameter "samarbetspartners".

        """
        name = parameters[self._project_columns["swedish_name"]]
        page = Page(self._site, name, self._config["project_namespace"])
        if page.exists() and not self._overwrite:
            logging.warning(
                "Project page '{}' already exists. It will not be created.".
                format(page.title())  # noqa: E501
            )
        else:
            template = Template(self._config["project_template"], True)
            project_parameters = self._config["project_parameters"].items()
            for template_parameter, label in project_parameters:
                template.add_parameter(
                    template_parameter,
                    parameters[self._project_columns[label]])
            template.add_parameter("year", self._year)
            template.add_parameter("phabricatorId", phab_id)
            template.add_parameter("phabricatorName", phab_name)
            template.add_parameter("bot", "ja")
            content = "{}".format(template)
            page.text = content
            logging.info("Writing to project page '{}'".format(page.title()))
            logging.debug(page.text)
            self._write_page(page)
            for subpage in self._config["subpages"]:
                subpage_parameters = {
                    "år": self._year  # always pass the year parameter
                }
                if "parameters" in subpage:
                    for key, label in subpage["parameters"].items():
                        subpage_parameters[key] = parameters[
                            self._project_columns[label]]
                if "add_goals_parameters" in subpage:
                    # Special case for goals parameters, as they are not
                    # just copied.
                    template_key = \
                        list(subpage["add_goals_parameters"].keys())[0]
                    template_value = self._make_year_title(
                        subpage["add_goals_parameters"][template_key])
                    subpage_parameters[template_key] = \
                        Template(template_value, parameters=goals)
                    subpage_parameters["måluppfyllnad"] = \
                        self._create_goal_fulfillment_text(
                            goals.keys(),
                            goal_fulfillments
                        )  # noqa:E123
                self._add_subpage(name, subpage["title"],
                                  subpage["template_name"], subpage_parameters)
Example 26
    def skip_page(self, page: pywikibot.Page) -> bool:
        """Skip the page if it is not an SVG."""
        if not isinstance(page, pywikibot.FilePage) or not page.title(
                with_ns=False).lower().endswith('.svg'):
            return True
        return super().skip_page(page)
Example 27
    version_history = page.fullVersionHistory()[::-1]
    size_all_changes = 0
    for idx_rev, revision in enumerate(version_history):
        user = revision.user
        if user == 'Pfaerrich':
            if idx_rev > 0:
                size_prev = len(version_history[idx_rev - 1].text)
            else:
                size_prev = 0
            size_all_changes += abs(len(version_history[idx_rev].text) - size_prev)
    korrigiert_flag = False
    if size_all_changes > 0:
        for version in page.getVersionHistory():
            if version.user == 'Pfaerrich':
                if re.search('orrigiert', version.comment):
                    korrigiert_flag = True
                    break
    print(size_all_changes, len(page.text), korrigiert_flag)
    if (size_all_changes / len(page.text)) < 0.03 and not korrigiert_flag:
        list_for_pfaerrich.append([page.title(), size_all_changes, len(page.text)])

report_page = Page(wiki, 'Benutzer:THEbotIT/List_for_Pfaerrich')

header = '{|class="wikitable sortable"\n! Lemma\n! Größe\n! geändert von dir'
text = []
for line in list_for_pfaerrich:
    text.append('|-\n|[[{lemma}]]\n|{size}\n|{changes}'.format(lemma=line[0], size=line[2], changes=line[1]))
text = '\n'.join(text)
text = '{header}\n{text}\n|}}'.format(header=header, text=text)
report_page.text = text
report_page.save(botflag=True, summary='blub')
Example 28
        if user == 'Pfaerrich':
            if idx_rev > 0:
                size_prev = len(version_history[idx_rev - 1].text)
            else:
                size_prev = 0
            size_all_changes += abs(
                len(version_history[idx_rev].text) - size_prev)
    korrigiert_flag = False
    if size_all_changes > 0:
        for version in page.getVersionHistory():
            if version.user == 'Pfaerrich':
                if re.search('orrigiert', version.comment):
                    korrigiert_flag = True
                    break
    print(size_all_changes, len(page.text), korrigiert_flag)
    if (size_all_changes / len(page.text)) < 0.03 and not korrigiert_flag:
        list_for_pfaerrich.append(
            [page.title(), size_all_changes,
             len(page.text)])

report_page = Page(wiki, 'Benutzer:THEbotIT/List_for_Pfaerrich')

header = '{|class="wikitable sortable"\n! Lemma\n! Größe\n! geändert von dir'
text = []
for line in list_for_pfaerrich:
    text.append('|-\n|[[{lemma}]]\n|{size}\n|{changes}'.format(
        lemma=line[0], size=line[2], changes=line[1]))
text = '\n'.join(text)
text = '{header}\n{text}\n|}}'.format(header=header, text=text)
report_page.text = text
report_page.save(botflag=True, summary='blub')
Example 29
    def process_page(self, page: Page):
        page_text = page.get(force=True)
        parsed = mwparserfromhell.parse(page_text)

        year = None
        month = None
        day = None
        entry = None

        for template in parsed.filter_templates():
            if (template.name.matches('Dyktalk')
                    or template.name.matches('DYK talk')) and (
                        not template.has('entry')
                        or len(template.get('entry').value) == 0):
                if year is None:
                    if (not template.has(1)) or (not template.has(2)):
                        print('Skipping {{DYK talk}} page', page,
                              ', no date found')
                        continue

                    print('*', page.title(), template.get(2), template.get(1))
                    year = template.get(2).value.strip()
                    day, month = template.get(1).value.strip().split(' ')

                if entry is None:
                    entry = self.get_entry_for_page(year, month, day, page)

                if entry:
                    print('Adding entry', entry, 'to {{DYK talk}}')
                    template.add('entry', entry)
            elif (template.name.matches('ArticleHistory')
                  or template.name.matches('Article history')) and (
                      not template.has('dykentry')
                      or len(template.get('dykentry').value) == 0):
                if year is None:
                    if not template.has('dykdate'):
                        print('Skipping {{ArticleHistory}} on page', page,
                              ', no date found')
                        continue
                    date = template.get('dykdate').value.strip()
                    print('*', page.title(), date)

                    if ' ' in date:
                        # monthName YYYY
                        if date.count(' ') == 1:
                            date = '1 ' + date
                        day, month, year = date.split(' ')[:3]
                    elif '-' in date:
                        year, month, day = date.split('-')[:3]
                        month = datetime.date(1900, int(month),
                                              1).strftime('%B')
                    else:
                        print('Skipping {{ArticleHistory}} on page', page,
                              ", can't parse date", date)
                        continue
                print(page.title(), year, month, day)

                if entry is None:
                    entry = self.get_entry_for_page(year, month, day, page)

                if entry:
                    print('Adding entry', entry, 'to {{ArticleHistory}}')
                    template.add('dykentry', entry, before='dykdate')

        if entry:
            new_text = str(parsed)
            if (new_text != page.text and self.should_edit()
                    and (not self.is_manual_run or confirm_edit())):
                self.get_mediawiki_api().get_site().login()
                page.text = str(parsed)

                page.save(
                    self.get_task_configuration('missing_blurb_edit_summary'),
                    botflag=self.should_use_bot_flag(),
                )
                self.record_trial_edit()
                return True
        return False
Example 30
    def process_wikipage(self, wikipage: pywikibot.Page, language: str):
        content = wikipage.get()
        title = wikipage.title()
        return self.process_non_wikipage(title, content, language)
Example 31
def makeLanguageMismatchPatch(
        page: pywikibot.Page,
        infoboxId: int,
        infoboxAbbrev: str,
        computedAbbrev: str,
        matchingPatterns: str
) -> Optional[Dict[str, Any]]:
    """Make patchset for Stitchpitch: infobox param and redirects rcats."""
    from unicodedata import normalize
    import mwparserfromhell
    startTimeStamp = datetime.now(timezone.utc).isoformat()
    diff = datetimeFromPWB(Site().server_time()) - datetime.now(timezone.utc)
    if diff > timedelta(minutes=2) or -diff > timedelta(minutes=2):
        raise Exception('Local zone misconfigured or server timezone not UTC!')
    latestRevision = page.latest_revision
    mainEdit = {
        'patchtype': 'edit',  # implies 'nocreate': True
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': matchingPatterns,
        'title': page.title(),
        'summary': 'Fix ISO-4 abbreviation to use all language rules.',
        'minor': True,
        'basetimestamp': datetimeFromPWB(latestRevision.timestamp).isoformat(),
        'starttimestamp': startTimeStamp,
        'oldtext': latestRevision.text,
        'oldrevid': latestRevision.revid
    }
    if datetime.fromisoformat(mainEdit['basetimestamp']) > \
       datetime.fromisoformat(startTimeStamp) - timedelta(hours=5):
        print(f'Skipping patch for "{page.title()}":'
              f' edited a short while ago.')
        return None
    code = mwparserfromhell.parse(normalize('NFC', latestRevision.text))
    foundInfobox = None  # type: Optional[mwparserfromhell.Template]
    foundId = -1
    for t in code.filter_templates():
        if t.name.matches('infobox journal') or \
           t.name.matches('Infobox Journal'):
            foundId += 1
            if foundId == infoboxId:
                foundInfobox = t
                break
    if not foundInfobox:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox #{infoboxId} not found.')
        return None
    foundAbbrev = str(foundInfobox.get('abbreviation').value)
    if foundAbbrev.strip() != infoboxAbbrev:
        print(f'Skipping patch for "{page.title()}":'
              f' infobox abbrev mismatch (comments?).')
        return None
    foundInfobox.get('abbreviation').value = \
        foundAbbrev.replace(infoboxAbbrev, computedAbbrev, 1)
    mainEdit['text'] = str(code)

    patches = [mainEdit]
    groupDetails = ''

    regex = r' *{{\s*(r|R) from ISO ?4( abbreviation)?\s*}} *\n?'
    abbrevRegex = r'{{\s*(r|R)(edirect)? (from )?(common )?ab[a-z]*\s*}}'
    for rPage in getRedirectsToPage(page.title(), namespaces=0,
                                    total=100, content=True):
        rTitle = rPage.title()
        rRevision = rPage.latest_revision
        cAbbrev = abbrevUtils.stripTitle(computedAbbrev.lower())
        if cAbbrev + ' ' in rTitle.lower() + ' ' or \
           cAbbrev.replace('.', '') + ' ' in rTitle.lower() + ' ':
            newtext = rRevision.text
            if re.search(regex, newtext):
                print(f'Skipping patch for existing page, already marked: {rTitle}')
                groupDetails += 'ok: ' + rTitle + '\n'
                continue
            if not isReplaceableRedirect(rRevision.text, page.title(),
                                         RCatSet.ISO4):
                print(f'Skipping patch for unreplaceable page: {rTitle}')
                groupDetails += 'unrepl: ' + rTitle + '\n'
                continue
            if re.search(abbrevRegex, newtext):
                newtext = re.sub(abbrevRegex, '{{R from ISO 4}}', newtext, 1)
            else:
                newtext += '\n{{R from ISO 4}}'
            markPatch = {
                'patchtype': 'edit',
                'slug': 'mark new?',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp':
                    datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': newtext
            }
            patches.append(markPatch)
        elif re.search(regex, rRevision.text):
            unmarkPatch = {
                'patchtype': 'edit',
                'slug': 'unmark old',
                'title': rTitle,
                'summary': 'Fix ISO-4 abbreviation to use all language rules.',
                'minor': True,
                'basetimestamp':
                    datetimeFromPWB(rRevision.timestamp).isoformat(),
                'starttimestamp': startTimeStamp,
                'oldtext': rRevision.text,
                'oldrevid': rRevision.revid,
                'text': re.sub(regex, '{{R from abbreviation}}\n', rRevision.text)
            }
            if infoboxAbbrev.lower() in rTitle.lower() or \
               infoboxAbbrev.replace('.', '').lower() in rTitle.lower():
                patches.append(unmarkPatch)
            else:
                print(f'Skip patch unmark on unrecog ISO-4: {rTitle}')
                groupDetails += 'unrecog ISO-4: ' + rTitle + '\n'
        else:
            groupDetails += '??: ' + rTitle + '\n'
    shouldHave = [computedAbbrev]
    if computedAbbrev.replace('.', '') != computedAbbrev:
        shouldHave.append(computedAbbrev.replace('.', ''))

    for abbrev in shouldHave:
        rPage = pywikibot.Page(Site(), abbrev)
        if not rPage.exists():
            createPatch = {
                'patchtype': 'create',
                'slug': 'create',
                'title': rPage.title(),
                'summary': 'R from ISO-4 abbreviation of journal title.',
                'minor': True,
                'starttimestamp': startTimeStamp,
                'text': '#REDIRECT[[' + page.title() + ']]\n\n'
                           '{{R from ISO 4}}\n'
            }
            patches.append(createPatch)

    return {
        'patchtype': 'group',
        'slug': f'{infoboxAbbrev} → {computedAbbrev}',
        'details': groupDetails,
        'patches': patches
    }
Example 32
def getRequiredRedirects(page: pywikibot.Page) \
        -> Tuple[Dict[str, RCatSet], bool]:
    """Compute ISO-4 redirects to `page` that we believe should exist.

    Returns `(req, skip)`, where:
        `req[redirectTitle] = redirectCategories`,
        `skip` indicates that we had to skip an infobox, so the result is most
        probably not exhaustive (so we won't report extra existing redirects).
    """
    title = page.title()
    pageData = state.getPageData(title)
    result: DefaultDict[str, RCatSet] = defaultdict(lambda: RCatSet(0))
    skip = False
    for infoboxId, infobox in enumerate(pageData['infoboxes']):
        altName = abbrevUtils.stripTitle(title)
        iTitle = abbrevUtils.sanitizeField(infobox.get('title', ''))
        name = iTitle or altName
        # On Wikipedia, we used to remove subtitles/dependent titles.
        # It did not seem to change much, and not doing it seems better.
        # name = re.sub(r'(.{6})[-:–(].*', r'\1', name)
        # altName = re.sub(r'(.{6})[-:–(].*', r'\1', altName)
        iAbbrev = abbrevUtils.sanitizeField(infobox.get('abbreviation', ''))
        iAbbrevDotless = iAbbrev.replace('.', '')
        if iAbbrev == '' or iAbbrev == 'no':
            print(f'--Abbrev param empty or "no", ignoring [[{title}]].')
            skip = True
            continue
        if ':' in iAbbrev[:5]:
            print(f'--Abbrev contains early colon, ignoring [[{title}]].')
            reports.reportTitleWithColon(
                title, iTitle, iAbbrev)
            skip = True
            continue
        hasISO4Redirect = \
            iAbbrev in pageData['redirects'] \
            and isValidISO4Redirect(pageData['redirects'][iAbbrev], title,
                                    RCatSet.ISO4, strict=False)
        # If the abbreviation matches the computed one,
        # there should be a dotted and a dotless redirect.
        cLang = 'all'  # abbrevUtils.getLanguage(infobox)
        cAbbrev = state.tryGetAbbrev(name, cLang)
        cAltAbbrev = state.tryGetAbbrev(altName, cLang)
        if cAbbrev is None or cAltAbbrev is None:
            skip = True
            continue
        if (not abbrevUtils.isSoftMatch(iAbbrev, cAbbrev)
                and not abbrevUtils.isSoftMatch(iAbbrev, cAltAbbrev)):
            print(f'--Abbreviations don\'t match, ignoring [[{title}]].')
            otherAbbrevs = list(state.getAllAbbrevs(name).values())
            otherAbbrevs = [a for a in otherAbbrevs
                            if abbrevUtils.isSoftMatch(iAbbrev, a)]
            if otherAbbrevs:
                reports.reportLanguageMismatch(
                    title, iTitle,
                    iAbbrev, cAbbrev, otherAbbrevs[0],
                    abbrevUtils.sanitizeField(infobox.get('language', '')),
                    abbrevUtils.sanitizeField(infobox.get('country', '')),
                    cLang, state.getMatchingPatterns(name), hasISO4Redirect)
                patch = makeLanguageMismatchPatch(
                    page, infoboxId, infobox.get('abbreviation'), cAbbrev,
                    state.getMatchingPatterns(name)
                )
                if patch is not None:
                    patchset['patches'].append(patch)
                    print(f'ADDED PATCH #{len(patchset["patches"])}!!!')
                    with open('patchset.json', 'wt') as f:
                        json.dump(patchset, f)
            else:
                reports.reportProperMismatch(
                    title, iTitle,
                    iAbbrev, cAbbrev, cLang,
                    state.getMatchingPatterns(name), hasISO4Redirect)
            continue
        if iAbbrevDotless == iAbbrev:
            print(f'--Abbreviation is trivial (has no dots), '
                  f'to avoid confusion we\'re ignoring [[{title}]].')
            skip = True
            reports.reportTrivialAbbrev(
                title, iTitle,
                iAbbrev, pageData['redirects'])
        else:
            result[iAbbrev] |= RCatSet.ISO4
            result[iAbbrevDotless] |= RCatSet.ISO4
    for infobox in pageData['infoboxes']:
        nlm: Optional[str] = abbrevUtils.sanitizeField(infobox.get('nlm', ''))
        if nlm and re.fullmatch(r'[\w\ \.,\(\)\[\]\:\'/\-]+', nlm):
            result[nlm] |= RCatSet.NLM
        if not nlm:
            if infobox.get('issn'):
                nlm = issnToAbbrev['nlm'].get(infobox['issn'])
            if not nlm and infobox.get('eissn'):
                nlm = issnToAbbrev['nlm'].get(infobox['eissn'])
            if nlm and nlm == infobox.get('abbreviation', '').replace('.', ''):
                result[nlm] |= RCatSet.NLM
        msn: Optional[str] = \
            abbrevUtils.sanitizeField(infobox.get('mathscinet', ''))
        if msn and re.fullmatch(r'[\w\ \.\(\)\:\'/\-]+', msn):
            result[msn] |= RCatSet.MSN
            result[msn.replace('.', '')] |= RCatSet.MSN
        if not msn:
            if infobox.get('issn'):
                msn = issnToAbbrev['mathscinet'].get(infobox['issn'])
            if not msn and infobox.get('eissn'):
                msn = issnToAbbrev['mathscinet'].get(infobox['eissn'])
            if msn and msn == iAbbrev:
                result[msn] |= RCatSet.MSN
                result[msn.replace('.', '')] |= RCatSet.MSN
    finalResult: Dict[str, RCatSet] = {}
    for rTitle, rCats in result.items():
        if rCats:
            finalResult[rTitle] = rCats
    return finalResult, skip
Example 33
def fixPageRedirects(page: pywikibot.Page) -> int:
    """Fix redirects to given page."""
    title = page.title()
    pageData = state.getPageData(title)
    (requiredRedirects, skip) = getRequiredRedirects(page)
    nEditedPages = 0
    for rTitle, rCats in requiredRedirects.items():
        rNewContent = rcatSetToRedirectContent(title, rCats)
        # Attempt to create new redirect.
        if rTitle not in pageData['redirects']:
            try:
                exists = pywikibot.Page(Site(), rTitle).exists()
            except pywikibot.exceptions.InvalidTitle:
                exists = False
            if exists:
                print(f'--Skipping existing page [[{rTitle}]] '
                      f'(not a redirect to [[{title}]]).')
                if title == rTitle:
                    continue
                if title not in pywikibot.Page(Site(), rTitle).text:
                    reports.reportExistingOtherPage(title, rTitle)
            else:
                print(f'--Creating redirect '
                      f'from [[{rTitle}]] to [[{title}]]. '
                      f'Created content:\n{rNewContent}\n-----',
                      flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Creating redirect from standard abbreviation. ',
                          overwrite=False)
        else:
            rOldContent = pageData['redirects'][rTitle]
            if isValidISO4Redirect(rOldContent, title, rCats):
                print(f'--Skipping existing valid redirect '
                      f'from [[{rTitle}]] to [[{title}]].')
            elif isReplaceableRedirect(rOldContent, title,
                                       rCats | RCatSet.ISO4):
                # Don't log nor edit redirects that would be replaceable
                # except they have ISO4 and we're not sure it should have.
                if not (rCats & RCatSet.ISO4):
                    continue
                print(f'--Replacing existing redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- '
                      f'New content:\n{rNewContent}\n-----',
                      flush=True)
                nEditedPages += 1
                rPage = pywikibot.Page(Site(), rTitle)
                trySaving(rPage, rNewContent,
                          'Marking standard abbrev rcat. ',
                          overwrite=True)
            elif not skip:
                print(f'--Skipping existing dubious redirect '
                      f'from [[{rTitle}]] to [[{title}]].\n'
                      f'RCatSet: {rCats}\n'
                      f'Original content:\n{rOldContent}\n----- ')
                reports.reportExistingOtherRedirect(title, rTitle, rOldContent)
    # Purge page cache to remove warnings about missing redirects.
    if nEditedPages > 0:
        tryPurging(page)

    # Report redirects that we wouldn't add, but exist and are marked as ISO-4.
    if requiredRedirects and not skip:
        expectedAbbrevs = \
            [r.replace('.', '') for r in requiredRedirects]
        potentialAbbrevs = []
        for rTitle, rContent in pageData['redirects'].items():
            if 'from former name' in rContent or '.' not in rTitle:
                cAbbrevEng = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'eng') or ''
                cAbbrevAll = state.tryGetAbbrev(
                    abbrevUtils.stripTitle(rTitle), 'all') or ''
                cAbbrevEng = cAbbrevEng.replace('.', '')
                cAbbrevAll = cAbbrevAll.replace('.', '')
                if 'from former name' in rContent:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevEng)
                    if cAbbrevAll != rTitle.replace('.', ''):
                        expectedAbbrevs.append(cAbbrevAll)
                elif '.' not in rTitle:
                    if cAbbrevEng != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevEng, rTitle))
                    if cAbbrevAll != rTitle.replace('.', ''):
                        potentialAbbrevs.append((cAbbrevAll, rTitle))
        expectedAbbrevs = [a for a in expectedAbbrevs if a]
        potentialAbbrevs = [(a, t) for (a, t) in potentialAbbrevs if a]
        for rTitle, rContent in pageData['redirects'].items():
            if not re.search(r'R from ISO 4', rContent):
                continue
            # Ignore rTitle that contain a computed abbreviation as a
            # substring, assume that it's some valid variation on a subtitle.
            isExpected = False
            rTitleDotless = rTitle.replace('.', '')
            for computedAbbrev in expectedAbbrevs:
                if re.sub(r'\s*[:(].*', '', computedAbbrev) in rTitleDotless:
                    isExpected = True
                    break
            if not isExpected:
                # Find other titles in existing redirects
                # that would ISO-4 abbreviate to it
                potentials = [t for (a, t) in potentialAbbrevs
                              if abbrevUtils.isSoftMatch(rTitleDotless, a)]
                potentials = list(sorted(set(potentials)))
                # Find closest computed abbrev.
                bestAbbrev = ''
                bestDist = len(rTitle)
                for computedAbbrev in sorted(requiredRedirects):
                    dist = Levenshtein.distance(rTitle, computedAbbrev)
                    if dist < bestDist:
                        bestDist = dist
                        bestAbbrev = computedAbbrev
                # Skip if closest abbrev. is far (assume it's from a former
                # title, since there's a ton of cases like that).
                if bestDist <= 8:
                    reports.reportSuperfluousRedirect(
                        title, rTitle, rContent, bestAbbrev, potentials)
    return nEditedPages