Example #1
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put to the correct
        position and into the right order. This combines the old instances
        standardizeInterwiki and standardizeCategories.
        The page footer has the following section in that sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        """
        categories = None
        interwikiLinks = None

        # Pywikibot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki of cause of the comment line above iw section
        if not self.template and '{{Personendaten' not in text and \
           '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
           self.site.code not in ('et', 'it', 'bg', 'ru'):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    loc = None
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = textlib.getLanguageLinks(text,
                                                      insite=self.site,
                                                      template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # e.g. using categories.sort()

            # TODO: Taking main cats to top
            text = textlib.replaceCategoryLinks(text,
                                                categories,
                                                site=self.site)
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text,
                                                interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Example #2
0
    def add_template(self, source, dest, task, fromsite):
        """Place or remove the Link_GA/FA template on/from a page."""
        def compile_link(site, templates):
            """compile one link template list."""
            findtemplate = '(%s)' % '|'.join(templates)
            return re.compile(
                r"\{\{%s\|%s\}\}" %
                (findtemplate.replace(u' ', u'[ _]'), site.code),
                re.IGNORECASE)

        tosite = dest.site
        add_tl, remove_tl = self.getTemplateList(tosite.code, task)
        re_Link_add = compile_link(fromsite, add_tl)
        re_Link_remove = compile_link(fromsite, remove_tl)

        text = dest.text
        m1 = add_tl and re_Link_add.search(text)
        m2 = remove_tl and re_Link_remove.search(text)
        changed = False
        interactive = self.getOption('interactive')
        if add_tl:
            if m1:
                pywikibot.output(u"(already added)")
            else:
                # insert just before interwiki
                if (not interactive or pywikibot.input(
                        u'Connecting %s -> %s. Proceed? [Y/N]' %
                    (source.title(), dest.title())) in ['Y', 'y']):
                    if self.getOption('side'):
                        # Placing {{Link FA|xx}} right next to
                        # corresponding interwiki
                        text = (text[:m1.end()] + u" {{%s|%s}}" %
                                (add_tl[0], fromsite.code) + text[m1.end():])
                    else:
                        # Moving {{Link FA|xx}} to top of interwikis
                        iw = textlib.getLanguageLinks(text, tosite)
                        text = textlib.removeLanguageLinks(text, tosite)
                        text += u"%s{{%s|%s}}%s" % (LS, add_tl[0],
                                                    fromsite.code, LS)
                        text = textlib.replaceLanguageLinks(text, iw, tosite)
                    changed = True
        if remove_tl:
            if m2:
                if (changed or  # Don't force the user to say "Y" twice
                        not interactive or pywikibot.input(
                            u'Connecting %s -> %s. Proceed? [Y/N]' %
                            (source.title(), dest.title())) in ['Y', 'y']):
                    text = re.sub(re_Link_remove, '', text)
                    changed = True
            elif task == 'former':
                pywikibot.output(u"(already removed)")
        if changed:
            comment = i18n.twtranslate(tosite, 'featured-' + task,
                                       {'page': unicode(source)})
            try:
                dest.put(text, comment)
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked!' % dest.title())
            except pywikibot.PageNotSaved:
                pywikibot.output(u"Page not saved")
Example #3
0
 def remove_links(self, object, qid, count):
     newtext = textlib.removeLanguageLinks(object['content'], self.enwp)
     #need to build a page object
     page = pywikibot.Page(self.enwp, object['title'])
     page.text = newtext
     summary = self.translate(editsummary).replace('$id', qid).replace(
         '$counter', str(count))
     try:
         self.enwp.editpage(
             page,
             summary,
             nocreate=True,
             lastrev=int(object['revid']),
             token=self.enwp_token,
             skipec=True,
             useid=object['id'],
             timestamp=clean_timestamp(object['timestamp']),
         )
         return
     except pywikibot.exceptions.LockedPage:
         print 'protected'
         return self.logger.error(object, 'Page was protected.', qid)
     except pywikibot.exceptions.EditConflict:
         print 'edit conflict'
         return self.logger.error(object, 'Edit conflict.', qid)
Example #4
0
	def getWordCount(self, text):
		text = textlib.removeDisabledParts(text)
		text = textlib.removeHTMLParts(text)
		text = textlib.removeLanguageLinks(text)
		text = textlib.removeCategoryLinks(text)
		word_list = re.findall(r"[\w']+", text)

		return len(word_list)
Example #5
0
    def removeEmptySections(self, text: str) -> str:
        """Cleanup empty sections."""
        # userspace contains article stubs without nobots/in use templates
        if self.namespace == 2:
            return text

        skippings = ['comment', 'category']
        skip_regexes = _get_regexes(skippings, self.site)
        # site defined templates
        skip_templates = {
            'cs': ('Pahýl[ _]část',),  # stub section
        }
        if self.site.code in skip_templates:
            for template in skip_templates[self.site.code]:
                skip_regexes.append(
                    re.compile(r'\{\{\s*%s\s*\}\}' % template, re.I))
        # empty lists
        skip_regexes.append(re.compile(r'(?m)^[\*#] *$'))

        # get stripped sections
        stripped_text = textlib.removeLanguageLinks(text, self.site, '\n')
        for reg in skip_regexes:
            stripped_text = reg.sub(r'', stripped_text)
        strip_sections = textlib.extract_sections(
            stripped_text, self.site)[1]

        # get proper sections
        header, sections, footer = textlib.extract_sections(text, self.site)

        # iterate stripped sections and create a new page body
        new_body = []
        for i, strip_section in enumerate(strip_sections):
            current_heading = sections[i][0]
            try:
                next_heading = sections[i + 1][0]
            except IndexError:
                next_heading = ''
            current_dep = (len(current_heading)
                           - len(current_heading.lstrip('=')))
            next_dep = len(next_heading) - len(next_heading.lstrip('='))
            if strip_section[1].strip() or current_dep < next_dep:
                new_body.extend(sections[i])
        return header + ''.join(new_body) + footer
Example #6
0
    def removeEmptySections(self, text):
        """Cleanup empty sections."""
        # userspace contains article stubs without nobots/in use templates
        if self.namespace == 2:
            return text

        skippings = ['comment', 'category']
        skip_regexes = _get_regexes(skippings, self.site)
        # site defined templates
        skip_templates = {
            'cs': ('Pahýl[ _]část',),  # stub section
        }
        if self.site.code in skip_templates:
            for template in skip_templates[self.site.code]:
                skip_regexes.append(
                    re.compile(r'\{\{\s*%s\s*\}\}' % template, re.I))
        # empty lists
        skip_regexes.append(re.compile(r'(?m)^[\*#] *$'))

        # get stripped sections
        stripped_text = textlib.removeLanguageLinks(text, self.site, '\n')
        for reg in skip_regexes:
            stripped_text = reg.sub(r'', stripped_text)
        strip_sections = textlib.extract_sections(
            stripped_text, self.site)[1]

        # get proper sections
        header, sections, footer = textlib.extract_sections(text, self.site)

        # iterate stripped sections and create a new page body
        new_body = []
        for i, strip_section in enumerate(strip_sections):
            current_heading = sections[i][0]
            try:
                next_heading = sections[i + 1][0]
            except IndexError:
                next_heading = ''
            current_dep = (len(current_heading)
                           - len(current_heading.lstrip('=')))
            next_dep = len(next_heading) - len(next_heading.lstrip('='))
            if strip_section[1].strip() or current_dep < next_dep:
                new_body.extend(sections[i])
        return header + ''.join(new_body) + footer
Example #7
0
 def remove_links(self, object, qid, count):
     newtext = textlib.removeLanguageLinks(object['content'], self.enwp)
     #need to build a page object
     page = pywikibot.Page(self.enwp, object['title'])
     page.text = newtext
     summary = self.translate(editsummary).replace('$id', qid).replace('$counter', str(count))
     try:
         self.enwp.editpage(
             page,
             summary,
             nocreate=True,
             lastrev=int(object['revid']),
             token=self.enwp_token,
             skipec=True,
             useid=object['id'],
             timestamp=clean_timestamp(object['timestamp']),
         )
         return
     except pywikibot.exceptions.LockedPage:
         print 'protected'
         return self.logger.error(object, 'Page was protected.',qid)
     except pywikibot.exceptions.EditConflict:
         print 'edit conflict'
         return self.logger.error(object, 'Edit conflict.',qid)
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put
        into the correct position and into the right order. This
        combines the old instances of standardizeInterwiki
        and standardizeCategories.

        The page footer consists of the following parts
        in that sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki
        """
        categories = []
        interwiki_links = []

        # get categories
        if not self.template:
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    loc = None
                if loc is not None and loc in self.title:
                    subpage = True

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site,
                                      'Category:' + self.title,
                                      sort_key=' ')
            if main in categories:
                categories.pop(categories.index(main))
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(text,
                                                categories,
                                                site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(text,
                                                interwiki_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links, categories and star templates are
        put to the correct position and into the right order. This combines the
        old instances standardizeInterwiki and standardizeCategories
        The page footer has the following section in that sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        """
        starsList = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        allstars = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki of cause of the comment line above iw section
        if not self.template and '{{Personendaten' not in text and \
           '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
           self.site.code not in ('et', 'it', 'bg', 'ru'):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
            subpage = False
            if self.template:
                loc = None
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    pass
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)
            # Removing the stars' issue
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    text = regex.sub('', text)
                    allstars += found

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # e.g. using categories.sort()

            # TODO: Taking main cats to top
            #   for name in categories:
            #       if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
            #            categories.remove(name)
            #            categories.insert(0, name)
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)
        # Adding stars templates
        if allstars:
            text = text.strip() + self.site.family.interwiki_text_separator
            allstars.sort()
            for element in allstars:
                text += '%s%s' % (element.strip(), config.line_separator)
                pywikibot.log(u'%s' % element.strip())
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Example #10
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links, categories and star templates are
        put to the correct position and into the right order. This combines the
        old instances standardizeInterwiki and standardizeCategories
        The page footer has the following section in that sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        """
        starsList = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        allstars = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki of cause of the comment line above iw section
        if not self.template and '{{Personendaten' not in text and \
           '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
           self.site.code not in ('et', 'it', 'bg', 'ru'):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
            subpage = False
            if self.template:
                loc = None
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    pass
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)
            # Removing the stars' issue
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    text = regex.sub('', text)
                    allstars += found

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # e.g. using categories.sort()

            # TODO: Taking main cats to top
            #   for name in categories:
            #       if re.search(u"(.+?)\|(.{,1}?)",name.title()) or name.title()==name.title().split(":")[0]+title:
            #            categories.remove(name)
            #            categories.insert(0, name)
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)
        # Adding stars templates
        if allstars:
            text = text.strip() + self.site.family.interwiki_text_separator
            allstars.sort()
            for element in allstars:
                text += '%s%s' % (element.strip(), config.line_separator)
                pywikibot.log(u'%s' % element.strip())
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put to the correct
        position and into the right order. This combines the old instances
        standardizeInterwiki and standardizeCategories.
        The page footer has the following section in that sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        """
        categories = None
        interwikiLinks = None

        # Pywikibot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki of cause of the comment line above iw section
        if not self.template and '{{Personendaten' not in text and \
           '{{SORTIERUNG' not in text and '{{DEFAULTSORT' not in text and \
           self.site.code not in ('et', 'it', 'bg', 'ru'):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:  # and pywikibot.calledModuleName() <> 'interwiki':
            subpage = False
            if self.template:
                loc = None
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    pass
                if loc is not None and loc in self.title:
                    subpage = True
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # e.g. using categories.sort()

            # TODO: Taking main cats to top
            #   for name in categories:
            #       if (re.search(u"(.+?)\|(.{,1}?)",name.title()) or
            #               name.title() == name.title().split(":")[0] + title):
            #            categories.remove(name)
            #            categories.insert(0, name)
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Example #12
0
    def add_template(self, source, dest, task, fromsite):
        """Place or remove the Link_GA/FA template on/from a page."""
        def compile_link(site, templates):
            """Compile one link template list."""
            findtemplate = '(%s)' % '|'.join(templates)
            return re.compile(
                r'\{\{%s\|%s\}\}' %
                (findtemplate.replace(' ', '[ _]'), site.code), re.IGNORECASE)

        tosite = dest.site
        add_tl, remove_tl = self.getTemplateList(tosite.code, task)
        re_link_add = compile_link(fromsite, add_tl)
        re_link_remove = compile_link(fromsite, remove_tl)

        text = dest.text
        m1 = add_tl and re_link_add.search(text)
        m2 = remove_tl and re_link_remove.search(text)
        changed = False
        interactive = self.getOption('interactive')
        if add_tl:
            if m1:
                pywikibot.output('(already added)')
            else:
                # insert just before interwiki
                if (not interactive
                        or pywikibot.input_yn('Connecting %s -> %s. Proceed?' %
                                              (source.title(), dest.title()),
                                              default=False,
                                              automatic_quit=False)):
                    if self.getOption('side'):
                        # Placing {{Link FA|xx}} right next to
                        # corresponding interwiki
                        text = (text[:m1.end()] + ' {{%s|%s}}' %
                                (add_tl[0], fromsite.code) + text[m1.end():])
                    else:
                        # Moving {{Link FA|xx}} to top of interwikis
                        iw = textlib.getLanguageLinks(text, tosite)
                        text = textlib.removeLanguageLinks(text, tosite)
                        text += '%s{{%s|%s}}%s' % (config.LS, add_tl[0],
                                                   fromsite.code, config.LS)
                        text = textlib.replaceLanguageLinks(text, iw, tosite)
                    changed = True
        if remove_tl:
            if m2:
                if (changed  # Don't force the user to say "Y" twice
                        or not interactive or
                        pywikibot.input_yn('Connecting %s -> %s. Proceed?' %
                                           (source.title(), dest.title()),
                                           default=False,
                                           automatic_quit=False)):
                    text = re.sub(re_link_remove, '', text)
                    changed = True
            elif task == 'former':
                pywikibot.output('(already removed)')
        if changed:
            comment = i18n.twtranslate(tosite, 'featured-' + task,
                                       {'page': source})
            try:
                dest.put(text, comment)
                self._save_counter += 1
            except pywikibot.LockedPage:
                pywikibot.output('Page %s is locked!' % dest.title())
            except pywikibot.PageSaveRelatedError:
                pywikibot.output('Page not saved')
Example #13
0
def linkedImages(page):
    """Return a list of Pages that this Page links to.

    Only returns pages from "normal" internal links. Category links are
    omitted unless prefixed with ":". Image links are omitted when parameter
    withImageLinks is False. Embedded templates are omitted (but links
    within them are returned). All interwiki and external links are omitted.

    @param thistxt: the wikitext of the page
    @return: a list of Page objects.
    """

    Rlink = re.compile(r'\[\[(?P<title>[^\]\|\[]*)(\|[^\]]*)?\]\]')
    result = []
    try:
        thistxt = textlib.removeLanguageLinks(page.get(get_redirect=True),
                                              page.site)
    except pywikibot.NoPage:
        raise
    except pywikibot.IsRedirectPage:
        raise
    except pywikibot.SectionError:
        return []
    thistxt = textlib.removeCategoryLinks(thistxt, page.site)

    # remove HTML comments, pre, nowiki, and includeonly sections
    # from text before processing
    thistxt = textlib.removeDisabledParts(thistxt)

    # resolve {{ns:-1}} or {{ns:Help}}
    # thistxt = page.site.resolvemagicwords(thistxt)

    for match in Rlink.finditer(thistxt):
        try:
            #print(match.group(0))
            title = match.group('title')
            title = title.replace("_", " ").strip(" ")
            # print title
            if title == "":
                # empty link - problem in the page
                continue
            # convert relative link to absolute link
            if title.startswith(".."):
                parts = self.title().split('/')
                parts.pop()
                title = '/'.join(parts) + title[2:]
            elif title.startswith("/"):
                title = '%s/%s' % (page.title(), title[1:])
            if title.startswith("#"):
                # this is an internal section link
                continue
            if not page.site.isInterwikiLink(title):
                page2 = pywikibot.Page(page.site, title)
                try:
                    hash(str(page2))
                except Exception:
                    pywikibot.output("Page %s contains invalid link to [[%s]]."
                                 % (page.title(), title))
                    continue
                if not page2.isImage():
                    continue
                if page2.title(withSection=False) and page2 not in result:
                    result.append(page2)
        except pywikibot.NoUsername:
            continue
        except:
            raise
    return result
Example #14
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += '{}{}'.format(config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '{}{}'.format(config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text

    if putText and text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if not putText:
            return (text, newtext, always)

        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)

        if always or choice == 'y':
            result = put_text(page,
                              newtext,
                              summary,
                              error_count,
                              asynchronous=not always)
            if result is not None:
                return (result, result, always)
            error_count += 1
Example #15
0
    def add_template(self, source, dest, task, fromsite):
        """Place or remove the Link_GA/FA template on/from a page."""
        def compile_link(site, templates):
            """Compile one link template list."""
            findtemplate = '(%s)' % '|'.join(templates)
            return re.compile(r"\{\{%s\|%s\}\}"
                              % (findtemplate.replace(u' ', u'[ _]'),
                                 site.code), re.IGNORECASE)

        tosite = dest.site
        add_tl, remove_tl = self.getTemplateList(tosite.code, task)
        re_Link_add = compile_link(fromsite, add_tl)
        re_Link_remove = compile_link(fromsite, remove_tl)

        text = dest.text
        m1 = add_tl and re_Link_add.search(text)
        m2 = remove_tl and re_Link_remove.search(text)
        changed = False
        interactive = self.getOption('interactive')
        if add_tl:
            if m1:
                pywikibot.output(u"(already added)")
            else:
                # insert just before interwiki
                if (not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                    if self.getOption('side'):
                        # Placing {{Link FA|xx}} right next to
                        # corresponding interwiki
                        text = (text[:m1.end()] +
                                u" {{%s|%s}}" % (add_tl[0], fromsite.code) +
                                text[m1.end():])
                    else:
                        # Moving {{Link FA|xx}} to top of interwikis
                        iw = textlib.getLanguageLinks(text, tosite)
                        text = textlib.removeLanguageLinks(text, tosite)
                        text += u"%s{{%s|%s}}%s" % (config.LS, add_tl[0],
                                                    fromsite.code, config.LS)
                        text = textlib.replaceLanguageLinks(text,
                                                            iw, tosite)
                    changed = True
        if remove_tl:
            if m2:
                if (changed or  # Don't force the user to say "Y" twice
                    not interactive or
                    pywikibot.input_yn(
                        u'Connecting %s -> %s. Proceed?'
                        % (source.title(), dest.title()),
                        default=False, automatic_quit=False)):
                    text = re.sub(re_Link_remove, '', text)
                    changed = True
            elif task == 'former':
                pywikibot.output(u"(already removed)")
        if changed:
            comment = i18n.twtranslate(tosite, 'featured-' + task,
                                       {'page': source})
            try:
                dest.put(text, comment)
                self._save_counter += 1
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked!'
                                 % dest.title())
            except pywikibot.PageNotSaved:
                pywikibot.output(u"Page not saved")
Example #16
0
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @param summary: Summary of changes. If None, beginning of addText is used.
    @param regexSkip: Abort if text on page matches
    @param regexSkipUrl: Abort if full url matches
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (_, newtext, _)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = False.
    @param create: Create page if it does not exist
    @return: If putText=True: (success, success, always)
        else: (_, newtext, _)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        error_count += 1
Example #17
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Makes sure that interwiki links and categories are put
        into the correct position and into the right order. This
        combines the old instances of standardizeInterwiki
        and standardizeCategories.

        The page footer consists of the following parts
        in that sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki
        """
        categories = []
        interwiki_links = []

        # get categories
        if not self.template:
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    tmpl, loc = moved_links[self.site.code]
                    del tmpl
                except KeyError:
                    loc = None
                if loc is not None and loc in self.title:
                    subpage = True

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site, 'Category:' + self.title,
                                      sort_key=' ')
            if main in categories:
                categories.pop(categories.index(main))
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(text, interwiki_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text
Example #18
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" %
                                 page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    '(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')],
                    'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext,
                                 summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext,
                                       summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s' %
                        (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)' %
                                     page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
Example #19
0
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    pywikibot.bot.open_webbrowser(page)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)