Esempio n. 1
0
    def test_in_place_replace(self):
        """Check that an in-place category swap can be undone.

        Each of the first four categories parsed from ``self.old`` is
        replaced by a dummy category and then swapped back again; the
        round trip has to reproduce the original text.
        """
        placeholder = pywikibot.Category(self.site, 'foo')
        placeholder.sortKey = 'bah'

        cats = textlib.getCategoryLinks(self.old, site=self.site)

        # Sanity checking
        for index in range(4):
            original = cats[index]
            swapped = textlib.replaceCategoryInPlace(
                self.old, original, placeholder, site=self.site)
            self.assertNotEqual(swapped, self.old)
            restored = textlib.replaceCategoryInPlace(
                swapped, placeholder, original, site=self.site)
            self.assertEqual(self.old, restored)

        # The text restored in the last round must parse to the same links.
        reparsed = textlib.getCategoryLinks(restored, site=self.site)
        self.assertEqual(cats, reparsed)
Esempio n. 2
0
 def test_adjoining_links(self):
     """Test getting and replacing categories without a separator.

     The line separators are stripped from ``self.old`` and the
     category links are written back with an empty
     ``config.line_separator``. The separator is restored in a
     ``finally`` clause so that an exception raised while replacing
     the links cannot leave the modified setting behind.
     """
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     config.line_separator = ''  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore the default separator.
         config.line_separator = sep
     self.assertEqual(old, new)
Esempio n. 3
0
    def test_in_place_replace(self):
        """Verify that replacing a category in place is reversible."""
        dummy = pywikibot.Category(self.site, 'foo')
        dummy.sortKey = 'bah'

        cats = textlib.getCategoryLinks(self.old, site=self.site)

        def round_trip(category):
            """Swap *category* for the dummy and back; return the result."""
            changed = textlib.replaceCategoryInPlace(
                self.old, category, dummy, site=self.site)
            self.assertNotEqual(changed, self.old)
            reverted = textlib.replaceCategoryInPlace(
                changed, dummy, category, site=self.site)
            self.assertEqual(self.old, reverted)
            return reverted

        # Sanity checking: exercise the first four parsed categories in turn
        for position in range(4):
            new = round_trip(cats[position])

        new_cats = textlib.getCategoryLinks(new, site=self.site)
        self.assertEqual(cats, new_cats)
Esempio n. 4
0
 def test_adjoining_links(self):
     """Test parsing and rewriting category links that touch each other.

     ``config.line_separator`` is emptied only for the duration of the
     ``replaceCategoryLinks`` call; the ``finally`` clause guarantees
     that it is reset even when that call raises.
     """
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, "")
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     config.line_separator = ""  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore the default separator.
         config.line_separator = sep
     self.assertEqual(old, new)
Esempio n. 5
0
 def test_adjoining_links(self):
     """Test getting and replacing adjacent categories.

     The separator override is undone in a ``finally`` clause so that
     a failure inside ``replaceCategoryLinks`` does not leave
     ``config.line_separator`` empty afterwards.
     """
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     config.line_separator = ''  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore the default separator.
         config.line_separator = sep
     self.assertEqual(old, new)
Esempio n. 6
0
    def test_in_place_retain_sort(self):
        """Check that an in-place swap keeps the existing sort key."""
        # A replacement category whose sortKey is None must take over
        # the sort key of the category it replaces.
        replacement = pywikibot.Category(self.site, 'foo')
        self.assertIsNone(replacement.sortKey)

        parsed = textlib.getCategoryLinks(self.old, site=self.site)
        target = parsed[3]

        self.assertEqual(target.sortKey, 'key')
        expected_key = target.sortKey
        changed = textlib.replaceCategoryInPlace(
            self.old, target, replacement, site=self.site)
        self.assertNotEqual(self.old, changed)
        result = textlib.getCategoryLinks(changed, site=self.site)[3]
        self.assertIsNotNone(result.sortKey)
        self.assertEqual(expected_key, result.sortKey)
Esempio n. 7
0
    def run(self):
        """Run the bot.

        For every page from the generator, load the page of the same
        title from the site's image repository. When the titles match
        and none of the template patterns already matches the local
        text, a ``{{commons|...}}`` (or ``{{commonscat|...}}`` in
        category mode) template is appended, the category links are
        re-applied through ``textlib.replaceCategoryLinks`` and the page
        is saved with ``userPut``.

        Returns immediately when no action or no generator is set.
        """
        if not all((self.opt.action, self.generator)):
            return
        catmode = (self.opt.action == 'categories')
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                # catmode is a bool and doubles as the tuple index:
                # False -> 'Page', True -> 'Category'.
                commonspage = getattr(pywikibot,
                                      ('Page',
                                       'Category')[catmode])(commons,
                                                             page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(with_ns=not catmode)
                    if page.title() == pagetitle:
                        old_text = page.get()
                        text = old_text

                        # for Commons/Commonscat template
                        s = self.findTemplate.search(text)
                        # Look up findTemplate2 / findTemplate3 on the bot
                        # first and only then search; calling .search on
                        # the formatted attribute name (a str) would raise
                        # AttributeError.
                        s2 = getattr(
                            self, 'findTemplate{}'.format(
                                (2, 3)[catmode])).search(text)
                        if s or s2:
                            pywikibot.output('** Already done.')
                        else:
                            cats = textlib.getCategoryLinks(text,
                                                            site=page.site)
                            text = textlib.replaceCategoryLinks(
                                '%s{{commons%s|%s}}' %
                                (text, ('', 'cat')[catmode], pagetitle),
                                cats,
                                site=page.site)
                            comment = i18n.twtranslate(
                                page.site,
                                'commons_link{}-template-added'.format(
                                    ('', '-cat')[catmode]))
                            try:
                                self.userPut(page,
                                             old_text,
                                             text,
                                             summary=comment)
                            except EditConflictError:
                                pywikibot.output(
                                    'Skipping {} because of edit conflict'.
                                    format(page.title()))

                except NoPageError:
                    # Raised inside the inner block, i.e. while loading the
                    # repository page or the local page text.
                    pywikibot.output('{} does not exist in Commons'.format(
                        page.__class__.__name__))

            except NoPageError:
                pywikibot.output('Page {} does not exist'.format(page.title()))
            except IsRedirectPageError:
                pywikibot.output('Page {} is a redirect; skipping.'.format(
                    page.title()))
            except LockedPageError:
                pywikibot.output('Page {} is locked'.format(page.title()))
Esempio n. 8
0
    def standardizePageFooter(self, text):
        """
        Bring the page footer into the standard layout.

        Collects the category links and the interwiki links found in the
        text and writes them back through textlib, so that they end up in
        the correct position and order. This combines the former
        standardizeInterwiki and standardizeCategories steps.
        The page footer has the following sections in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        """
        # Pywikibot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki because of the comment line above the iw section
        blocking_markers = ('{{Personendaten', '{{SORTIERUNG',
                            '{{DEFAULTSORT')
        skip_categories = (
            self.template
            or any(marker in text for marker in blocking_markers)
            or self.site.code in ('et', 'it', 'bg', 'ru'))
        if skip_categories:
            categories = None
        else:
            categories = textlib.getCategoryLinks(text, site=self.site)

        interwikiLinks = None
        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # e.g. using categories.sort()

            # TODO: Taking main cats to top
            text = textlib.replaceCategoryLinks(
                text, categories, site=self.site)

        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(
                text,
                interwikiLinks,
                site=self.site,
                template=self.template,
                template_subpage=subpage)
        return text
Esempio n. 9
0
 def setUpClass(cls):
     """Set up the site, the sample category text and its parsed links."""
     super(TestCategoryRearrangement, cls).setUpClass()
     cls.site = cls.get_site()
     # Four category links joined by the configured line separator.
     sample = ('[[Category:Cat1]]%(LS)s[[Category:Cat2|]]%(LS)s'
               '[[Category:Cat1| ]]%(LS)s[[Category:Cat2|key]]')
     separator = {'LS': config.LS}
     cls.old = sample % separator
     cls.cats = textlib.getCategoryLinks(cls.old, site=cls.site)
Esempio n. 10
0
    def get_final_categories(self):
        """
        Work out which categories should be kept.

        The result holds every category that was added after the first
        revision, plus every category of the new text except those that
        were present in the first revision and have been removed since.
        :return: list of pywikibot.Category
        """
        def parse(wikitext):
            # can't use page.categories() since some cats are embedded by
            # templates
            return set(textlib.getCategoryLinks(wikitext, self.site))

        first_cats = parse(self.first_text)
        last_cats = parse(self.last_text)
        new_cats = parse(self.new_text)

        removed_since_first = first_cats - last_cats
        added_since_first = last_cats - first_cats
        kept = new_cats - removed_since_first
        return list(kept | added_since_first)
Esempio n. 11
0
    def get_final_categories(self):
        """
        Return the categories that survive the merge.

        Categories added after the first revision are always kept.
        Categories of the new text are kept unless they were in the first
        revision and were removed later on.
        :return: list of pywikibot.Category
        """
        # can't use page.categories() since some cats are embedded by templates
        first, last, new = (
            set(textlib.getCategoryLinks(wikitext, self.site))
            for wikitext in (self.first_text, self.last_text, self.new_text))

        surviving = new.difference(first.difference(last))
        return list(surviving.union(last.difference(first)))
Esempio n. 12
0
 def apply(self, page, summaries=None, *args):
     """Apply the parent fix and, on success, re-sort the categories.

     :param page: page whose ``text`` is rewritten in place
     :param summaries: list handed on to the parent ``apply``; a new
         list is created for every call when it is omitted
     :param args: further positional arguments for the parent ``apply``
     :return: the result of the parent ``apply``
     """
     if summaries is None:
         # A literal ``[]`` default would be one shared list object
         # that persists between calls.
         summaries = []
     result = super().apply(page, summaries, *args)
     if result:
         categories = textlib.getCategoryLinks(page.text, site=self.site)
         categories.sort(key=self.sortCategories)
         page.text = textlib.replaceCategoryLinks(page.text, categories,
                                                  self.site)
     return result
    def run(self):
        """Run the bot."""
        action = self.getOption('action')
        if not action or not self.generator:
            return
        catmode = (action == 'categories')
        # catmode is a bool and doubles as the index into the option pairs.
        class_name = ('Page', 'Category')[catmode]
        template_attr = 'findTemplate%d' % (2, 3)[catmode]
        summary_key = 'commons_link%s-template-added' % ('', '-cat')[catmode]
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                commonspage = getattr(pywikibot, class_name)(commons,
                                                             page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(withNamespace=not catmode)
                    if page.title() != pagetitle:
                        continue
                    original_text = page.get()

                    # for Commons/Commonscat template
                    found = self.findTemplate.search(original_text)
                    found_alt = getattr(self, template_attr).search(
                        original_text)
                    if found or found_alt:
                        pywikibot.output(u'** Already done.')
                        continue

                    cats = textlib.getCategoryLinks(original_text,
                                                    site=page.site)
                    new_text = textlib.replaceCategoryLinks(
                        u'%s{{commons%s|%s}}' %
                        (original_text, ('', 'cat')[catmode], pagetitle),
                        cats,
                        site=page.site)
                    comment = i18n.twtranslate(page.site, summary_key)
                    try:
                        self.userPut(page,
                                     original_text,
                                     new_text,
                                     summary=comment)
                    except pywikibot.EditConflict:
                        pywikibot.output(
                            u'Skipping %s because of edit conflict' %
                            page.title())

                except pywikibot.NoPage:
                    pywikibot.output(u'%s does not exist in Commons' %
                                     page.__class__.__name__)

            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.' %
                                 page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked' % page.title())
Esempio n. 14
0
    def test_in_place_retain_sort(self):
        """Verify that an in-place category change leaves the sortkey alone."""
        # With a new category whose sortKey is None, the old sort key
        # has to be carried over.
        new_cat = pywikibot.Category(self.site, 'foo')
        self.assertIsNone(new_cat.sortKey)

        old_cat = textlib.getCategoryLinks(self.old, site=self.site)[3]
        self.assertEqual(old_cat.sortKey, 'key')
        key_before = old_cat.sortKey

        replaced_text = textlib.replaceCategoryInPlace(
            self.old, old_cat, new_cat, site=self.site)
        self.assertNotEqual(self.old, replaced_text)

        links_after = textlib.getCategoryLinks(replaced_text, site=self.site)
        cat_after = links_after[3]
        self.assertIsNotNone(cat_after.sortKey)
        self.assertEqual(key_before, cat_after.sortKey)
Esempio n. 15
0
 def test_adjoining_links(self):
     """Test getting and replacing adjacent categories.

     ``config.line_separator`` is emptied only while the links are
     written back. It is restored in a ``finally`` clause before the
     final assertion runs, so neither an exception nor a failing
     assertion can leave the empty separator in place.
     """
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(self.cats, cats)
     sep = config.LS
     config.line_separator = ''  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         config.line_separator = sep  # restore the default separator
     self.assertEqual(old, new)
Esempio n. 16
0
def get_category_status(site, page, cat):
    """Return whether the text of ``page`` links to the category ``cat``.

    :param site: site used both to parse the category links and to
        build the category that is looked for
    :param page: object whose ``text`` attribute holds the wikitext
    :param cat: title of the category to look for
    :return: True if a parsed category has the same title, else False
    """
    # Parse with the caller's site rather than the default site, so the
    # links are read with the same site the wanted category is built on.
    cats = textlib.getCategoryLinks(page.text, site=site)
    wanted_title = pywikibot.Category(site, cat).title()
    return any(c.title() == wanted_title for c in cats)
Esempio n. 17
0
def get_category_status(site, page, cat):
    """Tell whether ``page.text`` contains a link to category ``cat``.

    :param site: site the category links are parsed for and the
        wanted category is created on
    :param page: object providing the wikitext as ``page.text``
    :param cat: category title to search for
    :return: True when one of the parsed categories has that title
    """
    catpl = pywikibot.Category(site, cat)
    # Pass the site explicitly; without it textlib falls back to the
    # default site, which need not be the one given here.
    for c in textlib.getCategoryLinks(page.text, site=site):
        if c.title() == catpl.title():
            return True
    return False
Esempio n. 18
0
    def run(self):
        """Run the bot."""
        if not all((self.getOption('action'), self.generator)):
            return
        catmode = (self.getOption('action') == 'categories')
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                page_class = getattr(pywikibot,
                                     'Category' if catmode else 'Page')
                commonspage = page_class(commons, page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(withNamespace=not catmode)
                    if page.title() == pagetitle:
                        oldText = page.get()

                        # for Commons/Commonscat template
                        primary = self.findTemplate.search(oldText)
                        finder = getattr(self, 'findTemplate%d'
                                               % (3 if catmode else 2))
                        secondary = finder.search(oldText)
                        if not (primary or secondary):
                            cats = textlib.getCategoryLinks(oldText,
                                                            site=page.site)
                            with_template = (
                                u'%s{{commons%s|%s}}'
                                % (oldText, 'cat' if catmode else '',
                                   pagetitle))
                            text = textlib.replaceCategoryLinks(
                                with_template, cats, site=page.site)
                            comment = i18n.twtranslate(
                                page.site, 'commons_link%s-template-added'
                                % ('-cat' if catmode else ''))
                            try:
                                self.userPut(page, oldText, text,
                                             summary=comment)
                            except pywikibot.EditConflict:
                                pywikibot.output(
                                    u'Skipping %s because of edit conflict'
                                    % page.title())
                        else:
                            pywikibot.output(u'** Already done.')

                except pywikibot.NoPage:
                    pywikibot.output(u'%s does not exist in Commons'
                                     % page.__class__.__name__)

            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.'
                                 % page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked' % page.title())
Esempio n. 19
0
 def test_templates(self):
     """Test templates inside category links."""
     self.site = self.get_site()

     def parsed(wikitext):
         """Return the category links textlib finds in *wikitext*."""
         return textlib.getCategoryLinks(wikitext, self.site)

     def category(title, **kwargs):
         """Build the expected Category on the test site."""
         return pywikibot.page.Category(self.site, title, **kwargs)

     self.assertEqual(parsed("[[Category:{{P1|Foo}}]]"), [category("Foo")])
     self.assertEqual(
         parsed("[[Category:{{P1|Foo}}|bar]]"),
         [category("Foo", sortKey="bar")],
     )
     self.assertEqual(
         parsed("[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]"),
         [category("Foo", sortKey="bar")],
     )
     self.assertEqual(
         parsed("[[Category:Foo{{!}}bar]]"),
         [category("Foo", sortKey="bar")],
     )
     self.assertEqual(
         parsed("[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]"),
         [category("Foo", sortKey="bar"), category("Wikipedia")],
     )
     self.assertEqual(
         parsed("[[Category:Foo{{!}}and{{!}}bar]]"),
         [category("Foo", sortKey="and|bar")],
     )
     # An unbalanced brace in the title has to be rejected.
     with self.assertRaises(pywikibot.InvalidTitle):
         parsed("[[Category:nasty{{{!}}]]")
Esempio n. 20
0
 def test_templates(self):
     """Test templates inside category links."""
     self.site = self.get_site()
     # Each case pairs the wikitext with the expected categories, given
     # as (title, keyword arguments for the Category constructor).
     cases = (
         ('[[Category:{{P1|Foo}}]]',
          [('Foo', {})]),
         ('[[Category:{{P1|Foo}}|bar]]',
          [('Foo', {'sortKey': 'bar'})]),
         ('[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]',
          [('Foo', {'sortKey': 'bar'})]),
         ('[[Category:Foo{{!}}bar]]',
          [('Foo', {'sortKey': 'bar'})]),
         ('[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]',
          [('Foo', {'sortKey': 'bar'}), ('Wikipedia', {})]),
         ('[[Category:Foo{{!}}and{{!}}bar]]',
          [('Foo', {'sortKey': 'and|bar'})]),
     )
     for wikitext, spec in cases:
         found = textlib.getCategoryLinks(wikitext, self.site)
         expected = [pywikibot.page.Category(self.site, title, **kwargs)
                     for title, kwargs in spec]
         self.assertEqual(found, expected)

     with self.assertRaises(pywikibot.InvalidTitle):
         textlib.getCategoryLinks('[[Category:nasty{{{!}}]]', self.site)
Esempio n. 21
0
 def test_templates(self):
     """Check category links that contain ordinary templates."""
     self.site = self.get_site()
     site = self.site
     links = textlib.getCategoryLinks
     Category = pywikibot.page.Category

     self.assertEqual(
         links('[[Category:{{P1|Foo}}]]', site),
         [Category(site, 'Foo')])
     self.assertEqual(
         links('[[Category:{{P1|Foo}}|bar]]', site),
         [Category(site, 'Foo', sortKey='bar')])
     self.assertEqual(
         links('[[Category:{{P1|{{P2|L33t|Foo}}}}|bar]]', site),
         [Category(site, 'Foo', sortKey='bar')])
     self.assertEqual(
         links('[[Category:Foo{{!}}bar]]', site),
         [Category(site, 'Foo', sortKey='bar')])
     self.assertEqual(
         links('[[Category:Foo{{!}}bar]][[Category:Wiki{{P2||pedia}}]]',
               site),
         [Category(site, 'Foo', sortKey='bar'),
          Category(site, 'Wikipedia')])
     self.assertEqual(
         links('[[Category:Foo{{!}}and{{!}}bar]]', site),
         [Category(site, 'Foo', sortKey='and|bar')])
     with self.assertRaises(pywikibot.InvalidTitle):
         links('[[Category:nasty{{{!}}]]', site)
Esempio n. 22
0
 def treat_page(self):
     """Rewrite mentions of the current page that carry its birth date.

     The birth date is taken from the single category of the current
     page that fully matches ``self.categoryR``. Pages found by the
     search query are scanned for a link to the page (or to one of
     its redirects) followed by that date in parentheses; the first
     such match is rebuilt with ``replace_pattern`` using
     ``self.year`` as ``year2`` and saved with ``userPut``.
     """
     page = self.current_page
     categories = textlib.getCategoryLinks(page.text, site=self.site)
     titles = (
         cat.title(with_ns=False,
                   with_section=False,
                   allow_interwiki=False,
                   insite=self.site)
         for cat in categories)
     matches = [m for m in map(self.categoryR.fullmatch, titles) if m]
     if not matches:
         pywikibot.output('No birthdate category found')
         return
     if len(matches) > 1:
         pywikibot.output('Multiple birthdate categories found')
         return
     birth_date = matches[0].group(1)

     search_query = ''.join((
         'linksto:"%s"' % page.title(),
         r' insource:/\[\[[^\[\]]+\]\]',
         r' +\(\* *\[*%s\]*\)/' % birth_date,
         ' -intitle:"Seznam"',
     ))

     # The page itself plus every redirect pointing to it may be linked.
     link_targets = chain(
         [page],
         page.backlinks(followRedirects=False,
                        filterRedirects=True,
                        namespaces=[0]))
     alternatives = '|'.join(re.escape(p.title()) for p in link_targets)
     pattern = (r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % alternatives
                + r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date)
     regex = re.compile(pattern)

     found_pages = SearchPageGenerator(search_query,
                                       namespaces=[0],
                                       site=self.site)
     for ref_page in PreloadingGenerator(found_pages):
         text = ref_page.text
         # todo: multiple matches
         match = regex.search(text)
         if not match:
             continue
         inside, left, year1, right = match.groups('')
         new_text = ''.join((
             text[:match.start()],
             replace_pattern.format(inside=inside,
                                    left=left,
                                    right=right,
                                    year1=year1,
                                    year2=self.year),
             text[match.end():],
         ))
         self.userPut(ref_page,
                      ref_page.text,
                      new_text,
                      summary='doplnění data úmrtí')
Esempio n. 23
0
def addCategory(site, page, cat):
    """Add the category ``cat`` to ``page`` unless it is already present.

    :param site: site used to parse the existing category links and to
        build the new category
    :param page: page whose ``text`` is inspected and rewritten
    :param cat: title of the category to add
    :return: True if the category was added and the page put, False if
        the category was already present
    """
    old_text = page.text
    # Parse with the caller's site so the membership test below compares
    # categories that belong to the same site as ``catpl``.
    cats = textlib.getCategoryLinks(old_text, site=site)

    catpl = pywikibot.Category(site, cat)

    if catpl in cats:
        print("\t'" + cat + "' already in page categories")
        return False

    print("\t'" + cat + "' not in page categories. Adding")
    cats.append(catpl)
    text = textlib.replaceCategoryLinks(page.text, cats, site=site)
    userPut(page, old_text, text, minor=True, botflag=True)
    return True
Esempio n. 24
0
 def treat(self, page):
     """Process one page.

     Adds ``self.newcat`` to the page unless it is already listed.
     Redirects are followed only when ``self.follow_redirects`` is set
     and skipped otherwise; missing pages are created only when
     ``self.create`` is set.

     :param page: the page to work on
     """
     if page.isRedirectPage():
         # if it's a redirect use the redirect target instead
         redirTarget = page.getRedirectTarget()
         if self.follow_redirects:
             self.current_page = redirTarget
         else:
             pywikibot.warning(
                 "Page %s is a redirect to %s; skipping." % (page.title(asLink=True), redirTarget.title(asLink=True))
             )
             # loading it will throw an error if we don't jump out before
             return
     else:
         self.current_page = page
     if self.current_page.exists():
         # Load the page
         text = self.current_page.text
     elif self.create:
         pywikibot.output("Page %s doesn't exist yet; creating." % (self.current_page.title(asLink=True)))
         text = ""
     else:
         pywikibot.output("Page %s does not exist; skipping." % self.current_page.title(asLink=True))
         return
     # store old text, so we don't have reload it every time
     old_text = text
     # Parse with the page's own site; without it textlib falls back to
     # the default site, while ``catpl`` below lives on the page's site.
     cats = textlib.getCategoryLinks(text, site=self.current_page.site)
     pywikibot.output("Current categories:")
     for cat in cats:
         pywikibot.output("* %s" % cat.title())
     catpl = pywikibot.Category(self.current_page.site, self.newcat)
     if catpl in cats:
         pywikibot.output("%s is already in %s." % (self.current_page.title(), catpl.title()))
     else:
         if self.sort:
             catpl = self.sorted_by_last_name(catpl, self.current_page)
         pywikibot.output("Adding %s" % catpl.title(asLink=True))
         cats.append(catpl)
         text = textlib.replaceCategoryLinks(text, cats, site=self.current_page.site)
         comment = self.comment
         if not comment:
             comment = i18n.twtranslate(
                 self.current_page.site, "category-adding", {"newcat": catpl.title(withNamespace=False)}
             )
         try:
             self.userPut(self.current_page, old_text, text, summary=comment, minor=True, botflag=True)
         except pywikibot.PageSaveRelatedError as error:
             pywikibot.output("Page %s not saved: %s" % (self.current_page.title(asLink=True), error))
Esempio n. 25
0
def set_category_status(site, page, cat, status):
    """Add the category ``cat`` to ``page`` or remove it from the page.

    :param site: site used to parse the category links and to build
        the category
    :param page: page whose ``text`` is updated and saved
    :param cat: title of the category to add or remove
    :param status: truthy to make sure the category is present, falsy
        to remove it
    :return: True if the text changed and the page was saved, else False
    """
    old_text = page.text
    # Parse with the caller's site so the membership tests compare
    # categories of the same site as ``catpl``.
    cats = textlib.getCategoryLinks(old_text, site=site)
    catpl = pywikibot.Category(site, cat)

    if status:
        if catpl not in cats:
            cats.append(catpl)
    elif catpl in cats:
        cats.remove(catpl)
    text = textlib.replaceCategoryLinks(page.text, cats, site=site)
    if old_text != text:
        page.text = text
        page.save(minor=True, botflag=True)
        return True
    return False
Esempio n. 26
0
def set_category_status(site, page, cat, status):
    """Ensure a category is present on (or absent from) a page.

    The page is saved as a minor bot edit only when its text changed.

    @param site: site used to build the category and to parse and render
        the category links of the page
    @param page: page whose wikitext is inspected and possibly saved
    @param cat: title of the category to add or remove
    @param status: true to add the category, false to remove it
    @return: True if the page was saved, False if nothing changed
    """
    old_text = page.text
    # Use the given site for parsing as well: textlib would otherwise parse
    # against the default site, so the parsed categories might not compare
    # equal to a category created on ``site``.
    cats = textlib.getCategoryLinks(old_text, site=site)
    catpl = pywikibot.Category(site, cat)

    present = catpl in cats
    if status and not present:
        cats.append(catpl)
    elif not status and present:
        cats.remove(catpl)

    text = textlib.replaceCategoryLinks(old_text, cats, site=site)
    if old_text == text:
        return False

    page.text = text
    page.save(minor=True, botflag=True)
    return True
Esempio n. 27
0
    def apply(self, page, *args):
        """Clean up list items and category placement in ``page.text``.

        Three steps are applied in order: bullet lines without content are
        dropped, the 'Žijící lidé' category is removed when a category whose
        title starts with 'Úmrtí ' is also present, and the blank line
        between a defaultsort magic word call and a following category
        link is removed.

        @param page: page whose ``text`` attribute is rewritten in place
        """
        # drop bullet lines that carry no content
        page.text = re.sub(r'^\* *\n', '', page.text, flags=re.M)

        # a page in a death category should not stay among living people
        cats = textlib.getCategoryLinks(page.text, site=page.site)
        living = pywikibot.Category(page.site, 'Žijící lidé')
        if living in cats and any(
                cat.title(with_ns=False).startswith('Úmrtí ')
                for cat in cats):
            cats.remove(living)
            page.text = textlib.replaceCategoryLinks(
                page.text, cats, page.site)

        # glue the defaultsort call to the category link right below it
        magic_words = '|'.join(
            map(re.escape, self.site.getmagicwords('defaultsort')))
        category_ns = '|'.join(self.site.namespaces[14])
        pattern = r'(\{\{ *(?:%s)[^}]+\}\}\n)\n(\[\[(?:%s))' % (
            magic_words, category_ns)
        page.text = re.sub(pattern, r'\1\2', page.text)
    def harvestSortKey(self, match):
        """Turn a sort key shared by the categories into a DEFAULTSORT.

        Takes a regex match and returns the text that should replace it.
        The matched text is returned unchanged when it already matches
        ``self.defaultsortR``, when none of the categories is 'Muži',
        'Žijící lidé' or 'Ženy', when the categories carry more than one
        distinct tidied sort key, or when fewer than four categories share
        the key.

        @param match: match object covering the text to process
        @return: the original or the rewritten text
        """
        text = match.group()
        if self.defaultsortR.search(text):
            return text

        categories = textlib.getCategoryLinks(text, site=self.site)
        person_titles = ('Muži', 'Žijící lidé', 'Ženy')
        if not any(cat.title(with_ns=False) in person_titles
                   for cat in categories):
            return text

        # count how often each tidied, non-blank sort key is used
        counts = {}
        for cat in categories:
            raw_key = cat.sortKey
            if not raw_key:
                continue
            tidied = self.tidy_sortkey(raw_key)
            if not tidied.strip():
                continue
            counts[tidied] = counts.get(tidied, 0.0) + 1
            if len(counts) > 1:
                # conflicting keys: there is no single key to harvest
                return text

        if not counts:
            return text
        if sum(counts.values()) < 4:
            return text

        # exactly one key is left at this point
        key = next(iter(counts))
        for cat in categories:
            if (cat.sortKey is not None
                    and self.tidy_sortkey(cat.sortKey) == key):
                cat.sortKey = None

        categories.sort(key=self.sort_category)
        text = textlib.removeCategoryLinks(text, self.site)
        text += '\n\n{{DEFAULTSORT:%s}}' % key
        rebuilt = textlib.replaceCategoryLinks(text, categories, self.site)
        # collapse the last blank line into a single newline
        before, _, after = rebuilt.rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
Esempio n. 29
0
    def duplicateSortKey(self, match):
        """Drop category sort keys that merely repeat the DEFAULTSORT value.

        Takes a regex match and returns the text that should replace it.
        The value is read from the last ``self.defaultsortR`` match in the
        text; if there is none, or no category repeats it, the text is
        returned unchanged.

        @param match: match object covering the text to process
        @return: the original or the rewritten text
        """
        text = match.group()
        found = list(self.defaultsortR.finditer(text))
        if not found:
            return text

        defaultsort = found[-1].group(1).strip()
        categories = textlib.getCategoryLinks(text, site=self.site)
        changed = False
        for cat in categories:
            if self.tidy_sortkey(cat.sortKey) != defaultsort:
                continue
            cat.sortKey = None
            changed = True

        if not changed:
            return text

        categories.sort(key=self.sort_category)
        rebuilt = textlib.replaceCategoryLinks(text, categories, self.site)
        # collapse the last blank line into a single newline
        before, _, after = rebuilt.rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
Esempio n. 30
0
    def has_valid_content(self) -> bool:
        """Check that the page holds exactly one call to the index template.

        The text must start with ``{{`` followed by ``INDEX_TEMPLATE`` and,
        once category links preceded by a newline are stripped, end with
        ``}}``. The templates extracted from the remaining text must be a
        single call to ``INDEX_TEMPLATE``.

        @return: True if the content is valid, else False
        """
        content = self.text
        if not content.startswith('{{' + self.INDEX_TEMPLATE):
            return False

        # Categories may follow the template call; strip them first
        for category in textlib.getCategoryLinks(content, self.site):
            content = content.replace(
                '\n' + category.title(as_link=True), '')

        if not content.endswith('}}'):
            return False

        # Nested templates are not expected in the result, so anything
        # besides one INDEX_TEMPLATE entry makes the page invalid
        templates = textlib.extract_templates_and_params_regex_simple(
            content)
        return len(templates) == 1 and templates[0][0] == self.INDEX_TEMPLATE
Esempio n. 31
0
 def treat_page(self):
     """Find pages mentioning the current page with a bare birth date.

     The birth date is taken from the single category of the current page
     whose title fully matches ``self.categoryR``. Pages found by a search
     for links to the current page followed by ``(* <date>)`` are rewritten
     through ``self.replace_callback`` and saved when anything changed.
     Nothing is done when no or several such categories exist.
     """
     page = self.current_page
     categories = textlib.getCategoryLinks(page.text, site=self.site)
     titles = (
         cat.title(with_ns=False, with_section=False,
                   allow_interwiki=False, insite=self.site)
         for cat in categories)
     matches = [found for found in map(self.categoryR.fullmatch, titles)
                if found]
     if not matches:
         pywikibot.output('No birthdate category found')
         return
     if len(matches) > 1:
         pywikibot.output('Multiple birthdate categories found')
         return
     birth_date = matches[0].group(1)

     search_query = ''.join([
         'linksto:"%s"' % page.title(),  # todo: sanitize?
         r' insource:/\[\[[^\[\]]+\]\]',
         r' +\(\* *\[*%s\]*\)/' % birth_date,
         ' -intitle:"Seznam"',
     ])

     # the page itself plus every redirect pointing to it may be linked
     link_targets = chain(
         [page],
         page.backlinks(followRedirects=False, filterRedirects=True,
                        namespaces=[0]))
     alternatives = '|'.join(
         re.escape(target.title()) for target in link_targets)
     regex = re.compile(
         r'\[\[((?:%s)(?:\|[^\[\]]+)?)\]\]' % alternatives
         + r' +\(\* *(\[\[)?(%s)(\]\])?\)' % birth_date)

     candidates = PreloadingGenerator(
         SearchPageGenerator(search_query, namespaces=[0], site=self.site))
     for ref_page in candidates:
         new_text, num = regex.subn(self.replace_callback, ref_page.text)
         if not num:
             continue
         self.userPut(ref_page, ref_page.text, new_text,
                      summary='doplnění data úmrtí')
Esempio n. 32
0
def categoricAncestor(page, depth):
    """Return the *categoric ancestor* of the page.

    That is the highest-level category which has a sideicon related to it,
    if it exists and is unique, and False otherwise. Results are stored in
    and served from ``ancestorMemo``.

    @param page: page or category whose text is scanned for category links
    @param depth: current recursion depth; parents are not followed once
        it reaches 20
    """
    if page in ancestorMemo:
        return ancestorMemo[page]

    MAX_RECURSION_DEPTH = 20
    ancestors = set()
    for cat in textlib.getCategoryLinks(page.text):
        # skip the first 9 characters of the title (presumably the
        # 'Category:' prefix) before the lookup in ``codenames``
        if cat.title()[9:] in codenames:
            ancestors.add(cat)
        elif depth < MAX_RECURSION_DEPTH:
            inherited = categoricAncestor(cat, depth + 1)
            if inherited:
                ancestors.add(inherited)

    # only a unique ancestor counts; anything else is recorded as False
    result = next(iter(ancestors)) if len(ancestors) == 1 else False
    ancestorMemo[page] = result
    return result
Esempio n. 33
0
    def treat(self, page):
        """Work on each page retrieved from generator.

        Applies the replacements to the page text, optionally adds the
        category from ``self.opt.addcat``, shows the diff and, unless
        ``self.opt.always`` is set, asks the user what to do with the
        change. Returns early without saving when the text is on the
        exceptions list or when no change was necessary.

        @param page: the page to process
        """
        original_text = page.text
        applied = set()
        new_text = original_text
        # text the replacements were last applied to; None forces a first run
        last_text = None
        # number of context lines passed to showDiff
        context = 0
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(
                    'Skipping {} because it contains text '
                    'that is on the exceptions list.'.format(page))
                return

            # Apply the replacements once, or until the text stops changing
            # when the recursive option is set.
            while new_text != last_text:
                last_text = new_text
                new_text = self.apply_replacements(last_text, applied, page)
                if not self.opt.recursive:
                    break

            if new_text == original_text:
                pywikibot.output('No changes were necessary in ' +
                                 page.title(as_link=True))
                return

            if self.opt.addcat:
                # Fetch only categories in wikitext, otherwise the others
                # will be explicitly added.
                cats = textlib.getCategoryLinks(new_text, site=page.site)
                if self.opt.addcat not in cats:
                    cats.append(self.opt.addcat)
                    new_text = textlib.replaceCategoryLinks(new_text,
                                                            cats,
                                                            site=page.site)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            self.current_page = page
            pywikibot.showDiff(original_text, new_text, context=context)
            if self.opt.always:
                # no prompt; the save happens after the loop
                break

            choice = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                 ('edit Latest', 'l'), ('open in Browser', 'b'),
                 ('More context', 'm'), ('All', 'a')],
                default='N')
            if choice == 'm':
                # start with 3 context lines, then triple on each request
                context = context * 3 if context else 3
                continue
            if choice in ('e', 'l'):
                text_editor = editor.TextEditor()
                # 'e' edits the original text, 'l' the text with replacements
                edit_text = original_text if choice == 'e' else new_text
                as_edited = text_editor.edit(edit_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                    if choice == 'l':
                        # prevent changes from being applied again
                        last_text = new_text
                continue
            if choice == 'b':
                pywikibot.bot.open_webbrowser(page)
                # reload the page and start over from the fresh text
                try:
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output('Page {0} has been deleted.'.format(
                        page.title()))
                    break
                new_text = original_text
                last_text = None
                continue
            if choice == 'a':
                # the page itself is saved after the loop
                self.opt.always = True
            if choice == 'y':
                self.save(page,
                          original_text,
                          new_text,
                          applied,
                          show_diff=False,
                          quiet=True,
                          callback=self._replace_async_callback,
                          asynchronous=True)
            # report the results of the saves queued so far
            while not self._pending_processed_titles.empty():
                proc_title, res = self._pending_processed_titles.get()
                pywikibot.output('Page {0}{1} saved'.format(
                    proc_title, '' if res else ' not'))
            # reached for 'y', 'n' and 'a': leave the interactive loop
            break

        # reached via the 'always' break, 'a', 'y', 'n' or a deleted page;
        # only the 'always' mode saves here, and it saves synchronously
        if self.opt.always and new_text != original_text:
            self.save(page,
                      original_text,
                      new_text,
                      applied,
                      show_diff=False,
                      asynchronous=False)
Esempio n. 34
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    The text goes to the top of the page if ``up`` is set, otherwise to
    the bottom; with ``reorderEnabled`` the categories and language links
    are taken out first and put back below the added text. With
    ``putText`` unset nothing is saved and the old and new text are
    returned.

    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Both -excepturl and -except may be given; either one can veto the edit
    if regexSkipUrl is not None:
        hits = re.findall(regexSkipUrl, site.getUrl(page.full_url()))
        if hits:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(hits))
            return (False, False, always)
    if regexSkip is not None:
        hits = re.findall(regexSkip, text)
        if hits:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(hits))
            return (False, False, always)

    separator = config.line_separator
    if up:
        newtext = addText + separator + text
    else:
        # a literal backslash-n in the argument stands for a line break
        addText = addText.replace('\\n', separator)
        if reorderEnabled:
            # take categories and interwiki links out of the page ...
            categoriesInside = textlib.getCategoryLinks(text, site)
            stripped = textlib.removeCategoryLinks(text, site)
            interwikiInside = textlib.getLanguageLinks(stripped, site)
            stripped = textlib.removeLanguageLinks(stripped, site)

            # ... append the new text, then put both back after it
            stripped += '{}{}'.format(separator, addText)
            stripped = textlib.replaceCategoryLinks(stripped,
                                                    categoriesInside,
                                                    site, True)
            newtext = textlib.replaceLanguageLinks(stripped, interwikiInside,
                                                   site)
        else:
            newtext = text + '{}{}'.format(separator, addText)

    if not putText:
        # Used as a library: hand the texts back without saving anything
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Ask (unless told not to) and save, retrying while put_text gives None
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'n':
                return (False, False, always)
            if choice == 'b':
                pywikibot.bot.open_webbrowser(page)
            elif choice == 'a':
                always = True

        if not (always or choice == 'y'):
            # nothing to save yet, ask again
            continue

        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        if result is None:
            error_count += 1
            continue
        return (result, result, always)
Esempio n. 35
0
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add
    @param summary: Summary of changes. If empty or None, the translated
        'add_text-adding' message with the first 200 characters of addText
        is used.
    @param regexSkip: Abort if the page text matches this regex
    @param regexSkipUrl: Abort if the content fetched from the full url of
        the page matches this regex
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (text, newtext, always)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = True.
    @param create: Create page if it does not exist
    @return: If the page text is unavailable, a skip regex matches or the
        user declines: (False, False, always).
        If putText=False: (text, newtext, always).
        Otherwise: (result, result, always) with the result of put_text.
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += '\n' + addText
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += '\n' + addText
    else:
        newtext = addText + '\n' + text

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                # ask again once the user has looked at the page
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        # put_text returned None: count the failure and try again
        error_count += 1
Esempio n. 36
0
 def run(self):
     """Start the bot.

     For every page from ``self.generator`` the replacements are applied,
     the category in ``self.addedCat`` (if set) is added, the diff is shown
     and the change is saved, either after user confirmation or directly
     when the 'always' option is set.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output("Skipping %s because the title is on the exceptions list." % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output("You can't edit page %s" % page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output("Page %s not found" % page.title(asLink=True))
             continue
         applied = set()
         new_text = original_text
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(
                     "Skipping %s because it contains text "
                     "that is on the exceptions list." % page.title(asLink=True)
                 )
                 break
             # Apply the replacements once, or until the text stops changing
             # when the recursive option is set.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied, page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output("No changes were necessary in %s" % page.title(asLink=True))
                 break
             if hasattr(self, "addedCat"):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(color_format("\n\n>>> {lightpurple}{0}{default} <<<", page.title()))
             pywikibot.showDiff(original_text, new_text)
             if self.getOption("always"):
                 # no prompt; the save happens after the loop
                 break
             choice = pywikibot.input_choice(
                 "Do you want to accept these changes?",
                 [("Yes", "y"), ("No", "n"), ("Edit", "e"), ("open in Browser", "b"), ("all", "a")],
                 default="N",
             )
             if choice == "e":
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == "b":
                 pywikibot.bot.open_webbrowser(page)
                 # reload the page and start over from the fresh text
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output("Page %s has been deleted." % page.title())
                     break
                 new_text = original_text
                 continue
             if choice == "a":
                 # the page itself is saved after the loop
                 self.options["always"] = True
             if choice == "y":
                 page.text = new_text
                 # ``async`` is a reserved word since Python 3.7 and cannot
                 # be used as a keyword argument any more; pywikibot accepts
                 # ``asynchronous`` for the same purpose.
                 page.save(
                     summary=self.generate_summary(applied),
                     asynchronous=True,
                     callback=self._count_changes,
                     quiet=True,
                 )
             # report the results of the saves queued so far
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not"))
             # reached for 'y', 'n' and 'a': leave the interactive loop
             break
         if self.getOption("always") and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied), callback=self._count_changes, quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output("Skipping %s because of edit conflict" % (page.title(),))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output("Cannot change %s because of blacklist entry %s" % (page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output("Skipping %s (locked page)" % (page.title(),))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output("Error putting page: %s" % (error.args,))
             # flush the result queue once more than 50 entries piled up
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not"))
def put_cats(page, new_cats, summary=None, always=False):
    """Add missing categories and templates to a page.

    Entries of ``new_cats`` starting with 'category:' are appended to the
    bottom of the page, entries starting with '{{' are put on top; in both
    cases only if the page does not contain them yet. Other entries are
    ignored. The diff is shown and the page is saved after confirmation.

    @param page: the page to update
    @param new_cats: iterable of category titles and template calls
    @param summary: edit summary; a default one is used if empty
    @param always: if True, save without asking
    """
    line_sep = pywikibot.config.line_separator
    if not summary:
        summary = "Adding categories using catfiles"

    oldtext = page.get()
    # Parse against the page's own site so that the result compares equal
    # to the categories created from ``page.site`` below.
    old_cats = textlib.getCategoryLinks(oldtext, site=page.site)
    old_templates = textlib.extract_templates_and_params(oldtext)
    old_template_titles = {i[0].lower() for i in old_templates}

    templates, cats = [], []
    for val in new_cats:
        lowered = val.lower()
        if lowered.startswith('category:'):
            tmp_cat = pywikibot.Category(pywikibot.Link(val, page.site))
            if tmp_cat not in old_cats:
                cats.append(tmp_cat)
        elif lowered.startswith('{{'):
            tmp_templates = textlib.extract_templates_and_params(val)
            if len(tmp_templates) != 1:
                logging.warning(
                    "There was an error when parsing the template "
                    "'{0}'. Contact the developer, skipping it for "
                    "now.".format(val))
                # really skip it: indexing an empty result would raise
                continue
            tmp_template = tmp_templates[0]
            if tmp_template[0].lower() not in old_template_titles:
                templates.append(val)

    # Add templates to the top, and the categories to the bottom.
    newtext = oldtext
    if templates:
        newtext = line_sep.join(templates) + line_sep + newtext
    if cats:
        newtext = (newtext + line_sep +
                   line_sep.join(c.title(asLink=True, underscore=False)
                                 for c in cats))

    if oldtext == newtext:
        pywikibot.output("No changes to the page need to be made.")
        return

    while True:
        # Show the diff that has been created
        pywikibot.output(color_format(
            '\n\n>>> {lightpurple}{0}{default} <<<',
            page.title(underscore=False)))
        pywikibot.showDiff(oldtext, newtext)

        if always:
            choice = 'y'
        else:
            # Ask user whether to accept
            choice = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                 ('Open browser', 'b')],
                'n', automatic_quit=False)
        # Apply the choice from above; 'b' and 'e' show the diff again
        if choice == 'n':
            break
        elif choice == 'b':
            pywikibot.bot.open_webbrowser(page)
        elif choice == 'e':
            editor = pywikibot.editor.TextEditor()
            as_edited = editor.edit(newtext)
            if as_edited and as_edited != newtext:
                newtext = as_edited
        elif choice == 'y':
            try:
                page.put_async(newtext, summary)
            except pywikibot.EditConflict:
                pywikibot.output('Edit conflict! Skipping')
            except pywikibot.ServerError:
                pywikibot.output('Server Error! Skipping')
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.LockedPage:
                pywikibot.output('Skipping %s (locked page)' % page.title())
            except pywikibot.PageNotSaved as error:
                pywikibot.output('Error putting page: %s' % error.args)
            break
Esempio n. 38
0
    def treat_page(self):
        """Tag the current page with {{Dopracować}} if links or categories lack.

        Redirects and disambiguation pages are skipped. Otherwise the links
        to namespace 0, templates, categories and <ref> tags of the page are
        counted. If no template with 'dopracować' in its name is present
        and the page has no links or no categories, the template is
        prepended with the current year and month for each missing item
        and the page is saved.
        """
        ref_regex = re.compile(r'(?P<all><ref.*?</ref>)')
        page = self.current_page
        text = page.text
        links = {
            'links': 0,
            'cat': 0,
            'template': 0,
            'infobox': 0,
            'refs': 0,
            'dopracować': False
        }
        summary = []
        verbose = self.opt.test

        if page.isRedirectPage():
            pywikibot.output(u'Page %s is REDIRECT!' % page.title())
            return
        if page.isDisambig():
            pywikibot.output(u'Page %s is DISAMBIG!' % page.title())
            return

        if verbose:
            pywikibot.output(u'Title:%s' % page.title())
            pywikibot.output(u'Depth:%s' % page.depth)

        for linked in page.linkedPages(namespaces=0):
            if verbose:
                pywikibot.output(u'Links to:[[%s]]' % linked.title())
            links['links'] += 1

        found_templates = textlib.extract_templates_and_params(
            text, remove_disabled_parts=True)
        for name, _params in found_templates:
            if verbose:
                pywikibot.output('Template:[[%s]]' % name)
            links['template'] += 1
            lowered = name.lower()
            if 'infobox' in name:
                links['infobox'] += 1
            if 'dopracować' in lowered:
                links['dopracować'] = True
            if lowered in tmplcat:
                # templates listed in tmplcat are counted as a category
                links['cat'] += 1
                if verbose:
                    pywikibot.output('Current cat#%i' % links['cat'])

        for category in textlib.getCategoryLinks(text):
            if verbose:
                pywikibot.output('Category:%s' % category)
            links['cat'] += 1
            if verbose:
                pywikibot.output('Current cat#%i' % links['cat'])

        for ref in ref_regex.finditer(text):
            if verbose:
                pywikibot.output('Ref:%s' % ref.group('all'))
            links['refs'] += 1

        if verbose:
            pywikibot.output('Links=%s' % links)

        if links['dopracować']:
            # the page is tagged already
            if verbose:
                pywikibot.output('Cleanup Tmpl FOUND')
            return

        # build the parameters of {{Dopracować}}
        datestr = datetime.now().strftime('%Y-%m')
        if verbose:
            pywikibot.output('Date:%s' % datestr)
        params = {}
        if not links['links']:
            params['linki'] = datestr
            summary.append('linki')
        if not links['cat']:
            params['kategoria'] = datestr
            summary.append('kategorie')

        if not params:
            if verbose:
                pywikibot.output('Nothing to add')
            return

        if verbose:
            pywikibot.output('Cleanup Tmpl TO ADD')
            pywikibot.output('summary:%s' % summary)
            pywikibot.output('params:%s' % params)

        # put the template on one line at the very top of the page
        template_text = textlib.glue_template_and_params(
            ('Dopracować', params))
        text = re.sub('\n', '', template_text) + '\n' + text

        self.put_current(
            text,
            summary=
            'Sprawdzanie nowych stron, w artykule należy dopracować: %s' %
            ','.join(summary))
Esempio n. 39
0
    def standardizePageFooter(self, text):
        """
        Bring the page footer into its canonical layout.

        Categories and interwiki links are read from the wikitext and written
        back through the textlib ``replace*`` helpers; "star" templates
        (featured/good article markers) are cut out of the text and appended
        again. This merges the former standardizeInterwiki and
        standardizeCategories steps. The resulting footer order is:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        @param text: wikitext of the page
        @return: wikitext with the footer re-ordered
        """
        # Regex fragments matching the names of the known star templates.
        star_patterns = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        cats = None
        iw_links = None
        star_templates = []

        # Categories are left alone on template pages, on pages carrying
        # Personendaten / sort-key templates (German Wikipedia no longer
        # allows the bot to touch categories there, see
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ) and on the listed site codes.
        skip_categories = (
            self.template
            or '{{Personendaten' in text
            or '{{SORTIERUNG' in text
            or '{{DEFAULTSORT' in text
            or self.site.code in ('et', 'it', 'bg', 'ru')
        )
        if not skip_categories:
            cats = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                # moved_links maps a site code to a (template, location) pair;
                # only the location part is needed here.
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            iw_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Strip the interwiki links; they are re-added at the very end.
            text = textlib.removeLanguageLinks(text, site=self.site)

            # Look for star templates in the text without disabled parts,
            # but remove every match of a found template from the real text.
            visible_text = textlib.removeDisabledParts(text)
            for pattern in star_patterns:
                star_re = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % pattern, re.I)
                hits = star_re.findall(visible_text)
                if hits:
                    text = star_re.sub('', text)
                    star_templates.extend(hits)

        # 1. categories
        # TODO: sort categories alphabetically and move main cats to the top.
        if cats:
            text = textlib.replaceCategoryLinks(text, cats, site=self.site)

        # 4. star templates, sorted, one per line
        if star_templates:
            text = text.strip() + self.site.family.interwiki_text_separator
            star_templates.sort()
            for template in star_templates:
                text += '%s%s' % (template.strip(), config.line_separator)
                pywikibot.log(u'%s' % template.strip())

        # 5. interwiki links
        if iw_links:
            text = textlib.replaceLanguageLinks(text, iw_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Esempio n. 40
0
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: page to modify
    @param addText: text to add; a literal backslash-n sequence is turned
        into config.line_separator when the text is appended (not when
        *up* is set)
    @param summary: edit summary; when empty the 'add_text-adding' i18n
        message is used
    @param regexSkip: skip the page if this regex matches the page text
    @param regexSkipUrl: skip the page if this regex matches the content
        fetched from the page's full URL
    @param always: save without asking for confirmation
    @param up: put the text at the top of the page instead of the bottom
    @param putText: actually save the page; when false only the old and
        new text are computed and returned
    @param oldTextGiven: use this text instead of loading the page
    @param reorderEnabled: when appending, keep categories, star templates
        and interwiki links below the added text
    @param create: treat a missing page as empty instead of skipping it
    @return: (text, newtext, always) when putText is false;
        (True, True, always) after a successful save;
        (False, False, always) when the page was skipped or saving failed
    @rtype: tuple of (text, newtext, always)
    @raise pywikibot.ServerError: when saving hit config.max_retries
        consecutive server errors
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # NOTE(review): the star-template patterns (``starsList``) used below
    # are not defined inside this function; they are presumably provided at
    # module level -- confirm before moving this function.

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue: star templates are searched in the
            # original text without disabled parts and removed from newtext.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the pattern contains backslash sequences that
                # are not valid escapes in a normal string literal.
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    # After opening the browser nothing is saved; the loop
                    # comes round and asks again.
                    pywikibot.bot.open_webbrowser(page)
            # ``choice`` is only bound when the prompt ran; ``always`` is
            # tested first so the short-circuit avoids reading it otherwise.
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    # Retry after a pause until config.max_retries is hit.
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
Esempio n. 41
0
 def assertRoundtripCategory(self, text, catcount):
     """
     Assert that category links in *text* survive a get/replace round trip.

     The categories are parsed and written back against the same site
     (self.site), so the check does not depend on the default site.

     @param text: wikitext to check
     @param catcount: number of category links expected in *text*
     """
     cats = textlib.getCategoryLinks(text, site=self.site)
     self.assertEqual(len(cats), catcount)
     self.assertEqual(text, textlib.replaceCategoryLinks(text,
                                                         cats,
                                                         site=self.site))
Esempio n. 42
0
 def test_standard_links(self):
     """Check that re-inserting the parsed categories leaves the text as is."""
     site = self.site
     original = self.old
     parsed = textlib.getCategoryLinks(original, site=site)
     rebuilt = textlib.replaceCategoryLinks(original, parsed, site=site)
     self.assertEqual(original, rebuilt)
    def standardizePageFooter(self, text):
        """
        Bring the page footer into its canonical layout.

        Categories and interwiki links are read from the wikitext and written
        back through the textlib ``replace*`` helpers. This merges the former
        standardizeInterwiki and standardizeCategories steps. The resulting
        footer order is:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        @param text: wikitext of the page
        @return: wikitext with the footer re-ordered
        """
        cats = None
        iw_links = None

        # Categories are left alone on template pages, on pages carrying
        # Personendaten / sort-key templates (German Wikipedia no longer
        # allows Pywikibot to touch categories there, see
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ) and on the listed site codes.
        skip_categories = (
            self.template
            or '{{Personendaten' in text
            or '{{SORTIERUNG' in text
            or '{{DEFAULTSORT' in text
            or self.site.code in ('et', 'it', 'bg', 'ru')
        )
        if not skip_categories:
            cats = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                # moved_links maps a site code to a (template, location) pair;
                # only the location part is needed here.
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            iw_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Strip the interwiki links; they are re-added at the very end.
            text = textlib.removeLanguageLinks(text, site=self.site)

        # 1. categories
        # TODO: sort categories alphabetically and move main cats to the top.
        if cats:
            text = textlib.replaceCategoryLinks(text, cats, site=self.site)

        # 4. interwiki links
        if iw_links:
            text = textlib.replaceLanguageLinks(text, iw_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Esempio n. 44
0
 def run(self):
     """
     Start the bot.

     For every page from self.generator the replacements are applied to the
     page text, the diff is shown and, unless the 'always' option is set,
     the user is asked what to do. A 'Yes' answer saves asynchronously from
     inside the interaction loop; with 'always' set the page is saved
     synchronously after the loop and save errors are reported, not raised.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page %s" %
                                  page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' %
                              page.title(asLink=True))
             continue
         # Passed to apply_replacements and later to generate_summary.
         applied = set()
         new_text = original_text
         # Interaction loop: each pass recomputes the text, shows the diff
         # and handles one user choice.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.' %
                                  page.title(asLink=True))
                 break
             # Apply the replacements once, or until the text stops
             # changing when self.recursive is set.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s' %
                                  page.title(asLink=True))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                 page.title())
             pywikibot.showDiff(original_text, new_text)
             # With 'always' set nothing is asked; the save happens after
             # the loop.
             if self.getOption('always'):
                 break
             choice = pywikibot.input_choice(
                 u'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                  ('open in Browser', 'b'), ('all', 'a')],
                 default='N')
             if choice == 'e':
                 # The editor is opened on the original text, not on the
                 # replaced one; the next pass applies the replacements to
                 # whatever the user returned.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 # Reload the page (force=True) and start over from the
                 # freshly loaded text.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.' %
                                      page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'a':
                 # Switch to 'always'; this page is saved after the loop.
                 self.options['always'] = True
             if choice == 'y':
                 page.put_async(new_text,
                                self.generate_summary(applied),
                                callback=self.count_changes)
             # Reached for every remaining choice (Yes, All or No): leave
             # the interaction loop.
             break
         # Synchronous save path used when 'always' is set.
         if self.getOption('always') and new_text != original_text:
             try:
                 page.put(new_text,
                          self.generate_summary(applied),
                          callback=self.count_changes)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping %s because of edit conflict' %
                                  (page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change %s because of blacklist entry %s' %
                     (page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping %s (locked page)' %
                                  (page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: %s' %
                                  (error.args, ))
Esempio n. 45
0
    def standardizePageFooter(self, text):
        """
        Bring the page footer into its canonical layout.

        Categories and interwiki links are read from the wikitext and written
        back through the textlib ``replace*`` helpers; "star" templates
        (featured/good article markers) are cut out of the text and appended
        again. This merges the former standardizeInterwiki and
        standardizeCategories steps. The resulting footer order is:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        @param text: wikitext of the page
        @return: wikitext with the footer re-ordered
        """
        # Regex fragments matching the names of the known star templates.
        star_patterns = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        cats = None
        iw_links = None
        star_templates = []

        # Categories are left alone on template pages, on pages carrying
        # Personendaten / sort-key templates (German Wikipedia no longer
        # allows the bot to touch categories there, see
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ) and on the listed site codes.
        skip_categories = (
            self.template
            or '{{Personendaten' in text
            or '{{SORTIERUNG' in text
            or '{{DEFAULTSORT' in text
            or self.site.code in ('et', 'it', 'bg', 'ru')
        )
        if not skip_categories:
            cats = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                # moved_links maps a site code to a (template, location) pair;
                # only the location part is needed here.
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            iw_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Strip the interwiki links; they are re-added at the very end.
            text = textlib.removeLanguageLinks(text, site=self.site)

            # Look for star templates in the text without disabled parts,
            # but remove every match of a found template from the real text.
            visible_text = textlib.removeDisabledParts(text)
            for pattern in star_patterns:
                star_re = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % pattern, re.I)
                hits = star_re.findall(visible_text)
                if hits:
                    text = star_re.sub('', text)
                    star_templates.extend(hits)

        # 1. categories
        # TODO: sort categories alphabetically and move main cats to the top.
        if cats:
            text = textlib.replaceCategoryLinks(text, cats, site=self.site)

        # 4. star templates, sorted, one per line
        if star_templates:
            text = text.strip() + self.site.family.interwiki_text_separator
            star_templates.sort()
            for template in star_templates:
                text += '%s%s' % (template.strip(), config.line_separator)
                pywikibot.log(u'%s' % template.strip())

        # 5. interwiki links
        if iw_links:
            text = textlib.replaceLanguageLinks(text, iw_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Esempio n. 46
0
 def run(self):
     """
     Start the bot.

     For every page from self.generator the replacements are applied to the
     page text, the diff is shown and, unless the 'always' option is set,
     the user is asked what to do. A 'Yes' answer saves asynchronously from
     inside the interaction loop; with 'always' set the page is saved
     synchronously after the loop and save errors are reported, not raised.
     Results queued in self._pending_processed_titles are printed as
     'saved' / 'not saved' lines.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 'Skipping {0} because the title is on the exceptions list.'
                 .format(page.title(as_link=True)))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output("You can't edit page " +
                                  page.title(as_link=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output('Page {0} not found'.format(
                 page.title(as_link=True)))
             continue
         # Passed to apply_replacements and later to generate_summary.
         applied = set()
         new_text = original_text
         # last_text lives outside the interaction loop so that a branch
         # can decide whether the replacements run again on the next pass.
         last_text = None
         # Context value handed to showDiff; grown by the 'More context'
         # choice.
         context = 0
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output('Skipping {0} because it contains text '
                                  'that is on the exceptions list.'.format(
                                      page.title(as_link=True)))
                 break
             # Apply the replacements once, or until the text stops
             # changing when self.recursive is set. Skipped entirely when
             # new_text already equals last_text.
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied,
                                                    page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output('No changes were necessary in ' +
                                  page.title(as_link=True))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others
                 # will be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                              page.title()))
             pywikibot.showDiff(original_text, new_text, context=context)
             # With 'always' set nothing is asked; the save happens after
             # the loop.
             if self.getOption('always'):
                 break
             choice = pywikibot.input_choice(
                 'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                  ('edit Latest', 'l'), ('open in Browser', 'b'),
                  ('More context', 'm'), ('All', 'a')],
                 default='N')
             if choice == 'm':
                 # Start at 3 and triple on every further request, then
                 # show the diff again.
                 context = context * 3 if context else 3
                 continue
             if choice == 'e':
                 # Edit starting from the original page text.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'l':
                 # Edit starting from the text with replacements applied.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(new_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                     # prevent changes from being applied again
                     last_text = new_text
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 # Reload the page (force=True) and start over from the
                 # freshly loaded text.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output('Page {0} has been deleted.'.format(
                         page.title()))
                     break
                 new_text = original_text
                 # Force the replacements to run again on the reloaded text.
                 last_text = None
                 continue
             if choice == 'a':
                 # Switch to 'always'; this page is saved after the loop.
                 self.options['always'] = True
             if choice == 'y':
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           asynchronous=True,
                           callback=self._replace_async_callback,
                           quiet=True)
             # Report every save result queued so far.
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output('Page {0}{1} saved'.format(
                     proc_title, '' if res else ' not'))
             # Reached for every remaining choice (Yes, All or No): leave
             # the interaction loop.
             break
         # Synchronous save path used when 'always' is set.
         if self.getOption('always') and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           callback=self._replace_sync_callback,
                           quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output(
                     'Skipping {0} because of edit conflict'.format(
                         page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     'Cannot change {0} because of blacklist entry {1}'.
                     format(page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output('Skipping {0} (locked page)'.format(
                     page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output('Error putting page: {0}'.format(
                     error.args, ))
             # Flush the queued results only once more than 50 have piled
             # up.
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output('Page {0}{1} saved'.format(
                         proc_title, '' if res else ' not'))
Esempio n. 47
0
 def test_standard_links(self):
     """Check that re-inserting the parsed categories leaves the text as is."""
     site = self.site
     original = self.old
     parsed = textlib.getCategoryLinks(original, site=site)
     rebuilt = textlib.replaceCategoryLinks(original, parsed, site=site)
     self.assertEqual(original, rebuilt)
Esempio n. 48
0
 def run(self):
     """
     Start the bot.

     Every page yielded by ``self.generator`` is loaded and run through
     ``self.apply_replacements``. If the text changed, the diff is shown.
     Unless the 'always' option is set, the user is asked whether to accept
     the change, edit it, open the page in a browser, or turn the 'always'
     option on. A change accepted with 'y' is saved asynchronously; a
     change made while 'always' is set is saved synchronously.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping {0!s} because the title is on the exceptions list.'.format(page.title(asLink=True)))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page {0!s}".format(page.title(asLink=True)))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page {0!s} not found'.format(page.title(asLink=True)))
             continue
         # Handed to apply_replacements and later to generate_summary.
         applied = set()
         new_text = original_text
         # Interactive loop: 'continue' shows the prompt again for the same
         # page, 'break' ends the handling of this page.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.'
                                  % page.title(asLink=True))
                 break
             # Apply the replacements once, or repeatedly until the text
             # stops changing when self.recursive is set.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied,
                                                    page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in {0!s}'.format(page.title(asLink=True)))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(color_format(
                 '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
             pywikibot.showDiff(original_text, new_text)
             if self.getOption('always'):
                 # No prompt needed; the page is saved after the loop.
                 break
             choice = pywikibot.input_choice(
                 u'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                  ('open in Browser', 'b'), ('all', 'a')],
                 default='N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 # Reload the text, the user may have changed the page in
                 # the browser.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page {0!s} has been deleted.'.format(page.title()))
                     break
                 new_text = original_text
                 continue
             if choice == 'a':
                 # The save itself happens after the loop, see below.
                 self.options['always'] = True
             if choice == 'y':
                 page.text = new_text
                 # 'async' is a reserved keyword since Python 3.7 and cannot
                 # be used as a keyword argument name any more; the
                 # parameter is spelled 'asynchronous' instead.
                 # NOTE(review): very old pywikibot releases only know the
                 # 'async' spelling - confirm the minimum supported version.
                 page.save(summary=self.generate_summary(applied),
                           asynchronous=True,
                           callback=self._count_changes, quiet=True)
             # Report the save results that have been queued so far.
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not'))
             # choice must be 'N'
             break
         # Reached with 'always' set either before the prompt or through
         # the 'a' choice: save synchronously and report failures.
         if self.getOption('always') and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           callback=self._count_changes, quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping {0!s} because of edit conflict'.format(page.title()))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change {0!s} because of blacklist entry {1!s}'.format(page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping {0!s} (locked page)'.format(page.title()))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: {0!s}'.format(error.args))
             # Flush the result queue only once more than 50 entries have
             # piled up.
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not'))
Esempio n. 49
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Put the categories and the interwiki links of the text into the
        correct position and order. This combines what used to be the
        separate standardizeInterwiki and standardizeCategories steps.

        The page footer is laid out in this sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki

        Categories are not touched when self.template is set and interwiki
        links are not touched when self.talkpage is set.
        """
        # get categories
        if self.template:
            categories = []
        else:
            categories = textlib.getCategoryLinks(text, site=self.site)

        interwiki_links = []
        if not self.talkpage:
            subpage = False
            if self.template:
                # Only the second item of the moved_links entry is needed;
                # a site code without an entry has no such location.
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title

            # get interwiki, then remove it from the text
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site, 'Category:' + self.title,
                                      sort_key=' ')
            if main in categories:
                # Replace the first matching entry by the freshly built
                # main category and move it to the front.
                categories.remove(main)
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(
                text, categories, site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(
                text, interwiki_links, site=self.site,
                template=self.template, template_subpage=subpage)

        return text
Esempio n. 50
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Ensures that categories and interwiki links end up in the right
        place and in the right order. It merges the former
        standardizeInterwiki and standardizeCategories routines.

        Footer parts, in the sequence they are written:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki
        """
        interwiki_links = []
        # get categories (skipped when self.template is set)
        categories = (
            [] if self.template
            else textlib.getCategoryLinks(text, site=self.site))

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    _unused, loc = moved_links[self.site.code]
                except KeyError:
                    # No entry for this site code: not a subpage.
                    pass
                else:
                    if loc is not None and loc in self.title:
                        subpage = True

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text,
                insite=self.site,
                template_subpage=subpage,
            )

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main_title = 'Category:' + self.title
            main = pywikibot.Category(self.site, main_title, sort_key=' ')
            if main in categories:
                position = categories.index(main)
                del categories[position]
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(
                text, categories, site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(
                text,
                interwiki_links,
                site=self.site,
                template=self.template,
                template_subpage=subpage,
            )

        return text
 def apply(self, text, page):
     """
     Remove duplicated category links from the text.

     @param text: wikitext to process
     @param page: page the text belongs to; its site is used to read and
         to write the category links
     @return: the text with its category links rewritten if a category
         was listed more than once, otherwise the unchanged text
     """
     # Parse with the page's own site so that reading and writing the
     # links agree; without it getCategoryLinks is not told which site
     # the text belongs to.
     categories = textlib.getCategoryLinks(text, site=page.site)
     if len(categories) > len(set(categories)):
         # NOTE(review): deduplicate is defined outside this block and
         # presumably strips the duplicates from the list in place -
         # its return value is not used here.
         deduplicate(categories)
         text = textlib.replaceCategoryLinks(text, categories,
                                             site=page.site)
     return text
Esempio n. 52
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @param page: the page to change; its site is used for all text handling
    @param addText: the text to add. When appended (up is False), every
        literal backslash-n sequence in it is replaced by
        config.line_separator
    @param summary: edit summary; when empty, the translated
        'add_text-adding' message with the first 200 characters of addText
        is used
    @param regexSkip: regex; the page is skipped if it matches the page text
    @param regexSkipUrl: regex; the page is skipped if it matches the
        content fetched from the page's full URL
    @param always: save without asking for confirmation
    @param up: put the text above the existing text instead of below it
    @param putText: if False, nothing is saved and the old and new text
        are returned instead
    @param oldTextGiven: text to work on instead of loading the page
    @param reorderEnabled: when appending, put categories, star templates
        and interwiki links back after the added text
    @param create: treat a missing page as empty instead of skipping it
    @return: (False, False, always) if the page was skipped, the change
        was declined or saving failed; (True, True, always) after a
        successful save; (text, newtext, always) if putText is False
    @rtype: tuple of (text, newtext, always)
    @raise pywikibot.ServerError: if saving hit a server error
        config.max_retries times
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" %
                                 page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                                   site, True)
            # Dealing the stars' issue.
            # When a page is tagged as "really well written" it has a star
            # in the interwiki links. starsList (defined outside this
            # function) lists the templates used, in regex format, to make
            # the stars appear.
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the backslashes belong to the regex and must
                # not be read as (invalid) string escape sequences.
                regex = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                # Re-add the star templates, sorted, below the other text.
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')],
                    'n',
                    automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    # Nothing is saved; the loop asks again afterwards.
                    pywikibot.bot.open_webbrowser(page)
            # 'choice' is only read when 'always' is False, in which case
            # it has been assigned by the prompt above.
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext,
                                 summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext,
                                       summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    # Retry after a pause until config.max_retries server
                    # errors have been seen, then give up.
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s' %
                        (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)' %
                                     page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    return (True, True, always)
        else:
            return (text, newtext, always)
Esempio n. 53
0
 def test_standard_links(self):
     """Round-trip the category links and expect the text unchanged."""
     site = self.site
     found = textlib.getCategoryLinks(self.old, site=site)
     self.assertEqual(
         self.old,
         textlib.replaceCategoryLinks(self.old, found, site=site))