Exemple #1
0
    def run(self):
        """Run the bot.

        For every page yielded by ``self.generator`` a page (or, when the
        configured action is ``'categories'``, a category) with the same
        title is looked up on ``page.site.image_repository()``. If it can be
        loaded, its title equals the local title and none of the template
        regexes match the local text, a ``{{commons|...}}`` or
        ``{{commonscat|...}}`` template is appended to the text, the category
        links are re-inserted through ``textlib.replaceCategoryLinks`` and
        the result is saved with ``self.userPut``.

        Returns immediately when no action or no generator is configured.
        """
        if not all((self.opt.action, self.generator)):
            return
        catmode = (self.opt.action == 'categories')
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                # Category mode works on Category objects, page mode on Page.
                commonspage = getattr(pywikibot,
                                      ('Page',
                                       'Category')[catmode])(commons,
                                                             page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(with_ns=not catmode)
                    if page.title() == pagetitle:
                        old_text = page.get()
                        text = old_text

                        # for Commons/Commonscat template
                        s = self.findTemplate.search(text)
                        # Resolve the regex attribute first (findTemplate2 in
                        # page mode, findTemplate3 in category mode) and only
                        # then search with it; calling .search() on the
                        # attribute *name* would raise AttributeError.
                        s2 = getattr(
                            self, 'findTemplate{}'.format(
                                (2, 3)[catmode])).search(text)
                        if s or s2:
                            pywikibot.output('** Already done.')
                        else:
                            cats = textlib.getCategoryLinks(text,
                                                            site=page.site)
                            text = textlib.replaceCategoryLinks(
                                '%s{{commons%s|%s}}' %
                                (text, ('', 'cat')[catmode], pagetitle),
                                cats,
                                site=page.site)
                            comment = i18n.twtranslate(
                                page.site,
                                'commons_link{}-template-added'.format(
                                    ('', '-cat')[catmode]))
                            try:
                                self.userPut(page,
                                             old_text,
                                             text,
                                             summary=comment)
                            except EditConflictError:
                                pywikibot.output(
                                    'Skipping {} because of edit conflict'.
                                    format(page.title()))

                except NoPageError:
                    pywikibot.output('{} does not exist in Commons'.format(
                        page.__class__.__name__))

            except NoPageError:
                pywikibot.output('Page {} does not exist'.format(page.title()))
            except IsRedirectPageError:
                pywikibot.output('Page {} is a redirect; skipping.'.format(
                    page.title()))
            except LockedPageError:
                pywikibot.output('Page {} is locked'.format(page.title()))
Exemple #2
0
    def standardizePageFooter(self, text):
        """
        Bring the page footer into the standard layout.

        Categories and interwiki links are extracted from ``text`` and put
        back through the ``textlib`` replace helpers so that they end up in
        the expected position and order. This merges the former
        standardizeInterwiki and standardizeCategories steps.
        The page footer has the following sections, in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        """
        # Pywikibot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # ignoring nn-wiki because of the comment line above the iw section
        leave_categories_alone = (
            self.template
            or '{{Personendaten' in text
            or '{{SORTIERUNG' in text
            or '{{DEFAULTSORT' in text
            or self.site.code in ('et', 'it', 'bg', 'ru'))
        if leave_categories_alone:
            categories = None
        else:
            categories = textlib.getCategoryLinks(text, site=self.site)

        interwikiLinks = None
        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Strip the interwiki links; they are re-added below
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Put the categories back
        # TODO: Sorting categories in alphabetic order.
        # e.g. using categories.sort()
        # TODO: Taking main cats to top
        if categories:
            text = textlib.replaceCategoryLinks(
                text, categories, site=self.site)

        # Put the interwiki links back
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(
                text,
                interwikiLinks,
                site=self.site,
                template=self.template,
                template_subpage=subpage)
        return text
 def apply(self, page, summaries=None, *args):
     """Apply the parent fix and, if it reports a change, sort categories.

     :param page: page whose ``text`` is modified in place
     :param summaries: list handed on to the parent ``apply``; a fresh
         list is created per call when omitted, so nothing the parent
         stores in it leaks into later calls
     :return: whatever the parent ``apply`` returned
     """
     if summaries is None:
         summaries = []
     result = super().apply(page, summaries, *args)
     if result:
         categories = textlib.getCategoryLinks(page.text, site=self.site)
         categories.sort(key=self.sortCategories)
         page.text = textlib.replaceCategoryLinks(page.text, categories,
                                                  self.site)
     return result
    def run(self):
        """Add Commons/Commonscat templates to the generated pages.

        Nothing happens unless both an action option and a generator are
        set. In category mode (action ``'categories'``) Category objects and
        the ``commonscat`` template are used, otherwise Page objects and the
        ``commons`` template.
        """
        action = self.getOption('action')
        if not all((action, self.generator)):
            return
        catmode = (self.getOption('action') == 'categories')
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                page_class = getattr(pywikibot, ('Page', 'Category')[catmode])
                commonspage = page_class(commons, page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(withNamespace=not catmode)
                    if page.title() != pagetitle:
                        continue

                    oldText = page.get()
                    text = oldText

                    # Is a Commons/Commonscat template already present?
                    generic_hit = self.findTemplate.search(text)
                    specific_regex = getattr(
                        self, 'findTemplate%d' % (2, 3)[catmode])
                    specific_hit = specific_regex.search(text)
                    if generic_hit or specific_hit:
                        pywikibot.output(u'** Already done.')
                        continue

                    cats = textlib.getCategoryLinks(text, site=page.site)
                    with_template = (u'%s{{commons%s|%s}}' %
                                     (text, ('', 'cat')[catmode], pagetitle))
                    text = textlib.replaceCategoryLinks(
                        with_template, cats, site=page.site)
                    message_key = ('commons_link%s-template-added' %
                                   ('', '-cat')[catmode])
                    comment = i18n.twtranslate(page.site, message_key)
                    try:
                        self.userPut(page, oldText, text, summary=comment)
                    except pywikibot.EditConflict:
                        pywikibot.output(
                            u'Skipping %s because of edit conflict' %
                            page.title())

                except pywikibot.NoPage:
                    pywikibot.output(u'%s does not exist in Commons' %
                                     page.__class__.__name__)

            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.' %
                                 page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked' % page.title())
Exemple #5
0
 def test_adjoining_links(self):
     """Test getting and replacing categories that directly adjoin.

     The line separators are stripped from the sample text and the global
     ``config.line_separator`` is emptied while the links are replaced.
     """
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(self.cats, cats)
     sep = config.LS
     config.line_separator = ''  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore before asserting (and even on error) so a failure here
         # cannot leak the empty separator into other tests.
         config.line_separator = sep
     self.assertEqual(old, new)
Exemple #6
0
def make_categories(page, list, site=None):
    """Save *page* with its category links replaced by those in *list*.

    :param page: page whose text is fetched and saved asynchronously
    :param list: iterable of category names without namespace prefix
    :param site: site used to build the category pages and the edit
        summary; ``pywikibot.Site()`` when omitted
    """
    if site is None:
        site = pywikibot.Site()
    category_pages = [
        pywikibot.Page(site, "%s:%s" % (site.namespaces.CATEGORY, name))
        for name in list
    ]
    new_text = textlib.replaceCategoryLinks(
        page.get(), category_pages, site=page.site)
    edit_summary = i18n.twtranslate(site, 'catall-changing')
    page.put_async(new_text, summary=edit_summary)
Exemple #7
0
def make_categories(page, list, site=None):
    """Save *page* with its category links replaced by those in *list*.

    :param page: page whose text is fetched and saved asynchronously
    :param list: iterable of category names without namespace prefix
    :param site: site used to build the category pages and the edit
        summary; ``pywikibot.Site()`` when omitted
    """
    if site is None:
        site = pywikibot.Site()
    category_pages = [
        pywikibot.Page(site, "%s:%s" % (site.category_namespace(), name))
        for name in list
    ]
    updated_text = textlib.replaceCategoryLinks(
        page.get(), category_pages, site=page.site)
    page.put_async(updated_text,
                   summary=i18n.twtranslate(site, 'catall-changing'))
Exemple #8
0
def make_categories(page, list, site=None):
    """Save *page* with its category links replaced by those in *list*.

    Each name in *list* is prefixed with ``'Category:'`` and turned into a
    page on *site* (``pywikibot.Site()`` when omitted); the save is
    requested asynchronously.
    """
    if site is None:
        site = pywikibot.Site()
    category_pages = [pywikibot.Page(site, 'Category:' + name)
                      for name in list]
    replaced = textlib.replaceCategoryLinks(
        page.get(), category_pages, site=page.site)
    page.put(replaced,
             asynchronous=True,
             summary=i18n.twtranslate(site, 'catall-changing'))
 def test_adjoining_links(self):
     """Test getting and replacing categories that directly adjoin.

     The categories found in the sample text must be the same with and
     without line separators, and replacing them with an empty separator
     must reproduce the separator-free text.
     """
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, "")
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     config.line_separator = ""  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore the default separator even if the replacement raises,
         # so the global config never stays modified for other tests.
         config.line_separator = sep
     self.assertEqual(old, new)
Exemple #10
0
    def run(self):
        """Add Commons/Commonscat templates to the generated pages.

        Returns at once unless both the ``action`` option and a generator
        are set. The ``'categories'`` action switches to Category objects
        and the ``commonscat`` template.
        """
        if not (self.getOption('action') and self.generator):
            return
        catmode = (self.getOption('action') == 'categories')
        for page in self.generator:
            try:
                self.current_page = page
                commons = page.site.image_repository()
                factory = getattr(pywikibot, ('Page', 'Category')[catmode])
                commonspage = factory(commons, page.title())
                try:
                    commonspage.get(get_redirect=True)
                    pagetitle = commonspage.title(withNamespace=not catmode)
                    if page.title() == pagetitle:
                        oldText = page.get()
                        text = oldText

                        # Check both template regexes before deciding
                        plain_match = self.findTemplate.search(text)
                        mode_regex = getattr(
                            self, 'findTemplate%d' % (2, 3)[catmode])
                        mode_match = mode_regex.search(text)
                        if not (plain_match or mode_match):
                            cats = textlib.getCategoryLinks(
                                text, site=page.site)
                            template_suffix = ('', 'cat')[catmode]
                            text = textlib.replaceCategoryLinks(
                                u'%s{{commons%s|%s}}'
                                % (text, template_suffix, pagetitle),
                                cats, site=page.site)
                            summary_suffix = ('', '-cat')[catmode]
                            comment = i18n.twtranslate(
                                page.site,
                                'commons_link%s-template-added'
                                % summary_suffix)
                            try:
                                self.userPut(page, oldText, text,
                                             summary=comment)
                            except pywikibot.EditConflict:
                                pywikibot.output(
                                    u'Skipping %s because of edit conflict'
                                    % page.title())
                        else:
                            pywikibot.output(u'** Already done.')

                except pywikibot.NoPage:
                    pywikibot.output(u'%s does not exist in Commons'
                                     % page.__class__.__name__)

            except pywikibot.NoPage:
                pywikibot.output(u'Page %s does not exist' % page.title())
            except pywikibot.IsRedirectPage:
                pywikibot.output(u'Page %s is a redirect; skipping.'
                                 % page.title())
            except pywikibot.LockedPage:
                pywikibot.output(u'Page %s is locked' % page.title())
Exemple #11
0
def make_categories(page, list, site=None):
    """Save *page* with its category links replaced by those in *list*.

    Category pages are built on *site* (``pywikibot.Site()`` when omitted)
    from the site's category namespace and each name in *list*; the save
    is requested asynchronously.
    """
    if site is None:
        site = pywikibot.Site()
    category_pages = []
    for name in list:
        title = "%s:%s" % (site.namespaces.CATEGORY, name)
        category_pages.append(pywikibot.Page(site, title))
    rewritten = textlib.replaceCategoryLinks(
        page.get(), category_pages, site=page.site)
    page.put(rewritten,
             asynchronous=True,
             summary=i18n.twtranslate(site, 'catall-changing'))
Exemple #12
0
 def test_adjoining_links(self):
     """Test getting and replacing adjacent categories."""
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     config.line_separator = ''  # use an empty separator temporarily
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Restore the default separator even when the call above raises;
         # otherwise the global config stays modified for later tests.
         config.line_separator = sep
     self.assertEqual(old, new)
Exemple #13
0
 def test_adjoining_links(self):
     """Test getting and replacing adjacent categories."""
     cats_std = textlib.getCategoryLinks(self.old, site=self.site)
     old = self.old.replace(config.LS, '')
     cats = textlib.getCategoryLinks(old, site=self.site)
     self.assertEqual(cats_std, cats)
     sep = config.LS
     # Use an empty separator only for the duration of the replacement.
     config.line_separator = ''
     try:
         new = textlib.replaceCategoryLinks(old, cats, site=self.site)
     finally:
         # Always put the default separator back, including on error, so
         # that the temporary setting cannot affect other tests.
         config.line_separator = sep
     self.assertEqual(old, new)
def replace_default_cat_with_new_categories_in_image_text(
        old_text, base_category, new_categories):
    """Return *old_text* without *base_category* and with new categories.

    :raises NoCategoryToAddException: if *new_categories* is empty; the
        base category is deliberately left in place in that case
    """
    if new_categories:
        # Drop the base category first, then append the new ones.
        stripped_text = textlib.replaceCategoryInPlace(
            old_text, base_category, None)
        return textlib.replaceCategoryLinks(
            stripped_text, new_categories, addOnly=True)

    # Nothing to add, so the base category must not be removed either.
    raise NoCategoryToAddException()
Exemple #15
0
 def include(self,
             pl,
             checklinks=True,
             realinclude=True,
             linkterm=None,
             summary=''):
     """Include the current page to the working category.

     :param pl: page to be put into the global ``workingcat``
     :param checklinks: whether linked/referring pages are queued for
         checking afterwards
     :param realinclude: whether the page text is actually edited
     :param linkterm: optional sort key for the added category link
     :param summary: edit summary used when saving
     """
     global workingcat, parentcats
     global checked, tocheck
     cl = checklinks
     mysite = self.site
     if linkterm:
         actualworkingcat = pywikibot.Category(mysite,
                                               workingcat.title(),
                                               sort_key=linkterm)
     else:
         actualworkingcat = workingcat
     if realinclude:
         try:
             text = pl.get()
         except pywikibot.NoPage:
             # Missing pages cannot be categorized; links may still be
             # followed below.
             pass
         except pywikibot.IsRedirectPage:
             cl = True
         else:
             # Fetch the categories once; they are reused for the
             # membership test, the parent scan and the new link list.
             cats = list(pl.categories())
             if workingcat not in cats:
                 for c in cats:
                     if c in parentcats and self.removeparent:
                         # NOTE(review): change_category is called with a
                         # single category here - confirm this matches
                         # the page API's expected arguments.
                         pl.change_category(actualworkingcat,
                                            summary=summary)
                         break
                 else:
                     # No parent category was swapped: append the working
                     # category to the existing ones.
                     pl.put(textlib.replaceCategoryLinks(
                         text, cats + [actualworkingcat], site=pl.site),
                         summary=summary)
     if cl:
         if self.checkforward:
             for page2 in pl.linkedPages():
                 if self.needcheck(page2):
                     tocheck.append(page2)
                     checked[page2] = page2
         if self.checkbackward:
             for ref_page in pl.getReferences():
                 if self.needcheck(ref_page):
                     tocheck.append(ref_page)
                     checked[ref_page] = ref_page
Exemple #16
0
def make_categories(page, list: list, site=None):
    """Save *page* with its category links replaced by those in *list*.

    :param page: The page to update and save
    :type page: pywikibot.page.BasePage
    :param list: Category names without the ``Category:`` prefix
    :param site: Site used for the category pages and the edit summary;
        ``pywikibot.Site()`` when omitted
    """
    if site is None:
        site = pywikibot.Site()
    category_pages = [pywikibot.Page(site, 'Category:' + name)
                      for name in list]
    new_text = textlib.replaceCategoryLinks(
        page.get(), category_pages, site=page.site)
    page.put(new_text,
             asynchronous=True,
             summary=i18n.twtranslate(site, 'catall-changing'))
Exemple #17
0
def addCategory(site, page, cat):
    """Add category *cat* to *page* unless it is already present.

    :param site: site the category belongs to; also used to parse and
        rewrite the category links of the page text
    :param page: page whose text is inspected and saved through ``userPut``
    :param cat: category title
    :return: True if the category was added, False if it was already there
    """
    old_text = page.text
    # Parse against the same site the Category below is created on;
    # without it the links are resolved on the default site and the
    # membership test can miss an existing category.
    cats = textlib.getCategoryLinks(old_text, site=site)

    catpl = pywikibot.Category(site, cat)

    if catpl in cats:
        print("\t'" + cat + "' already in page categories")
        return False

    print("\t'" + cat + "' not in page categories. Adding")
    cats.append(catpl)
    text = textlib.replaceCategoryLinks(old_text, cats, site=site)
    userPut(page, old_text, text, minor=True, botflag=True)
    return True
Exemple #18
0
 def treat(self, page):
     """Process one page: add ``self.newcat`` unless it is already there.

     Redirects are either followed (``self.follow_redirects``) or skipped.
     Missing pages are created with empty text when ``self.create`` is
     set and skipped otherwise.
     """
     if page.isRedirectPage():
         # if it's a redirect use the redirect target instead
         redirTarget = page.getRedirectTarget()
         if self.follow_redirects:
             self.current_page = redirTarget
         else:
             pywikibot.warning(
                 "Page %s is a redirect to %s; skipping."
                 % (page.title(asLink=True),
                    redirTarget.title(asLink=True))
             )
             # loading it will throw an error if we don't jump out before
             return
     else:
         self.current_page = page
     if self.current_page.exists():
         # Load the page
         text = self.current_page.text
     elif self.create:
         pywikibot.output("Page %s doesn't exist yet; creating."
                          % (self.current_page.title(asLink=True)))
         text = ""
     else:
         pywikibot.output("Page %s does not exist; skipping."
                          % self.current_page.title(asLink=True))
         return
     # store old text, so we don't have reload it every time
     old_text = text
     site = self.current_page.site
     # Parse the links on the page's own site so they compare equal to
     # the Category created on that site below.
     cats = textlib.getCategoryLinks(text, site=site)
     pywikibot.output("Current categories:")
     for cat in cats:
         pywikibot.output("* %s" % cat.title())
     catpl = pywikibot.Category(site, self.newcat)
     if catpl in cats:
         pywikibot.output("%s is already in %s."
                          % (self.current_page.title(), catpl.title()))
         return

     if self.sort:
         catpl = self.sorted_by_last_name(catpl, self.current_page)
     pywikibot.output("Adding %s" % catpl.title(asLink=True))
     cats.append(catpl)
     text = textlib.replaceCategoryLinks(text, cats, site=site)
     comment = self.comment
     if not comment:
         comment = i18n.twtranslate(
             site, "category-adding",
             {"newcat": catpl.title(withNamespace=False)}
         )
     try:
         self.userPut(self.current_page, old_text, text,
                      summary=comment, minor=True, botflag=True)
     except pywikibot.PageSaveRelatedError as error:
         pywikibot.output("Page %s not saved: %s"
                          % (self.current_page.title(asLink=True), error))
def remove_categories():
    """Strip all category links from every page in ``Category:Files``.

    Each page's attributes and text are printed before the page is saved
    with an empty category list.
    """
    site = pywikibot.Site()
    files_category = pywikibot.Category(site, "Category:Files")
    members = pagegenerators.CategorizedPageGenerator(category=files_category)
    for page in pagegenerators.PreloadingGenerator(members):
        pywikibot.output("")

        pywikibot.output(vars(page))
        pywikibot.output(page.text)

        stripped = textlib.replaceCategoryLinks(page.text, [], site=page.site)
        page.text = stripped

        # Saved with the default mode; asynchronous saving is not enabled.
        page.save(summary="Remove categories.")
Exemple #20
0
def set_category_status(site, page, cat, status):
    """Make sure *page* is (or is not) in category *cat*.

    :param site: site the category belongs to; also used to parse and
        rewrite the category links of the page text
    :param page: page to inspect and, if its text changes, save
    :param cat: category title
    :param status: True to ensure the category is present, False to
        ensure it is absent
    :return: True if the page text changed and was saved, else False
    """
    old_text = page.text
    # Parse on the same site as the Category below; otherwise the links
    # are resolved on the default site and never compare equal to it.
    cats = textlib.getCategoryLinks(old_text, site=site)
    catpl = pywikibot.Category(site, cat)

    present = catpl in cats
    if status and not present:
        cats.append(catpl)
    elif not status and present:
        cats.remove(catpl)

    text = textlib.replaceCategoryLinks(old_text, cats, site=site)
    if old_text == text:
        return False
    page.text = text
    page.save(minor=True, botflag=True)
    return True
Exemple #21
0
def set_category_status(site, page, cat, status):
    """Add category *cat* to *page* or remove it, depending on *status*.

    :param site: site the category belongs to; also used to parse and
        rewrite the category links of the page text
    :param page: page to inspect and, if its text changes, save
    :param cat: category title
    :param status: truthy to add the category when missing, falsy to
        remove it when present
    :return: True if the page text changed and was saved, else False
    """
    old_text = page.text
    # The links must be parsed on ``site`` so that they can compare equal
    # to the Category built on ``site``; the default site may differ.
    cats = textlib.getCategoryLinks(old_text, site=site)
    catpl = pywikibot.Category(site, cat)

    if status:
        if catpl not in cats:
            cats.append(catpl)
    elif catpl in cats:
        cats.remove(catpl)

    text = textlib.replaceCategoryLinks(old_text, cats, site=site)
    changed = old_text != text
    if changed:
        page.text = text
        page.save(minor=True, botflag=True)
    return changed
    def apply(self, page, *args):
        """Tidy list items and category links of ``page.text`` in place.

        Empty bullet lines are removed, the 'Žijící lidé' category is
        dropped when any category title starts with 'Úmrtí ', and the blank
        line between a default-sort template and the following category
        link is removed.
        """
        # remove empty list items
        page.text = re.sub(r'^\* *\n', '', page.text, flags=re.M)

        # drop the "living people" category when a death category exists
        categories = textlib.getCategoryLinks(page.text, site=page.site)
        category_living = pywikibot.Category(page.site, 'Žijící lidé')
        if category_living in categories and any(
                cat.title(with_ns=False).startswith('Úmrtí ')
                for cat in categories):
            categories.remove(category_living)
            page.text = textlib.replaceCategoryLinks(
                page.text, categories, page.site)

        # join the default-sort template with the category block
        sort_words = '|'.join(
            re.escape(word)
            for word in self.site.getmagicwords('defaultsort'))
        category_names = '|'.join(self.site.namespaces[14])
        pattern = (r'(\{\{ *(?:%s)[^}]+\}\}\n)\n(\[\[(?:%s))' %
                   (sort_words, category_names))
        page.text = re.sub(pattern, r'\1\2', page.text)
    def harvestSortKey(self, match):
        """Turn a sort key shared by the categories into a DEFAULTSORT.

        The matched text is returned unchanged when it already matches
        ``self.defaultsortR``, when none of the categories is 'Muži',
        'Žijící lidé' or 'Ženy', when the tidied sort keys are missing or
        differ, or when fewer than four categories carry the key.
        Otherwise the key is removed from the categories, a
        ``{{DEFAULTSORT:...}}`` line is appended and the sorted categories
        are written back.
        """
        text = match.group()
        if self.defaultsortR.search(text):
            return text

        categories = textlib.getCategoryLinks(text, site=self.site)
        person_titles = ('Muži', 'Žijící lidé', 'Ženy')
        if not any(category.title(with_ns=False) in person_titles
                   for category in categories):
            return text

        # Count how often each tidied, non-blank sort key occurs; bail out
        # as soon as a second distinct key shows up.
        counts = {}
        for category in categories:
            raw_key = category.sortKey
            if not raw_key:
                continue
            tidy_key = self.tidy_sortkey(raw_key)
            if not tidy_key.strip():
                continue
            counts[tidy_key] = counts.get(tidy_key, 0.0) + 1
            if len(counts) > 1:
                return text

        if not counts or sum(counts.values()) < 4:
            return text

        # Exactly one key is left at this point.
        key = next(iter(counts))
        for category in categories:
            current_key = category.sortKey
            if current_key is not None \
               and self.tidy_sortkey(current_key) == key:
                category.sortKey = None

        categories.sort(key=self.sort_category)
        text = textlib.removeCategoryLinks(text, self.site)
        text += '\n\n{{DEFAULTSORT:%s}}' % key
        rebuilt = textlib.replaceCategoryLinks(text, categories, self.site)
        before, _, after = rebuilt.rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
Exemple #24
0
def include(pl, checklinks=True, realinclude=True, linkterm=None):
    """Include page *pl* in the global working category.

    :param pl: page to be put into ``workingcat``
    :param checklinks: whether linked/referring pages are queued for
        checking afterwards
    :param realinclude: whether the page text is actually edited
    :param linkterm: optional sort key for the added category link
    """
    cl = checklinks
    if linkterm:
        actualworkingcat = pywikibot.Category(mysite,
                                              workingcat.title(),
                                              sortKey=linkterm)
    else:
        actualworkingcat = workingcat
    if realinclude:
        try:
            text = pl.get()
        except pywikibot.NoPage:
            # Missing pages cannot be categorized; links may still be
            # followed below.
            pass
        except pywikibot.IsRedirectPage:
            cl = True
        else:
            # Fetch the categories once; they are reused for the
            # membership test, the parent scan and the new link list.
            cats = list(pl.categories())
            if workingcat not in cats:
                for c in cats:
                    if c in parentcats and removeparent:
                        # NOTE(review): change_category is called with a
                        # single category here - confirm this matches the
                        # page API's expected arguments.
                        pl.change_category(actualworkingcat)
                        break
                else:
                    # No parent category was swapped: append the working
                    # category to the existing ones.
                    pl.put(
                        textlib.replaceCategoryLinks(text,
                                                     cats + [actualworkingcat],
                                                     site=pl.site))
    if cl:
        if checkforward:
            for page2 in pl.linkedPages():
                if needcheck(page2):
                    tocheck.append(page2)
                    checked[page2] = page2
        if checkbackward:
            for refPage in pl.getReferences():
                if needcheck(refPage):
                    tocheck.append(refPage)
                    checked[refPage] = refPage
Exemple #25
0
    def include(self, page, checklinks=True, realinclude=True, linkterm=None):
        """Include the current page to the working category.

        :param page: page to be put into the global ``workingcat``
        :param checklinks: whether ``self.checklinks`` is run afterwards;
            forced on for redirect pages
        :param realinclude: whether the page text is actually edited
        :param linkterm: optional sort key for the added category link
        """
        global workingcat, parentcats
        global checked
        if linkterm:
            # Use a separate Category object for the sort key. Assigning
            # it on ``workingcat`` itself would mutate the shared global
            # and carry the key over to every later page.
            actualworkingcat = pywikibot.Category(workingcat.site,
                                                  workingcat.title(),
                                                  sort_key=linkterm)
        else:
            actualworkingcat = workingcat
        if realinclude and page.exists():
            if page.isRedirectPage():
                checklinks = True
            else:
                cats = list(page.categories())
                if workingcat not in cats \
                   and not self.change_category(page, cats):
                    newtext = textlib.replaceCategoryLinks(
                        page.text, cats + [actualworkingcat],
                        site=page.site)
                    page.put(newtext, summary=self.opt.summary)

        if checklinks:
            self.checklinks(page)
    def duplicateSortKey(self, match):
        """Remove category sort keys that repeat the DEFAULTSORT value.

        The matched text is returned unchanged when ``self.defaultsortR``
        finds nothing in it or when no category carries the same tidied
        key as the last default-sort match.
        """
        text = match.group()
        found = list(self.defaultsortR.finditer(text))
        if not found:
            return text

        # The last default-sort occurrence is the one compared against.
        defaultsort = found[-1].group(1).strip()
        categories = textlib.getCategoryLinks(text, site=self.site)
        changed = False
        for category in categories:
            if self.tidy_sortkey(category.sortKey) == defaultsort:
                category.sortKey = None
                changed = True

        if not changed:
            return text

        categories.sort(key=self.sort_category)
        rebuilt = textlib.replaceCategoryLinks(text, categories, self.site)
        before, _, after = rebuilt.rpartition('\n\n')  # fixme: safer
        return before + '\n' + after
Exemple #27
0
def include(pl, checklinks=True, realinclude=True, linkterm=None, summary=''):
    """Include page *pl* in the global working category.

    :param pl: page to be put into ``workingcat``
    :param checklinks: whether linked/referring pages are queued for
        checking afterwards
    :param realinclude: whether the page text is actually edited
    :param linkterm: optional sort key for the added category link
    :param summary: edit summary used when saving
    """
    cl = checklinks
    if linkterm:
        actualworkingcat = pywikibot.Category(mysite, workingcat.title(),
                                              sortKey=linkterm)
    else:
        actualworkingcat = workingcat
    if realinclude:
        try:
            text = pl.get()
        except pywikibot.NoPage:
            # Missing pages cannot be categorized; links may still be
            # followed below.
            pass
        except pywikibot.IsRedirectPage:
            cl = True
        else:
            # Fetch the categories once; they are reused for the
            # membership test, the parent scan and the new link list.
            cats = list(pl.categories())
            if workingcat not in cats:
                for c in cats:
                    if c in parentcats and removeparent:
                        # NOTE(review): change_category is called with a
                        # single category here - confirm this matches the
                        # page API's expected arguments.
                        pl.change_category(actualworkingcat,
                                           summary=summary)
                        break
                else:
                    # No parent category was swapped: append the working
                    # category to the existing ones.
                    pl.put(textlib.replaceCategoryLinks(
                        text, cats + [actualworkingcat], site=pl.site),
                        summary=summary)
    if cl:
        if checkforward:
            for page2 in pl.linkedPages():
                if needcheck(page2):
                    tocheck.append(page2)
                    checked[page2] = page2
        if checkbackward:
            for refPage in pl.getReferences():
                if needcheck(refPage):
                    tocheck.append(refPage)
                    checked[refPage] = refPage
def categorize_file_page(site, page, p=0):
    """Categorize one file page by its MIME type or file extension.

    The category is looked up in ``flattened_categories`` (defined outside
    this function): first by exact MIME type, then by MIME regex, then by
    exact file extension and finally by extension regex. The categories
    returned by ``build_categories`` are added to the page text and the
    page is saved.

    :param site: site used to build the category pages
    :param page: page to process; anything that is not a usable file page
        is reported as an error and skipped
    :param p: zero-based index of the page, only used for the log output
    :return: counter dict with the keys ``"f"``, ``"p"``, ``"w"`` and
        ``"e"``; ``"w"`` is incremented for every warning, ``"e"`` for
        every error and ``"f"`` after a successful save
    """
    def first_match(entries, subject):
        """Return the first entry whose regex matches subject, or None."""
        return next((entry for entry in entries
                     if entry["regex"].search(subject) is not None), None)

    status = {"f": 0, "p": 1, "w": 0, "e": 0}

    pywikibot.output(f"Page {p + 1}:")

    file_page = None
    file_info = None
    usable = page.is_filepage()
    if usable:
        file_page = pywikibot.FilePage(page)
        try:
            file_info = file_page.latest_file_info
        except (NoPage, PageRelatedError):
            usable = False

    # Guard clause: nothing to do for pages without file information.
    if not usable:
        pywikibot.error("Page \"" + page.title(as_link=True) +
                        "\" is not a file page. Skipping page...")
        pywikibot.output("")
        status["e"] += 1
        return status

    uri = file_page.full_url()
    mime = file_info.mime.lower()

    pywikibot.output("    Title: " + file_page.title(as_link=True))
    pywikibot.output("    URI: " + uri)
    pywikibot.output("    MIME type: " + mime)

    # Step 1: exact MIME type, falling back to the MIME regexes.
    known_mimes = flattened_categories["mime_categories"]
    mime_patterns = flattened_categories["mime_pattern_category_regexes"]

    chosen = None
    if mime in known_mimes:
        chosen = known_mimes[mime]
    else:
        pywikibot.warning(
            f"Unrecognized MIME type \"{mime}\". Attempting to search by regex..."
        )
        status["w"] += 1

        hit = first_match(mime_patterns, mime)
        if hit is not None:
            category = hit["category"]
            pywikibot.warning(
                f"Found category \"{category}\" for unrecognized MIME type \"{mime}\"."
            )
            status["w"] += 1
            chosen = category

    # Step 2: exact file extension, falling back to the extension regexes.
    if chosen is None:
        pywikibot.warning(
            f"No category found for MIME type \"{mime}\". Attempting to search by file extension..."
        )
        status["w"] += 1

        file_extension = os.path.splitext(uri)[1][1:].strip().lower()
        if file_extension:
            known_extensions = flattened_categories["extension_categories"]
            extension_patterns = flattened_categories[
                "extension_pattern_category_regexes"]

            if file_extension in known_extensions:
                chosen = known_extensions[file_extension]
            else:
                pywikibot.warning(
                    f"Unrecognized file extension \"{file_extension}\". Attempting to search by regex..."
                )
                status["w"] += 1

                hit = first_match(extension_patterns, file_extension)
                if hit is not None:
                    category = hit["category"]
                    pywikibot.warning(
                        f"Found category \"{category}\" for unrecognized file extension \"{file_extension}\"."
                    )
                    status["w"] += 1
                    chosen = category

        if chosen is None:
            pywikibot.error(
                f"No category found for MIME type \"{mime}\" or file extension \"{file_extension}\". Skipping file page..."
            )
            pywikibot.output("")
            status["e"] += 1
            return status

    # Step 3: build the categories and add them to the file page.
    categories = build_categories(chosen)
    pywikibot.output("    Add categories: " + ", ".join(categories))

    page_categories = [pywikibot.Page(site, "Category:" + name)
                       for name in categories]
    category_wikilinks = ["[[Category:{0}|{0}]]".format(name)
                          for name in categories]

    file_page.text = textlib.replaceCategoryLinks(file_page.text,
                                                  page_categories,
                                                  site=file_page.site,
                                                  addOnly=True)

    noun = "categor" + ("y" if len(category_wikilinks) <= 1 else "ies")
    summary = "Add the {0} {1}.".format(noun, ", ".join(category_wikilinks))

    file_page.save(summary=summary, minor=False)

    status["f"] += 1

    return status
    def standardizePageFooter(self, text):
        """
        Bring the page footer into its standard layout.

        Categories and interwiki links are collected from the text and
        written back in the expected order, replacing the former
        standardizeInterwiki and standardizeCategories steps.

        Footer layout, top to bottom:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki
        """
        footer_categories = []
        language_links = []

        # Categories are only collected for non-template pages.
        if not self.template:
            footer_categories = textlib.getCategoryLinks(text,
                                                         site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title

            # Collect the interwiki links, then strip them from the text.
            language_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Write the categories back, with the main category first.
        if footer_categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site,
                                      'Category:' + self.title,
                                      sort_key=' ')
            if main in footer_categories:
                footer_categories.remove(main)
                footer_categories.insert(0, main)
            text = textlib.replaceCategoryLinks(text,
                                                footer_categories,
                                                site=self.site)

        # Write the interwiki links back.
        if language_links:
            text = textlib.replaceLanguageLinks(text,
                                                language_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text
Exemple #30
0
def add_text(page, addText, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """
    Add text to a page.

    @param page: page to work on
    @param addText: text to add; when it is appended at the bottom, the
        two-character sequence backslash + n is replaced by
        config.line_separator
    @param summary: edit summary; a translated default is used when empty
    @param regexSkip: skip the page if this regex matches the page text
    @param regexSkipUrl: skip the page if this regex matches the content
        fetched from the page's URL
    @param always: save without asking the user
    @param up: put the text at the top of the page instead of the bottom
    @param putText: if false, nothing is saved and the texts are returned
    @param oldTextGiven: text to use instead of loading the page
    @param reorderEnabled: when appending, keep categories, star templates
        and interwiki links below the added text
    @param create: treat a missing page as empty instead of skipping it
    @return: (False, False, always) if the page was skipped or the user
        declined, (True, True, always) after a save call that raised no
        error, or (text, newtext, always) when putText is false
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. starsList (defined outside this function) is a list
    # of all the templates used (in regex format) to make the stars appear.

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -exceptUrl '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output(
                'Exception! regex (or word) used with -except '
                'is in the page. Skip!\n'
                'Match was: %s' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if reorderEnabled:
            # Getting the categories
            categoriesInside = textlib.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = textlib.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = textlib.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = textlib.removeLanguageLinks(newtext, site)

            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = textlib.replaceCategoryLinks(newtext,
                                                   categoriesInside, site,
                                                   True)
            # Dealing the stars' issue
            allstars = []
            starstext = textlib.removeDisabledParts(text)
            for star in starsList:
                # Raw string: the pattern is unchanged, but it no longer
                # relies on invalid string escape sequences.
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                                   site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.input_choice(
                    u'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n', automatic_quit=False)
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    # Nothing is saved for 'b'; the loop asks again.
                    pywikibot.bot.open_webbrowser(page)
            # 'choice' is only evaluated when 'always' is false, in which
            # case it has been assigned above.
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    # Retry until config.max_retries consecutive failures.
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
Exemple #31
0
 def run(self):
     """Start the bot.

     Iterate over ``self.generator``, apply the replacements to the text
     of every page and save the result: either after the user confirmed
     the shown diff, or unconditionally once the 'always' option is set.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 'Skipping {0} because the title is on the exceptions list.'
                 .format(page.title(as_link=True)))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output("You can't edit page " +
                                  page.title(as_link=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output('Page {0} not found'.format(
                 page.title(as_link=True)))
             continue
         # 'applied' is handed to apply_replacements() and later to
         # generate_summary().
         applied = set()
         new_text = original_text
         last_text = None
         # Value passed as 'context' to showDiff(); raised by the
         # 'More context' choice.
         context = 0
         # Interaction loop: every 'continue' shows a fresh diff and asks
         # again, every 'break' ends the handling of this page.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output('Skipping {0} because it contains text '
                                  'that is on the exceptions list.'.format(
                                      page.title(as_link=True)))
                 break
             # Apply the replacements until the text no longer changes, or
             # only once if the bot is not recursive.
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied,
                                                    page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output('No changes were necessary in ' +
                                  page.title(as_link=True))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others
                 # will be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                              page.title()))
             pywikibot.showDiff(original_text, new_text, context=context)
             # With 'always' set nobody is asked; the page is saved by the
             # block following this loop.
             if self.getOption('always'):
                 break
             choice = pywikibot.input_choice(
                 'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                  ('edit Latest', 'l'), ('open in Browser', 'b'),
                  ('More context', 'm'), ('All', 'a')],
                 default='N')
             if choice == 'm':
                 # Start with 3 and triple the value on every request.
                 context = context * 3 if context else 3
                 continue
             if choice == 'e':
                 # Edit the original text by hand.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'l':
                 # Edit the text with the replacements already applied.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(new_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                     # prevent changes from being applied again
                     last_text = new_text
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 # Reload the page text (force=True) and start over with
                 # that text.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output('Page {0} has been deleted.'.format(
                         page.title()))
                     break
                 new_text = original_text
                 last_text = None
                 continue
             if choice == 'a':
                 # Only the option is set here; the save itself is done by
                 # the 'always' block after this loop.
                 self.options['always'] = True
             if choice == 'y':
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           asynchronous=True,
                           callback=self._replace_async_callback,
                           quiet=True)
             # Report every (title, result) pair that is currently queued
             # in _pending_processed_titles.
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output('Page {0}{1} saved'.format(
                     proc_title, '' if res else ' not'))
             # Reached for the choices 'y', 'a' and 'n'; all others have
             # already continued or left the loop.
             break
         if self.getOption('always') and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           callback=self._replace_sync_callback,
                           quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output(
                     'Skipping {0} because of edit conflict'.format(
                         page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     'Cannot change {0} because of blacklist entry {1}'.
                     format(page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output('Skipping {0} (locked page)'.format(
                     page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output('Error putting page: {0}'.format(
                     error.args, ))
             # Only report queued results once more than 50 have piled up.
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output('Page {0}{1} saved'.format(
                         proc_title, '' if res else ' not'))
 def test_standard_links(self):
     """Re-inserting the parsed categories must leave the text as is."""
     original = self.old
     parsed = textlib.getCategoryLinks(original, site=self.site)
     rebuilt = textlib.replaceCategoryLinks(original, parsed, site=self.site)
     self.assertEqual(original, rebuilt)
Exemple #33
0
 def assertRoundtripCategory(self, text, catcount):
     """Assert that text holds catcount categories and round-trips.

     The categories are parsed from the text and written back; the result
     must be identical to the input. The same site is used for parsing
     and for rebuilding the category links.

     @param text: wikitext to check
     @param catcount: expected number of category links in the text
     """
     cats = textlib.getCategoryLinks(text, site=self.site)
     self.assertEqual(len(cats), catcount)
     self.assertEqual(text, textlib.replaceCategoryLinks(text,
                                                         cats,
                                                         site=self.site))
Exemple #34
0
 def run(self):
     """Start the bot.

     Iterate over ``self.generator``, apply the replacements to the text
     of every page and save the result: either after the user confirmed
     the shown diff, or unconditionally once the 'always' option is set.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page %s" %
                                  page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' %
                              page.title(asLink=True))
             continue
         # 'applied' is handed to apply_replacements() and later to
         # generate_summary().
         applied = set()
         new_text = original_text
         # Interaction loop: every 'continue' shows a fresh diff and asks
         # again, every 'break' ends the handling of this page.
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.' %
                                  page.title(asLink=True))
                 break
             # last_text is reset on every pass, so the replacements are
             # applied again after each 'continue'.
             last_text = None
             # Apply the replacements until the text no longer changes, or
             # only once if the bot is not recursive.
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s' %
                                  page.title(asLink=True))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(
                 u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                 page.title())
             pywikibot.showDiff(original_text, new_text)
             # With 'always' set nobody is asked; the page is saved by the
             # block following this loop.
             if self.getOption('always'):
                 break
             choice = pywikibot.input_choice(
                 u'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                  ('open in Browser', 'b'), ('all', 'a')],
                 default='N')
             if choice == 'e':
                 # Edit the original text by hand.
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 # Reload the page text (force=True) and start over with
                 # that text.
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.' %
                                      page.title())
                     break
                 new_text = original_text
                 continue
             if choice == 'a':
                 # Only the option is set here; the save itself is done by
                 # the 'always' block after this loop.
                 self.options['always'] = True
             if choice == 'y':
                 page.put_async(new_text,
                                self.generate_summary(applied),
                                callback=self.count_changes)
             # Reached for the choices 'y', 'a' and 'n'; all others have
             # already continued or left the loop.
             break
         if self.getOption('always') and new_text != original_text:
             try:
                 page.put(new_text,
                          self.generate_summary(applied),
                          callback=self.count_changes)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping %s because of edit conflict' %
                                  (page.title(), ))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change %s because of blacklist entry %s' %
                     (page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping %s (locked page)' %
                                  (page.title(), ))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: %s' %
                                  (error.args, ))
    def standardizePageFooter(self, text):
        """
        Bring the page footer into its standard layout.

        Categories and interwiki links are collected from the text and
        written back in the expected order, replacing the former
        standardizeInterwiki and standardizeCategories steps.

        Footer layout, top to bottom:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. interwiki links

        """
        footer_categories = None
        language_links = None

        # Pywikibot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # The nn wiki is skipped because of the comment line above its
        # interwiki section.
        blocking_markers = ('{{Personendaten', '{{SORTIERUNG',
                            '{{DEFAULTSORT')
        if (not self.template
                and not any(marker in text for marker in blocking_markers)
                and self.site.code not in ('et', 'it', 'bg', 'ru')):
            footer_categories = textlib.getCategoryLinks(text,
                                                         site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                try:
                    _, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title

            # Collect the interwiki links, then strip them from the text.
            language_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)
            text = textlib.removeLanguageLinks(text, site=self.site)

        # Write the categories back.
        if footer_categories:
            # TODO: Sort the categories in alphabetic order, e.g. using
            # categories.sort()
            # TODO: Move the main categories to the top.
            text = textlib.replaceCategoryLinks(text, footer_categories,
                                                site=self.site)

        # Write the interwiki links back.
        if language_links:
            text = textlib.replaceLanguageLinks(text, language_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Exemple #36
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @param page: the page to add text to; its site, title and full URL
        are used
    @param addText: text to add; every literal backslash-n sequence in it
        is replaced by config.line_separator
    @param summary: edit summary; if empty, the translated
        'add_text-adding' message is used, filled with the first 200
        characters of addText
    @param regexSkip: regex; the page is skipped if it matches the page text
    @param regexSkipUrl: regex; the page is skipped if it matches what
        site.getUrl() returns for the full URL of the page
    @param always: if True, save without asking for confirmation
    @param up: if True, add the text at the top of the page, else at
        the bottom
    @param putText: if False, nothing is saved and the computed text is
        returned instead
    @param oldTextGiven: handed to get_text() together with create
    @param reorderEnabled: if True and up is False, categories and
        interwiki links are taken out of the text before addText is
        appended and are put back afterwards
    @param create: handed to get_text() together with oldTextGiven
    @return: (False, False, always) if get_text() returned None, the page
        was skipped or the change was declined; (text, newtext, always)
        if putText is False; otherwise (result, result, always) where
        result is the first value other than None returned by put_text()
    @rtype: tuple of (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)

    # Translating the \\n into binary \n. This happens before the position
    # is chosen so that text put at the top of the page is translated too.
    addText = addText.replace('\\n', config.line_separator)
    if up:
        newtext = addText + config.line_separator + text
    elif reorderEnabled:
        # Getting the categories
        categoriesInside = textlib.getCategoryLinks(text, site)
        # Deleting the categories
        newtext = textlib.removeCategoryLinks(text, site)
        # Getting the interwiki
        interwikiInside = textlib.getLanguageLinks(newtext, site)
        # Removing the interwiki
        newtext = textlib.removeLanguageLinks(newtext, site)

        # Adding the text
        newtext += '{}{}'.format(config.line_separator, addText)
        # Reputting the categories
        newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                               site, True)
        # Adding the interwiki
        newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                               site)
    else:
        # If not up and no reordering, the text is put at the very end
        newtext = text + '{}{}'.format(config.line_separator, addText)

    # If someone load it as module, maybe it's not so useful to put the
    # text in the page
    if not putText:
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                # After the browser was opened the question is asked again
                pywikibot.bot.open_webbrowser(page)

        # choice is only read when always is False, in which case it was
        # assigned by the prompt above
        if always or choice == 'y':
            result = put_text(page,
                              newtext,
                              summary,
                              error_count,
                              asynchronous=not always)
            if result is not None:
                return (result, result, always)
            # put_text() returned None: count the failure and try again
            error_count += 1
Exemple #37
0
    def treat(self, page):
        """Run the replacements on a single page and handle the outcome.

        The page text is passed through self.apply_replacements(), and
        this is repeated until the text stops changing when
        self.opt.recursive is set. The method returns without saving if
        the text is on the exceptions list or if nothing changed.
        Otherwise the diff is shown and, unless self.opt.always is set,
        the user is asked how to proceed.
        """
        before = page.text
        after = before
        applied = set()
        previous = None
        diff_context = 0
        while True:
            if self.isTextExcepted(after):
                pywikibot.output(
                    'Skipping {} because it contains text '
                    'that is on the exceptions list.'.format(page))
                return

            # Apply the replacements once, or until a fixed point is
            # reached in recursive mode.
            while after != previous:
                previous, after = after, self.apply_replacements(
                    after, applied, page)
                if not self.opt.recursive:
                    break

            if after == before:
                pywikibot.output('No changes were necessary in ' +
                                 page.title(as_link=True))
                return

            extra_cat = self.opt.addcat
            if extra_cat:
                # Only the categories found in the wikitext are fetched,
                # otherwise the others would be explicitly added.
                cats = textlib.getCategoryLinks(after, site=page.site)
                if extra_cat not in cats:
                    cats.append(extra_cat)
                    after = textlib.replaceCategoryLinks(
                        after, cats, site=page.site)

            # Setting current_page shows the title of the page we're
            # working on, highlighted in purple.
            self.current_page = page
            pywikibot.showDiff(before, after, context=diff_context)
            if self.opt.always:
                break

            answer = pywikibot.input_choice(
                'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('Edit original', 'e'),
                 ('edit Latest', 'l'), ('open in Browser', 'b'),
                 ('More context', 'm'), ('All', 'a')],
                default='N')

            if answer == 'm':
                # Start with three lines of context, then triple it
                if diff_context:
                    diff_context *= 3
                else:
                    diff_context = 3
                continue

            if answer in ('e', 'l'):
                base_text = before if answer == 'e' else after
                edited = editor.TextEditor().edit(base_text)
                # if user didn't press Cancel
                if edited and edited != after:
                    after = edited
                    if answer == 'l':
                        # prevent changes from being applied again
                        previous = after
                continue

            if answer == 'b':
                pywikibot.bot.open_webbrowser(page)
                try:
                    before = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output('Page {0} has been deleted.'.format(
                        page.title()))
                    break
                # Start over with the freshly loaded text
                after = before
                previous = None
                continue

            if answer == 'a':
                self.opt.always = True
            elif answer == 'y':
                self.save(page,
                          before,
                          after,
                          applied,
                          show_diff=False,
                          quiet=True,
                          callback=self._replace_async_callback,
                          asynchronous=True)

            # Report the pages whose save has been processed so far
            pending = self._pending_processed_titles
            while not pending.empty():
                proc_title, res = pending.get()
                pywikibot.output('Page {0}{1} saved'.format(
                    proc_title, '' if res else ' not'))
            # choice must be 'N'
            break

        if not self.opt.always or after == before:
            return
        self.save(page,
                  before,
                  after,
                  applied,
                  show_diff=False,
                  asynchronous=False)
    def standardizePageFooter(self, text):
        """
        Bring the footer of a page into the standard order.

        Categories, star templates and interwiki links are collected from
        the text and handed back to textlib for re-insertion. This merges
        the former standardizeInterwiki and standardizeCategories steps.
        The footer is meant to consist of these parts, in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        Categories are not touched if self.template is set, if the text
        contains '{{Personendaten', '{{SORTIERUNG' or '{{DEFAULTSORT', or
        if the site code is one of et, it, bg, ru. Interwiki links and
        star templates are not touched if self.talkpage is set.

        @param text: the wikitext to work on
        @return: the wikitext with the rearranged footer
        """
        star_patterns = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        star_templates = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # nn-wiki is ignored because of the comment line above the iw section
        sort_markers = ('{{Personendaten', '{{SORTIERUNG', '{{DEFAULTSORT')
        if (not self.template
                and not any(marker in text for marker in sort_markers)
                and self.site.code not in ('et', 'it', 'bg', 'ru')):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                # Only the second item of the moved_links entry is needed
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)
            # Removing the stars; they are searched for in the text without
            # its disabled parts but removed from the full text
            searchable = textlib.removeDisabledParts(text)
            for pattern in star_patterns:
                star_re = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % pattern, re.I)
                hits = star_re.findall(searchable)
                if hits:
                    text = star_re.sub('', text)
                    star_templates.extend(hits)

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # TODO: Taking main cats to top
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)
        # Adding stars templates
        if star_templates:
            text = text.strip() + self.site.family.interwiki_text_separator
            for star in sorted(star_templates):
                star = star.strip()
                text += '%s%s' % (star, config.line_separator)
                pywikibot.log(u'%s' % star)
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Exemple #39
0
    def standardizePageFooter(self, text):
        """
        Bring the footer of a page into the standard order.

        Categories, star templates and interwiki links are collected from
        the text and handed back to textlib for re-insertion. This merges
        the former standardizeInterwiki and standardizeCategories steps.
        The footer is meant to consist of these parts, in this sequence:
        1. categories
        2. ## TODO: template beyond categories ##
        3. additional information depending on local site policy
        4. stars templates for featured and good articles
        5. interwiki links

        Categories are not touched if self.template is set, if the text
        contains '{{Personendaten', '{{SORTIERUNG' or '{{DEFAULTSORT', or
        if the site code is one of et, it, bg, ru. Interwiki links and
        star templates are not touched if self.talkpage is set.

        @param text: the wikitext to work on
        @return: the wikitext with the rearranged footer
        """
        star_patterns = [
            u'bueno',
            u'bom interwiki',
            u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
            u'destacado', u'destaca[tu]',
            u'enllaç[ _]ad',
            u'enllaz[ _]ad',
            u'leam[ _]vdc',
            u'legătură[ _]a[bcf]',
            u'liamm[ _]pub',
            u'lien[ _]adq',
            u'lien[ _]ba',
            u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
            u'liên[ _]kết[ _]chọn[ _]lọc',
            u'ligam[ _]adq',
            u'ligazón[ _]a[bd]',
            u'ligoelstara',
            u'ligoleginda',
            u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]', u'link[ _]km',
            u'link[ _]sm', u'linkfa',
            u'na[ _]lotura',
            u'nasc[ _]ar',
            u'tengill[ _][úg]g',
            u'ua',
            u'yüm yg',
            u'רא',
            u'وصلة مقالة جيدة',
            u'وصلة مقالة مختارة',
        ]

        categories = None
        interwikiLinks = None
        star_templates = []

        # The PyWikipediaBot is no longer allowed to touch categories on the
        # German Wikipedia. See
        # https://de.wikipedia.org/wiki/Hilfe_Diskussion:Personendaten/Archiv/1#Position_der_Personendaten_am_.22Artikelende.22
        # nn-wiki is ignored because of the comment line above the iw section
        sort_markers = ('{{Personendaten', '{{SORTIERUNG', '{{DEFAULTSORT')
        if (not self.template
                and not any(marker in text for marker in sort_markers)
                and self.site.code not in ('et', 'it', 'bg', 'ru')):
            categories = textlib.getCategoryLinks(text, site=self.site)

        if not self.talkpage:
            subpage = False
            if self.template:
                # Only the second item of the moved_links entry is needed
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title
            interwikiLinks = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # Removing the interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)
            # Removing the stars; they are searched for in the text without
            # its disabled parts but removed from the full text
            searchable = textlib.removeDisabledParts(text)
            for pattern in star_patterns:
                star_re = re.compile(
                    r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % pattern, re.I)
                hits = star_re.findall(searchable)
                if hits:
                    text = star_re.sub('', text)
                    star_templates.extend(hits)

        # Adding categories
        if categories:
            # TODO: Sorting categories in alphabetic order.
            # TODO: Taking main cats to top
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)
        # Adding stars templates
        if star_templates:
            text = text.strip() + self.site.family.interwiki_text_separator
            for star in sorted(star_templates):
                star = star.strip()
                text += '%s%s' % (star, config.line_separator)
                pywikibot.log(u'%s' % star)
        # Adding the interwiki
        if interwikiLinks:
            text = textlib.replaceLanguageLinks(text, interwikiLinks,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)
        return text
Exemple #40
0
    def standardizePageFooter(self, text):
        """
        Standardize page footer.

        Collects the categories and the interwiki links of the text and
        hands them back to textlib for re-insertion. This replaces the
        former standardizeInterwiki and standardizeCategories steps.

        The footer is meant to consist of these parts, in this sequence:
        1. categories
        2. additional information depending on the local site policy
        3. interwiki

        Categories are not touched if self.template is set; interwiki
        links are not touched if self.talkpage is set.

        @param text: the wikitext to work on
        @return: the wikitext with the rearranged footer
        """
        # get categories
        if self.template:
            categories = []
        else:
            categories = textlib.getCategoryLinks(text, site=self.site)

        interwiki_links = []
        if not self.talkpage:
            subpage = False
            if self.template:
                # Only the second item of the moved_links entry is needed
                try:
                    _tmpl, loc = moved_links[self.site.code]
                except KeyError:
                    loc = None
                subpage = loc is not None and loc in self.title

            # get interwiki
            interwiki_links = textlib.getLanguageLinks(
                text, insite=self.site, template_subpage=subpage)

            # remove interwiki
            text = textlib.removeLanguageLinks(text, site=self.site)

        # add categories, main to top
        if categories:
            # TODO: Sort categories in alphabetic order, e.g. using
            # categories.sort()? (T100265)
            # TODO: Get main categories from Wikidata?
            main = pywikibot.Category(self.site, 'Category:' + self.title,
                                      sort_key=' ')
            if main in categories:
                # Replace the first matching entry by the one built above
                # and move it to the front
                categories.remove(main)
                categories.insert(0, main)
            text = textlib.replaceCategoryLinks(text, categories,
                                                site=self.site)

        # add interwiki
        if interwiki_links:
            text = textlib.replaceLanguageLinks(text, interwiki_links,
                                                site=self.site,
                                                template=self.template,
                                                template_subpage=subpage)

        return text
Exemple #41
0
 def run(self):
     """Start the bot.

     Every page of self.generator is loaded and passed through
     self.apply_replacements(), repeatedly when self.recursive is set.
     Pages are skipped if their title or text is on the exceptions list,
     if they cannot be loaded or edited, or if nothing changed. Otherwise
     the diff is shown and, unless the 'always' option is set, the user
     is asked whether to save, edit, open the page in a browser or accept
     all further changes.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping {0!s} because the title is on the exceptions list.'.format(page.title(asLink=True)))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page {0!s}".format(page.title(asLink=True)))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page {0!s} not found'.format(page.title(asLink=True)))
             continue
         applied = set()
         new_text = original_text
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.'
                                  % page.title(asLink=True))
                 break
             # Apply the replacements once, or until the text no longer
             # changes in recursive mode.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied,
                                                    page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in {0!s}'.format(page.title(asLink=True)))
                 break
             if hasattr(self, 'addedCat'):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(color_format(
                 '\n\n>>> {lightpurple}{0}{default} <<<', page.title()))
             pywikibot.showDiff(original_text, new_text)
             if self.getOption('always'):
                 # Saving is done synchronously after the loop
                 break
             choice = pywikibot.input_choice(
                 u'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                  ('open in Browser', 'b'), ('all', 'a')],
                 default='N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 pywikibot.bot.open_webbrowser(page)
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page {0!s} has been deleted.'.format(page.title()))
                     break
                 # Start over with the freshly loaded text
                 new_text = original_text
                 continue
             if choice == 'a':
                 self.options['always'] = True
             if choice == 'y':
                 page.text = new_text
                 # 'async' is a reserved word since Python 3.7 and can no
                 # longer be used as a keyword argument name, hence
                 # 'asynchronous'.
                 page.save(summary=self.generate_summary(applied),
                           asynchronous=True,
                           callback=self._count_changes, quiet=True)
             # Report the pages whose save has been processed so far
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not'))
             # choice must be 'N'
             break
         if self.getOption('always') and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied),
                           callback=self._count_changes, quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping {0!s} because of edit conflict'.format(page.title()))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change {0!s} because of blacklist entry {1!s}'.format(page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping {0!s} (locked page)'.format(page.title()))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: {0!s}'.format(error.args))
             # Only flush the report queue once more than 50 entries piled up
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output('Page {0!s}{1!s} saved'.format(proc_title, '' if res else ' not'))
 def run(self):
     """Start the bot.

     Every page of self.generator is loaded and passed through
     self.apply_replacements(), repeatedly when self.recursive is set.
     Pages are skipped if their title or text is on the exceptions list,
     if they cannot be loaded or edited, or if nothing changed. Otherwise
     the diff is shown and, unless self.acceptall is set, the user is
     asked whether to save, edit, open the page in a browser or accept
     all further changes.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output(
                 u'Skipping %s because the title is on the exceptions list.'
                 % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output(u"You can't edit page %s"
                                  % page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output(u'Page %s not found' % page.title(asLink=True))
             continue
         applied = set()
         new_text = original_text
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(u'Skipping %s because it contains text '
                                  u'that is on the exceptions list.'
                                  % page.title(asLink=True))
                 break
             # Apply the replacements once, or until the text no longer
             # changes in recursive mode.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output(u'No changes were necessary in %s'
                                  % page.title(asLink=True))
                 break
             if hasattr(self, "addedCat"):
                 # Fetch only categories in wikitext, otherwise the others
                 # will be explicitly added. Reading them from new_text
                 # also keeps category changes made by the replacements.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text,
                                                             cats,
                                                             site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                              % page.title())
             pywikibot.showDiff(original_text, new_text)
             if self.acceptall:
                 # Saving is done synchronously after the loop
                 break
             choice = pywikibot.input_choice(
                 u'Do you want to accept these changes?',
                 [('Yes', 'y'), ('No', 'n'), ('Edit', 'e'),
                  ('open in Browser', 'b'), ('all', 'a')],
                 default='N')
             if choice == 'e':
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == 'b':
                 webbrowser.open("http://%s%s" % (
                     page.site.hostname(),
                     page.site.nice_get_address(page.title(asUrl=True))
                 ))
                 i18n.input('pywikibot-enter-finished-browser')
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output(u'Page %s has been deleted.'
                                      % page.title())
                     break
                 # Start over with the freshly loaded text
                 new_text = original_text
                 continue
             if choice == 'a':
                 self.acceptall = True
             if choice == 'y':
                 page.put_async(new_text, self.generate_summary(applied),
                                callback=self.count_changes)
             # choice must be 'N'
             break
         if self.acceptall and new_text != original_text:
             try:
                 page.put(new_text, self.generate_summary(applied),
                          callback=self.count_changes)
             except pywikibot.EditConflict:
                 pywikibot.output(u'Skipping %s because of edit conflict'
                                  % (page.title(),))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output(
                     u'Cannot change %s because of blacklist entry %s'
                     % (page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output(u'Skipping %s (locked page)'
                                  % (page.title(),))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output(u'Error putting page: %s'
                                  % (error.args,))
 def run(self):
     """Start the bot.

     Every page of self.generator is loaded and passed through
     self.apply_replacements(), repeatedly when self.recursive is set.
     Pages are skipped if their title or text is on the exceptions list,
     if they cannot be loaded or edited, or if nothing changed. Otherwise
     the diff is shown and, unless the "always" option is set, the user
     is asked whether to save, edit, open the page in a browser or accept
     all further changes.
     """
     # Run the generator which will yield Pages which might need to be
     # changed.
     for page in self.generator:
         if self.isTitleExcepted(page.title()):
             pywikibot.output("Skipping %s because the title is on the exceptions list." % page.title(asLink=True))
             continue
         try:
             # Load the page's text from the wiki
             original_text = page.get(get_redirect=True)
             if not page.canBeEdited():
                 pywikibot.output("You can't edit page %s" % page.title(asLink=True))
                 continue
         except pywikibot.NoPage:
             pywikibot.output("Page %s not found" % page.title(asLink=True))
             continue
         applied = set()
         new_text = original_text
         while True:
             if self.isTextExcepted(new_text):
                 pywikibot.output(
                     "Skipping %s because it contains text "
                     "that is on the exceptions list." % page.title(asLink=True)
                 )
                 break
             # Apply the replacements once, or until the text no longer
             # changes in recursive mode.
             last_text = None
             while new_text != last_text:
                 last_text = new_text
                 new_text = self.apply_replacements(last_text, applied, page)
                 if not self.recursive:
                     break
             if new_text == original_text:
                 pywikibot.output("No changes were necessary in %s" % page.title(asLink=True))
                 break
             if hasattr(self, "addedCat"):
                 # Fetch only categories in wikitext, otherwise the others will
                 # be explicitly added.
                 cats = textlib.getCategoryLinks(new_text, site=page.site)
                 if self.addedCat not in cats:
                     cats.append(self.addedCat)
                     new_text = textlib.replaceCategoryLinks(new_text, cats, site=page.site)
             # Show the title of the page we're working on.
             # Highlight the title in purple.
             pywikibot.output(color_format("\n\n>>> {lightpurple}{0}{default} <<<", page.title()))
             pywikibot.showDiff(original_text, new_text)
             if self.getOption("always"):
                 # Saving is done synchronously after the loop
                 break
             choice = pywikibot.input_choice(
                 "Do you want to accept these changes?",
                 [("Yes", "y"), ("No", "n"), ("Edit", "e"), ("open in Browser", "b"), ("all", "a")],
                 default="N",
             )
             if choice == "e":
                 editor = editarticle.TextEditor()
                 as_edited = editor.edit(original_text)
                 # if user didn't press Cancel
                 if as_edited and as_edited != new_text:
                     new_text = as_edited
                 continue
             if choice == "b":
                 pywikibot.bot.open_webbrowser(page)
                 try:
                     original_text = page.get(get_redirect=True, force=True)
                 except pywikibot.NoPage:
                     pywikibot.output("Page %s has been deleted." % page.title())
                     break
                 # Start over with the freshly loaded text
                 new_text = original_text
                 continue
             if choice == "a":
                 self.options["always"] = True
             if choice == "y":
                 page.text = new_text
                 # 'async' is a reserved word since Python 3.7 and can no
                 # longer be used as a keyword argument name, hence
                 # 'asynchronous'.
                 page.save(
                     summary=self.generate_summary(applied),
                     asynchronous=True,
                     callback=self._count_changes,
                     quiet=True,
                 )
             # Report the pages whose save has been processed so far
             while not self._pending_processed_titles.empty():
                 proc_title, res = self._pending_processed_titles.get()
                 pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not"))
             # choice must be 'N'
             break
         if self.getOption("always") and new_text != original_text:
             try:
                 page.text = new_text
                 page.save(summary=self.generate_summary(applied), callback=self._count_changes, quiet=True)
             except pywikibot.EditConflict:
                 pywikibot.output("Skipping %s because of edit conflict" % (page.title(),))
             except pywikibot.SpamfilterError as e:
                 pywikibot.output("Cannot change %s because of blacklist entry %s" % (page.title(), e.url))
             except pywikibot.LockedPage:
                 pywikibot.output("Skipping %s (locked page)" % (page.title(),))
             except pywikibot.PageNotSaved as error:
                 pywikibot.output("Error putting page: %s" % (error.args,))
             # Only flush the report queue once more than 50 entries piled up
             if self._pending_processed_titles.qsize() > 50:
                 while not self._pending_processed_titles.empty():
                     proc_title, res = self._pending_processed_titles.get()
                     pywikibot.output("Page %s%s saved" % (proc_title, "" if res else " not"))
Exemple #44
0
 def test_standard_links(self):
     """Check that putting back the extracted categories changes nothing."""
     original = self.old
     extracted = textlib.getCategoryLinks(original, site=self.site)
     rebuilt = textlib.replaceCategoryLinks(original, extracted,
                                            site=self.site)
     self.assertEqual(original, rebuilt)
def add_text(
    page,
    addText: str,
    summary: Optional[str] = None,
    regexSkip: Optional[str] = None,
    regexSkipUrl: Optional[str] = None,
    always: bool = False,
    up: bool = False,
    putText: bool = True,
    oldTextGiven: Optional[str] = None,
    reorderEnabled: bool = True,
    create: bool = False
) -> Union[Tuple[bool, bool, bool], Tuple[str, str, bool]]:
    """
    Add text to a page.

    @param page: The page to add text to
    @type page: pywikibot.page.BasePage
    @param addText: Text to add. A literal backslash followed by "n"
        (as typed on a command line) is converted to a real newline.
    @param summary: Summary of changes. If None, beginning of addText is used.
    @param regexSkip: Abort if text on page matches
    @param regexSkipUrl: Abort if full url matches
    @param always: Always add text without user confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else return
        (text, newtext, always)
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True place text above categories and
        interwiki, else place at page bottom. No effect if up = True.
    @param create: Create page if it does not exist
    @return: If putText=True: (result, result, always) where result is the
        non-None value returned by put_text, or (False, False, always)
        when the page is skipped or the user declines;
        else: (text, newtext, always)
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    if putText:
        pywikibot.output('Loading {}...'.format(page.title()))

    text = get_text(page, oldTextGiven, create)
    if text is None:
        return (False, False, always)

    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: {}'.format(result))
            return (False, False, always)

    # Translating the \\n into binary \n. This is done for both placements;
    # previously it only happened when appending, so text added with up=True
    # kept the literal two-character sequence.
    addText = addText.replace('\\n', '\n')

    if up:
        # Text goes on top; categories and interwiki links are untouched.
        newtext = addText + '\n' + text
    elif reorderEnabled:
        # Take categories and interwiki links out, append the new text,
        # then put them back so that they stay below the added text.
        categoriesInside = textlib.getCategoryLinks(text, site)
        newtext = textlib.removeCategoryLinks(text, site)
        interwikiInside = textlib.getLanguageLinks(newtext, site)
        newtext = textlib.removeLanguageLinks(newtext, site)

        newtext += '\n' + addText
        newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                               site, True)
        newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                               site)
    else:
        newtext = text + '\n' + addText

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(
            color_format('\n\n>>> {lightpurple}{0}{default} <<<',
                         page.title()))
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    error_count = 0
    while True:
        if not always:
            try:
                choice = pywikibot.input_choice(
                    'Do you want to accept these changes?',
                    [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                     ('open in Browser', 'b')], 'n')
            except QuitKeyboardInterrupt:
                sys.exit('User quit bot run.')

            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                # Show the page, then ask again.
                pywikibot.bot.open_webbrowser(page)
                continue

        # either always or choice == 'y' is selected
        result = put_text(page,
                          newtext,
                          summary,
                          error_count,
                          asynchronous=not always)
        if result is not None:
            return (result, result, always)
        # put_text returned None: count the failed attempt and try again.
        error_count += 1
Exemple #46
0
 def apply(self, text, page):
     """Drop repeated category links from ``text``.

     The categories are parsed against ``page.site`` (the same site used
     to write them back), not against the default site.

     @param text: wikitext to clean up
     @param page: page the text belongs to; only its ``site`` is used
     @return: the text with its category links rewritten when at least
         one category occurs more than once, otherwise ``text`` unchanged
     """
     site = page.site
     categories = textlib.getCategoryLinks(text, site=site)
     if len(categories) > len(set(categories)):
         # NOTE(review): the return value is ignored, so deduplicate()
         # presumably prunes the list in place - confirm against helper.
         deduplicate(categories)
         text = textlib.replaceCategoryLinks(text, categories, site)
     return text
Exemple #47
0
def add_text(page,
             addText,
             summary=None,
             regexSkip=None,
             regexSkipUrl=None,
             always=False,
             up=False,
             putText=True,
             oldTextGiven=None,
             reorderEnabled=True,
             create=False):
    """
    Add text to a page.

    @param page: The page to add text to
    @param addText: Text to add. A literal backslash followed by "n"
        is converted to config.line_separator.
    @param summary: Summary of changes. If empty, the beginning of
        addText is used.
    @param regexSkip: Skip the page if this regex matches the page text
    @param regexSkipUrl: Skip the page if this regex matches the content
        fetched from the page's full url
    @param always: Save without asking for confirmation
    @param up: If True, add text to top of page, else add at bottom.
    @param putText: If True, save changes to the page, else only return
        the old and new text
    @param oldTextGiven: If None fetch page text, else use this text
    @param reorderEnabled: If True keep categories, star templates and
        interwiki links below the added text. No effect if up = True.
    @param create: Treat a missing page as empty instead of skipping it
    @return: If putText is False: (text, newtext, always);
        else (True, True, always) after a successful save and
        (False, False, always) when the page is skipped, the user
        declines or saving fails.
    @rtype: tuple of (text, newtext, always)
    @raise pywikibot.ServerError: after config.max_retries server errors
    """
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})

    errorCount = 0

    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!" %
                                 page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = page.full_url()
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result:
            pywikibot.output('Exception! regex (or word) used with -exceptUrl '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result:
            pywikibot.output('Exception! regex (or word) used with -except '
                             'is in the page. Skip!\n'
                             'Match was: %s' % result)
            return (False, False, always)

    # Translating the \\n into binary \n. This is done for both placements;
    # previously it only happened when appending, so text added with up=True
    # kept the literal two-character sequence.
    addText = addText.replace('\\n', config.line_separator)

    if up:
        newtext = addText + config.line_separator + text
    elif reorderEnabled:
        # Getting the categories
        categoriesInside = textlib.getCategoryLinks(text, site)
        # Deleting the categories
        newtext = textlib.removeCategoryLinks(text, site)
        # Getting the interwiki
        interwikiInside = textlib.getLanguageLinks(newtext, site)
        # Removing the interwiki
        newtext = textlib.removeLanguageLinks(newtext, site)

        # Adding the text
        newtext += u"%s%s" % (config.line_separator, addText)
        # Reputting the categories
        newtext = textlib.replaceCategoryLinks(newtext, categoriesInside,
                                               site, True)
        # Dealing the stars' issue: when a page is tagged as "really well
        # written" it has a star in the interwiki links.
        # NOTE(review): starsList is defined outside this function and
        # presumably holds those template names in regex format - confirm.
        allstars = []
        starstext = textlib.removeDisabledParts(text)
        for star in starsList:
            # Raw string: same pattern as before, without invalid escapes.
            regex = re.compile(
                r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
            found = regex.findall(starstext)
            if found:
                newtext = regex.sub('', newtext)
                allstars += found
        if allstars:
            # Re-append the star templates, sorted, after the categories.
            newtext = newtext.strip() + config.line_separator * 2
            allstars.sort()
            for element in allstars:
                newtext += '%s%s' % (element.strip(), config.LS)
        # Adding the interwiki
        newtext = textlib.replaceLanguageLinks(newtext, interwikiInside,
                                               site)
    else:
        newtext = text + u"%s%s" % (config.line_separator, addText)

    if not putText:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        return (text, newtext, always)

    if text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" %
                         page.title())
        pywikibot.showDiff(text, newtext)

    # Let's put the changes.
    while True:
        if not always:
            choice = pywikibot.input_choice(
                u'Do you want to accept these changes?',
                [('Yes', 'y'), ('No', 'n'), ('All', 'a'),
                 ('open in Browser', 'b')],
                'n',
                automatic_quit=False)
            if choice == 'a':
                always = True
            elif choice == 'n':
                return (False, False, always)
            elif choice == 'b':
                pywikibot.bot.open_webbrowser(page)

        if not always and choice != 'y':
            # Nothing to save for this answer (e.g. 'b'): ask again.
            continue

        # Edits are flagged as minor everywhere except in namespace 3
        # (User talk on MediaWiki).
        minor = page.namespace() != 3
        try:
            if always:
                page.put(newtext, summary, minorEdit=minor)
            else:
                page.put_async(newtext, summary, minorEdit=minor)
        except pywikibot.EditConflict:
            pywikibot.output(u'Edit conflict! skip!')
            return (False, False, always)
        except pywikibot.ServerError:
            errorCount += 1
            if errorCount < config.max_retries:
                pywikibot.output(u'Server Error! Wait..')
                time.sleep(config.retry_wait)
                continue
            raise pywikibot.ServerError(u'Fifth Server Error!')
        except pywikibot.SpamfilterError as e:
            pywikibot.output(
                u'Cannot change %s because of blacklist entry %s' %
                (page.title(), e.url))
            return (False, False, always)
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s (locked page)' %
                             page.title())
            return (False, False, always)
        except pywikibot.PageNotSaved as error:
            # error.args is a tuple; wrap it so that "%" formats it as one
            # value instead of unpacking it (TypeError unless len == 1).
            pywikibot.output(u'Error putting page: %s' % (error.args,))
            return (False, False, always)
        else:
            return (True, True, always)
 def test_standard_links(self):
     """Test getting and replacing categories."""
     # Extract the category links from the fixture text ...
     cats = textlib.getCategoryLinks(self.old, site=self.site)
     # ... and write the same links back into that text.
     new = textlib.replaceCategoryLinks(self.old, cats, site=self.site)
     # The round trip is expected to leave the text unchanged.
     self.assertEqual(self.old, new)