Ejemplo n.º 1
0
    def ReplaceLink(self, text, oldtxt, newtxt):
        """Replace occurrences of oldtxt by newtxt in text, word by word.

        Both texts are split with self.wordBreaker and each part is
        stripped of self.stripChars; corresponding parts must have the
        same length.  Each non-empty part is replaced in both its
        first-letter-lowercase and first-letter-uppercase forms.

        @param text: the text to work on
        @param oldtxt: the text to be replaced
        @param newtxt: the replacement text
        @return: the modified text
        @raise ValueError: if the split parts of oldtxt and newtxt do not
            match in count or in per-part length
        """
        frmParts = [s.strip(self.stripChars)
                    for s in self.wordBreaker.split(oldtxt)]
        toParts = [s.strip(self.stripChars)
                   for s in self.wordBreaker.split(newtxt)]

        if len(frmParts) != len(toParts):
            raise ValueError("Splitting parts do not match counts")
        # xrange is Python 2 only; iterate the paired parts directly
        for frm, to in zip(frmParts, toParts):
            if len(frm) != len(to):
                raise ValueError("Splitting parts do not match word length")
            if frm:
                text = text.replace(first_lower(frm), first_lower(to))
                text = text.replace(first_upper(frm), first_upper(to))
        return text
 def treat_page_and_item(self, page, item):
     """Import a lowercase label from the page's displaytitle property."""
     display = page.properties().get('displaytitle')
     if not display:
         return
     # only act when the displaytitle is the uncapitalized page title
     if first_lower(page.title()) != display:
         return
     lang = page.site.lang
     label = item.labels.get(lang)
     # an existing label that differs (after stripping) is left alone
     if label and self.stripped(label) != self.stripped(page.title()):
         return
     item.labels[lang] = first_lower(label) if label else display
     summary = 'importing [%s] label from displaytitle in %s' % (
         lang, page.title(as_link=True, insite=item.site))
     self.user_edit_entity(item, summary=summary)
Ejemplo n.º 3
0
    def translateAndCapitalizeNamespaces(self, text):
        """Use localized namespace names.

        Rewrite the namespace prefix of wikilinks in *text* to the
        canonical local variant, accepting aliases, lowercased first
        letters and space/underscore variants in the source text.

        @param text: the wikitext to work on
        @return: the text with namespace prefixes normalized
        """
        # arz uses english stylish codes
        if self.site.sitename == 'wikipedia:arz':
            return text
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for namespace in self.site.namespaces.values():
            if namespace == 0:
                # skip main (article) namespace
                continue
            # a clone is needed. Won't change the namespace dict
            namespaces = list(namespace)
            # File namespace: some wikis prefer a non-default variant
            if namespace == 6 and self.site.family.name == 'wikipedia':
                if self.site.code in ('en', 'fr') and MediaWikiVersion(
                        self.site.version()) >= MediaWikiVersion('1.14'):
                    # do not change "Image" on en-wiki and fr-wiki
                    assert u'Image' in namespaces
                    namespaces.remove(u'Image')
                if self.site.code == 'hu':
                    # do not change "Kép" on hu-wiki
                    assert u'Kép' in namespaces
                    namespaces.remove(u'Kép')
                elif self.site.code == 'pt':
                    # use "Imagem" by default on pt-wiki (per T57242)
                    assert 'Imagem' in namespaces
                    namespaces.insert(
                        0, namespaces.pop(namespaces.index('Imagem')))
            # final namespace variant: the replacement target
            final_ns = namespaces.pop(0)
            if namespace in (2, 3):
                # skip localized user namespace, maybe gender is used
                namespaces = ['User' if namespace == 2 else 'User talk']
            # lowerspaced and underscored namespaces
            for i, item in enumerate(namespaces):
                # a space in the name also matches an underscore
                item = item.replace(' ', '[ _]')
                # match both cases of the first letter via a char class
                item = u'[%s%s]' % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # the lowercased final name is an accepted source variant too
            namespaces.append(first_lower(final_ns))
            if final_ns and namespaces:
                if self.site.sitename == 'wikipedia:pt' and namespace == 6:
                    # only change on these file extensions (per T57242)
                    extensions = ('png', 'gif', 'jpg', 'jpeg', 'svg', 'tiff',
                                  'tif')
                    text = textlib.replaceExcept(
                        text,
                        r'\[\[\s*({}) *:(?P<name>[^\|\]]*?\.({}))'
                        r'(?P<label>.*?)\]\]'
                        .format('|'.join(namespaces), '|'.join(extensions)),
                        r'[[{}:\g<name>\g<label>]]'.format(final_ns),
                        exceptions)
                else:
                    text = textlib.replaceExcept(
                        text,
                        r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                        % '|'.join(namespaces),
                        r'[[%s:\g<nameAndLabel>]]' % final_ns,
                        exceptions)
        return text
Ejemplo n.º 4
0
    def ReplaceLink(self, text, oldtxt, newtxt):
        """Replace links.

        Both texts are split with self.wordBreaker and each part is
        stripped of self.stripChars; corresponding parts must have the
        same length.  Each non-empty part is replaced in both its
        first-letter-lowercase and first-letter-uppercase forms.

        @raise ValueError: if the split parts of oldtxt and newtxt do not
            match in count or in per-part length
        """
        frmParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(oldtxt)
        ]
        toParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(newtxt)
        ]

        if len(frmParts) != len(toParts):
            raise ValueError('Splitting parts do not match counts')
        for i, part in enumerate(frmParts):
            # compare lengths, not a string against an int
            if len(part) != len(toParts[i]):
                raise ValueError('Splitting parts do not match word length')
            if part:
                text = text.replace(first_lower(part), first_lower(toParts[i]))
                text = text.replace(first_upper(part), first_upper(toParts[i]))
        return text
Ejemplo n.º 5
0
def correctcap(link, text):
    # Uncapitalize the link title when the text already links to the
    # lowercase form; otherwise keep it capitalized.
    upper = link.title()
    lower = first_lower(upper)
    lowered_forms = ("[[%s]]" % lower, "[[%s|" % lower)
    if any(form in text for form in lowered_forms):
        return lower
    return upper
Ejemplo n.º 6
0
def correctcap(link, text):
    # Return the lowercase variant of the link title if the text links
    # to it uncapitalized, else the capitalized variant.
    capitalized = link.title()
    uncapitalized = first_lower(capitalized)
    uses_lower = ("[[%s]]" % uncapitalized in text
                  or "[[%s|" % uncapitalized in text)
    return uncapitalized if uses_lower else capitalized
Ejemplo n.º 7
0
    def translateAndCapitalizeNamespaces(self, text):
        """Use localized namespace names.

        Rewrite the namespace prefix of wikilinks in *text* to the
        canonical local variant, accepting aliases, lowercased first
        letters and space/underscore variants in the source text.

        @param text: the wikitext to work on
        @return: the text with namespace prefixes normalized
        """
        # arz uses english stylish codes
        if self.site.sitename == 'wikipedia:arz':
            return text
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for namespace in self.site.namespaces.values():
            if namespace == 0:
                # skip main (article) namespace
                continue
            # a clone is needed. Won't change the namespace dict
            namespaces = list(namespace)
            # File namespace: some wikis prefer a non-default variant
            if namespace == 6 and self.site.family.name == 'wikipedia':
                if self.site.code in ('en', 'fr') \
                        and self.site.mw_version >= '1.14':
                    # do not change "Image" on en-wiki and fr-wiki
                    assert 'Image' in namespaces
                    namespaces.remove('Image')
                if self.site.code == 'hu':
                    # do not change "Kép" on hu-wiki
                    assert 'Kép' in namespaces
                    namespaces.remove('Kép')
                elif self.site.code == 'pt':
                    # use "Imagem" by default on pt-wiki (per T57242)
                    assert 'Imagem' in namespaces
                    namespaces.insert(
                        0, namespaces.pop(namespaces.index('Imagem')))
            # final namespace variant: the replacement target
            final_ns = namespaces.pop(0)
            if namespace in (2, 3):
                # skip localized user namespace, maybe gender is used
                namespaces = ['User' if namespace == 2 else 'User talk']
            # lowerspaced and underscored namespaces
            for i, item in enumerate(namespaces):
                # a space in the name also matches an underscore
                item = item.replace(' ', '[ _]')
                # match both cases of the first letter via a char class
                item = '[%s%s]' % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # the lowercased final name is an accepted source variant too
            namespaces.append(first_lower(final_ns))
            if final_ns and namespaces:
                if self.site.sitename == 'wikipedia:pt' and namespace == 6:
                    # only change on these file extensions (per T57242)
                    extensions = ('png', 'gif', 'jpg', 'jpeg', 'svg', 'tiff',
                                  'tif')
                    text = textlib.replaceExcept(
                        text, r'\[\[\s*({}) *:(?P<name>[^\|\]]*?\.({}))'
                        r'(?P<label>.*?)\]\]'.format('|'.join(namespaces),
                                                     '|'.join(extensions)),
                        r'[[{}:\g<name>\g<label>]]'.format(final_ns),
                        exceptions)
                else:
                    text = textlib.replaceExcept(
                        text, r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]' %
                        '|'.join(namespaces),
                        r'[[%s:\g<nameAndLabel>]]' % final_ns, exceptions)
        return text
Ejemplo n.º 8
0
    def get_missing_labels(self, sitelinks, dont):
        """Propose item labels from sitelink titles.

        @param sitelinks: mapping of dbname to sitelink; each value's
            canonical_title(), namespace and site are read
        @param dont: language codes to skip; languages with conflicting
            titles are added to it as a side effect
        @return: mapping of language code to proposed label
        """
        labels = {}
        for dbname in sitelinks:
            # [[d:Topic:Vedxkcb8ek6ss1pc]]
            if dbname.startswith('alswiki'):
                continue
            # derive the language code from the dbname prefix
            lang = self.normalize_lang(dbname.rpartition('wik')[0])
            if not lang or lang in dont:
                continue

            # try to defer this as much as possible
            link = sitelinks[dbname]
            title = link.canonical_title()

            # todo: check if this is still needed
            if ':' not in title and '/' in title:
                continue
            # [[d:Topic:Vhs5f72i5obvkr3t]]
            if title.startswith('Wikipedia:Artikelwerkstatt/'):
                continue
            # [[d:Topic:Vw8cayiif34m2eem]]
            if dbname.endswith('wikinews') and link.namespace == 14:
                continue
            # [[d:Topic:Vn16a76j30dblqo7]]
            if dbname == 'zh_yuewiki' and title.startswith('Portal:時人時事/'):
                continue
            # [[d:Topic:Vrel33kwnco2xp55]]
            # on Wikisource, strip the Author: prefix from the title
            if dbname.endswith('wikisource'):
                if link.namespace == link.site.namespaces.lookup_name(
                        'Author'):
                    title = title.partition(':')[2]
            # [[d:Topic:Uhdjlv9aae6iijuc]]
            # todo: create a lib for this
            if lang == 'fr' and title.startswith(
                ('Abbaye ', 'Cathédrale ', 'Chapelle ', 'Cloître ', 'Couvent ',
                 'Monastère ', 'Église ')):
                title = first_lower(title)
            label = labels.get(lang)
            # two sitelinks of the same language with different titles:
            # drop the language entirely
            if label and first_lower(label) != first_lower(title):
                labels.pop(lang)  # todo: better handling
                dont.add(lang)
                continue
            labels[lang] = title
        return labels
Ejemplo n.º 9
0
    def ReplaceLink(self, text, oldtxt, newtxt):
        """Replace occurrences of oldtxt by newtxt in text, word by word.

        Both texts are split with self.wordBreaker and each part is
        stripped of self.stripChars; corresponding parts must have the
        same length.  Each non-empty part is replaced in both its
        first-letter-lowercase and first-letter-uppercase forms.

        @raise ValueError: if the split parts of oldtxt and newtxt do not
            match in count or in per-part length
        """
        frmParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(oldtxt)
        ]
        toParts = [
            s.strip(self.stripChars) for s in self.wordBreaker.split(newtxt)
        ]

        if len(frmParts) != len(toParts):
            raise ValueError('Splitting parts do not match counts')
        # xrange is Python 2 only; iterate the paired parts directly
        for frm, to in zip(frmParts, toParts):
            if len(frm) != len(to):
                raise ValueError('Splitting parts do not match word length')
            if frm:
                text = text.replace(first_lower(frm), first_lower(to))
                text = text.replace(first_upper(frm), first_upper(to))
        return text
 def get_missing_labels(self, sitelinks, dont):
     """Propose item labels from sitelink titles, skipping *dont* codes."""
     labels = {}
     for dbname, title in sitelinks.items():
         if ':' not in title and '/' in title:
             continue
         lang = self.normalize_lang(dbname.partition('wik')[0])
         if not lang or lang in dont:
             continue
         # [[d:Topic:Uhdjlv9aae6iijuc]]
         # todo: create a lib for this
         if lang == 'fr' and title.startswith(
                 ('Abbaye ', 'Cathédrale ', 'Chapelle ', 'Cloître ',
                  'Couvent ', 'Monastère ', 'Église ')):
             title = first_lower(title)
         known = labels.get(lang)
         # conflicting titles for one language: drop it entirely
         if known and first_lower(known) != first_lower(title):
             labels.pop(lang)  # todo: better handling
             dont.add(lang)
             continue
         labels[lang] = title
     return labels
Ejemplo n.º 11
0
def correctcap(link, text):
    """
    Capitalize link.

    If text links to a page with title link uncapitalized, uncapitalize link,
    otherwise capitalize it
    """
    title = link.title()
    lowered = first_lower(title)
    for pattern in ("[[{0!s}]]", "[[{0!s}|"):
        if pattern.format(lowered) in text:
            return lowered
    return title
 def get_missing_labels(self, sitelinks, dont):
     """Collect proposed labels from sitelink titles."""
     labels = {}
     for dbname, title in sitelinks.items():
         if ':' not in title and '/' in title:
             continue
         # fixme: 'wikidata' -> ('', 'wiki', 'data')
         # fixme: 'mediawikiwiki' -> ('media', 'wiki', 'wiki')
         lang = self.normalize_lang(dbname.partition('wik')[0])
         if not (lang and lang not in dont):
             continue
         # [[d:Topic:Uhdjlv9aae6iijuc]]
         # todo: create a lib for this
         if lang == 'fr' and title.startswith(
                 ('Abbaye ', 'Cathédrale ', 'Chapelle ', 'Cloître ',
                  'Couvent ', 'Monastère ', 'Église ')):
             title = first_lower(title)
         previous = labels.get(lang)
         # a case-insensitive mismatch means conflicting titles
         if previous and first_lower(previous) != first_lower(title):
             labels.pop(lang)  # todo: better handling
             dont.add(lang)
             continue
         labels[lang] = title
     return labels
Ejemplo n.º 13
0
    def from_cache(self, link):
        """Return the cached redirect target of *link*, or False."""
        link = link.replace('_', ' ').strip()  # todo: normalize completely
        if link not in self.redirects:
            return False

        if link in self.cache:
            return self.cache[link]

        page = pywikibot.Page(self.site, link)
        if not page.exists():
            pywikibot.warning('%s does not exist' % page.title())
            self.redirects.remove(link)  # fixme: both cases
            return False
        if not page.isRedirectPage():
            pywikibot.warning('%s is not a redirect' % page.title())
            self.redirects.remove(link)  # fixme: both cases
            return False

        title = page.getRedirectTarget().title()
        # a lowercase link resolves to the lowercase target title
        resolved = first_lower(title) if link == first_lower(link) else title
        self.cache[link] = resolved
        return resolved
Ejemplo n.º 14
0
def correctcap(link, text):
    """Return the link capitalized/uncapitalized according to the text.

    @param link: link page
    @type link: pywikibot.Page
    @param text: the wikitext that is supposed to refer to the link
    @type text: str
    @return: uncapitalized title of the link if the text links to the link
        with an uncapitalized title, else capitalized
    @rtype: str

    """
    title = link.title()
    lower_title = first_lower(title)
    if any(marker in text
           for marker in ("[[%s]]" % lower_title, "[[%s|" % lower_title)):
        return lower_title
    return title
def correctcap(link, text):
    """Return the link capitalized/uncapitalized according to the text.

    @param link: link page
    @type link: pywikibot.Page
    @param text: the wikitext that is supposed to refer to the link
    @type text: str
    @return: uncapitalized title of the link if the text links to the link
        with an uncapitalized title, else capitalized
    @rtype: str

    """
    upper = link.title()
    lower = first_lower(upper)
    linked_lower = "[[%s]]" % lower in text or "[[%s|" % lower in text
    return lower if linked_lower else upper
Ejemplo n.º 16
0
    def translateAndCapitalizeNamespaces(self, text):
        """Use localized namespace names.

        Rewrite the namespace prefix of wikilinks in *text* to the
        canonical local variant; main and user namespaces are skipped.

        @param text: the wikitext to work on
        @return: the text with namespace prefixes normalized
        """
        # arz uses english stylish codes
        if self.site.sitename == 'wikipedia:arz':
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for namespace in self.site.namespaces.values():
            if namespace.id in (0, 2, 3):
                # skip main (article) namespace
                # skip user namespace, maybe gender is used
                continue
            # a clone is needed. Won't change the namespace dict
            namespaces = list(namespace)
            thisNs = namespaces.pop(0)
            if namespace.id == 6 and family.name == 'wikipedia':
                if self.site.code in ('en', 'fr') and MediaWikiVersion(
                        self.site.version()) >= MediaWikiVersion('1.14'):
                    # do not change "Image" on en-wiki and fr-wiki
                    assert 'Image' in namespaces
                    namespaces.remove('Image')
                if self.site.code == 'hu':
                    # do not change "Kép" on hu-wiki
                    assert 'Kép' in namespaces
                    namespaces.remove('Kép')
                elif self.site.code == 'pt':
                    # TODO: bug T57242
                    continue
            # lowerspaced and underscored namespaces
            for i, item in enumerate(namespaces):
                item = item.replace(' ', '[ _]')
                # match both cases of the first letter via a char class
                item = '[%s%s]' % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # the lowercased final name is an accepted source variant too
            namespaces.append(first_lower(thisNs))
            if thisNs and namespaces:
                text = textlib.replaceExcept(
                    text,
                    r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                    % '|'.join(namespaces),
                    r'[[%s:\g<nameAndLabel>]]' % thisNs,
                    exceptions)
        return text
    def translateAndCapitalizeNamespaces(self, text):
        """Use localized namespace names.

        Rewrite the namespace prefix of wikilinks in *text* to the
        canonical local variant; main and user namespaces are skipped.

        @param text: the wikitext to work on
        @return: the text with namespace prefixes normalized
        """
        # arz uses english stylish codes
        if self.site.sitename == 'wikipedia:arz':
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ['nowiki', 'comment', 'math', 'pre']

        for namespace in self.site.namespaces.values():
            if namespace.id in (0, 2, 3):
                # skip main (article) namespace
                # skip user namespace, maybe gender is used
                continue
            # a clone is needed. Won't change the namespace dict
            namespaces = list(namespace)
            thisNs = namespaces.pop(0)
            if namespace.id == 6 and family.name == 'wikipedia':
                if self.site.code in ('en', 'fr') and MediaWikiVersion(
                        self.site.version()) >= MediaWikiVersion('1.14'):
                    # do not change "Image" on en-wiki and fr-wiki
                    assert 'Image' in namespaces
                    namespaces.remove('Image')
                if self.site.code == 'hu':
                    # do not change "Kép" on hu-wiki
                    assert 'Kép' in namespaces
                    namespaces.remove('Kép')
                elif self.site.code == 'pt':
                    # TODO: bug T57242
                    continue
            # lowerspaced and underscored namespaces
            for i, item in enumerate(namespaces):
                item = item.replace(' ', '[ _]')
                # match both cases of the first letter via a char class
                item = '[%s%s]' % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # the lowercased final name is an accepted source variant too
            namespaces.append(first_lower(thisNs))
            if thisNs and namespaces:
                text = textlib.replaceExcept(
                    text,
                    r'\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]'
                    % '|'.join(namespaces),
                    r'[[%s:\g<nameAndLabel>]]' % thisNs,
                    exceptions)
        return text
Ejemplo n.º 18
0
    def translateAndCapitalizeNamespaces(self, text):
        """Use localized namespace names.

        Rewrite the namespace prefix of wikilinks in *text* to the
        canonical local variant; main and user namespaces are skipped.

        @param text: the wikitext to work on
        @return: the text with namespace prefixes normalized
        """
        # arz uses english stylish codes
        if self.site.sitename == "wikipedia:arz":
            return text
        family = self.site.family
        # wiki links aren't parsed here.
        exceptions = ["nowiki", "comment", "math", "pre"]

        for namespace in self.site.namespaces.values():
            if namespace.id in (0, 2, 3):
                # skip main (article) namespace
                # skip user namespace, maybe gender is used
                continue
            # a clone is needed. Won't change the namespace dict
            namespaces = list(namespace)
            thisNs = namespaces.pop(0)
            if namespace.id == 6 and family.name == "wikipedia":
                if self.site.code in ("en", "fr") and MediaWikiVersion(
                        self.site.version()) >= MediaWikiVersion("1.14"):
                    # do not change "Image" on en-wiki and fr-wiki
                    assert "Image" in namespaces
                    namespaces.remove("Image")
                if self.site.code == "hu":
                    # do not change "Kép" on hu-wiki
                    assert "Kép" in namespaces
                    namespaces.remove("Kép")
                elif self.site.code == "pt":
                    # TODO: bug T57242
                    continue
            # lowerspaced and underscored namespaces
            for i, item in enumerate(namespaces):
                item = item.replace(" ", "[ _]")
                # match both cases of the first letter via a char class
                item = "[%s%s]" % (item[0], item[0].lower()) + item[1:]
                namespaces[i] = item
            # the lowercased final name is an accepted source variant too
            namespaces.append(first_lower(thisNs))
            if thisNs and namespaces:
                text = textlib.replaceExcept(
                    text,
                    r"\[\[\s*(%s) *:(?P<nameAndLabel>.*?)\]\]"
                    % "|".join(namespaces),
                    r"[[%s:\g<nameAndLabel>]]" % thisNs,
                    exceptions,
                )
        return text
 def fix_languages(self, data):
     """Move labels/descriptions/aliases from mapped language codes.

     For each (lang, norm) pair in self.lang_map, move the entry from
     lang to norm (when norm is truthy) and clear the lang entry.

     @param data: item data with 'labels', 'descriptions' and 'aliases'
     @return: True if anything was changed
     """
     ret = False
     for lang, norm in self.lang_map.items():
         label = data['labels'].get(lang)
         if not label:
             continue
         if norm:
             if norm in data['labels']:
                 aliases = data['aliases'].get(norm, [])
                 # compare case-insensitively on both sides; otherwise a
                 # capitalized label never matches the lowered aliases
                 if first_lower(label) not in map(first_lower, aliases):
                     aliases.append(label)
                     data['aliases'][norm] = aliases
             else:
                 data['labels'][norm] = label
         data['labels'][lang] = ''
         ret = True
     for lang, norm in self.lang_map.items():
         description = data['descriptions'].get(lang)
         if description:
             if norm and norm not in data['descriptions']:
                 data['descriptions'][norm] = description
             data['descriptions'][lang] = ''
             ret = True
     for lang, norm in self.lang_map.items():
         old_aliases = data['aliases'].get(lang)
         if old_aliases:
             if norm:
                 new_aliases = data['aliases'].get(norm, [])
                 # 'already' holds lowered forms, so membership tests and
                 # additions must use the lowered alias as well
                 already = set(map(first_lower, new_aliases))
                 if norm in data['labels']:
                     already.add(first_lower(data['labels'][norm]))
                 for alias in old_aliases:
                     if first_lower(alias) not in already:
                         new_aliases.append(alias)
                         already.add(first_lower(alias))
                 data['aliases'][norm] = new_aliases
             data['aliases'][lang] = []
             ret = True
     return ret
 def fix_languages(self, data):
     """Move labels/descriptions/aliases from mapped language codes.

     For each (lang, norm) pair in self.lang_map, move the entry from
     lang to norm (when norm is truthy) and clear the lang entry.

     @param data: item data with 'labels', 'descriptions' and 'aliases'
     @return: True if anything was changed
     """
     ret = False
     for lang, norm in self.lang_map.items():
         label = data['labels'].get(lang)
         if not label:
             continue
         if norm:
             if norm in data['labels']:
                 aliases = data['aliases'].get(norm, [])
                 # compare case-insensitively on both sides; otherwise a
                 # capitalized label never matches the lowered aliases
                 if first_lower(label) not in map(first_lower, aliases):
                     aliases.append(label)
                     data['aliases'][norm] = aliases
             else:
                 data['labels'][norm] = label
         data['labels'][lang] = ''
         ret = True
     for lang, norm in self.lang_map.items():
         description = data['descriptions'].get(lang)
         if description:
             if norm and norm not in data['descriptions']:
                 data['descriptions'][norm] = description
             data['descriptions'][lang] = ''
             ret = True
     for lang, norm in self.lang_map.items():
         old_aliases = data['aliases'].get(lang)
         if old_aliases:
             if norm:
                 new_aliases = data['aliases'].get(norm, [])
                 # 'already' holds lowered forms, so membership tests and
                 # additions must use the lowered alias as well
                 already = set(map(first_lower, new_aliases))
                 if norm in data['labels']:
                     already.add(first_lower(data['labels'][norm]))
                 for alias in old_aliases:
                     if first_lower(alias) not in already:
                         new_aliases.append(alias)
                         already.add(first_lower(alias))
                 data['aliases'][norm] = new_aliases
             data['aliases'][lang] = []
             ret = True
     return ret
Ejemplo n.º 21
0
 def fix_languages(self, data):
     """Move labels/descriptions/aliases from mapped language codes.

     For each (lang, norm) pair in self.lang_map, move the entry from
     lang to norm (when norm is truthy) and clear the lang entry.
     Comparisons are case-insensitive on the first letter (first_lower).

     @param data: item data with 'labels', 'descriptions' and 'aliases'
     @return: True if anything was changed
     """
     ret = False
     for lang, norm in self.lang_map.items():
         label = data['labels'].get(lang)
         if not label:
             continue
         if norm:
             norm_label = data['labels'].get(norm)
             if norm_label:
                 # target already labelled: keep the old label as an
                 # alias unless it is already there (case-insensitively)
                 if first_lower(norm_label) != first_lower(label):
                     aliases = data['aliases'].get(norm, [])
                     if first_lower(label) not in map(first_lower, aliases):
                         aliases.append(label)
                         data['aliases'][norm] = aliases
             else:
                 data['labels'][norm] = label
         # clear the source language entry
         data['labels'][lang] = ''
         ret = True
     for lang, norm in self.lang_map.items():
         description = data['descriptions'].get(lang)
         if description:
             if norm and norm not in data['descriptions']:
                 data['descriptions'][norm] = description
             data['descriptions'][lang] = ''
             ret = True
     for lang, norm in self.lang_map.items():
         old_aliases = data['aliases'].get(lang)
         if not old_aliases:
             continue
         if norm:
             new_aliases = data['aliases'].get(norm, [])
             # lowered forms of everything already present on the target
             already = set(map(first_lower, new_aliases))
             norm_label = data['labels'].get(norm)
             if norm_label:
                 already.add(first_lower(norm_label))
             for alias in old_aliases:
                 if first_lower(alias) not in already:
                     new_aliases.append(alias)
                     already.add(first_lower(alias))
             # fixme: buggy, raises not-recognized-array
             data['aliases'][norm] = new_aliases
         data['aliases'][lang] = []
         ret = True
     return ret
    def treat(self, refPage, disambPage):
        """Treat a page.

        @param disambPage: the disambiguation page or redirect we don't want
            anything to link to
        @type disambPage: pywikibot.Page
        @param refPage: a page linking to disambPage
        @type refPage: pywikibot.Page
        @return: False if the user pressed q to completely quit the program,
            True otherwise
        @rtype: bool

        """
        # TODO: break this function up into subroutines!

        self.current_page = refPage
        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                                 % (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s'
                             % (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False, automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?'
                    % refPage.title(),
                    [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary, main_only=self.main_only
                    )
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.'
                % refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s"
                                         % refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                                  % (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str and
                        self.dn_template_str[:-2] in text[m.end():m.end() +
                                                          len(self.dn_template_str) + 8]):
                    continue

                edit = EditOption('edit page', 'e', text, m.start(), disambPage.title())
                context_option = HighlightContextOption(
                    'more context', 'm', text, 60, start=m.start(), end=m.end())
                context_option.before_question = True

                options = [ListOption(self.alternatives, ''),
                           ListOption(self.alternatives, 'r'),
                           StandardOption('skip link', 's'),
                           edit,
                           StandardOption('next page', 'n'),
                           StandardOption('unlink', 'u')]
                if self.dn_template_str:
                    # '?', '/' for old choice
                    options += [AliasOption('tag template %s' % self.dn_template_str,
                                            ['t', '?', '/'])]
                options += [context_option]
                if not edited:
                    options += [ShowPageOption('show disambiguation page', 'd',
                                               m.start(), disambPage)]
                options += [
                    OutputProxyOption('list', 'l',
                                      SequenceOutputter(self.alternatives)),
                    AddAlternativeOption('add new', 'a',
                                         SequenceOutputter(self.alternatives))]
                if edited:
                    options += [StandardOption('save in this form', 'x')]

                # TODO: Output context on each question
                answer = pywikibot.input_choice('Option', options,
                                                default=self.always,
                                                force=bool(self.always))
                if answer == 'x':
                    assert edited, 'invalid option before editing'
                    break
                elif answer == 's':
                    n -= 1  # TODO what's this for?
                    continue
                elif answer == 'e':
                    text = edit.new_text
                    edited = True
                    curpos = 0
                    continue
                elif answer == 'n':
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                if answer == 't':
                    assert self.dn_template_str
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif answer == 'u':
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # Check that no option from above was missed
                    assert isinstance(answer, tuple), 'only tuple answer left.'
                    assert answer[0] in ['r', ''], 'only valid tuple answers.'
                    if answer[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    new_page_title = answer[1]
                    repPl = pywikibot.Page(pywikibot.Link(new_page_title,
                                                          disambPage.site))
                    if (new_page_title[0].isupper() or
                            link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title,
                                                  section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text and
                                       not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (
                        (len(new_page_title) <= len(link_text)) and
                        (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and
                        (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and
                        (not section)
                    ):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Ejemplo n.º 23
0
def treat(text, linkedPage, targetPage):
    """Replace links to linkedPage with links to targetPage in text.

    Based on the method of the same name in solve_disambiguation.py, with
    the interactive prompts removed: every matching link is rewritten.

    Parameters:
        text       - wikitext to process
        linkedPage - pywikibot.Page whose incoming links should be retargeted
        targetPage - pywikibot.Page the links should point to afterwards

    Returns the modified wikitext.
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()

    # Matches [[title#section|label]]trailing_chars wikilinks.
    linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                       r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' +
                       linktrail + ')')
    # Compile the linktrail pattern once instead of on every loop iteration.
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title').strip() == '' or \
           mysite.isInterwikiLink(m.group('title')):
            continue
        actualLinkPage = pywikibot.Page(targetPage.site, m.group('title'))
        # Check whether the link found is to linkedPage.
        if actualLinkPage != linkedPage:
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') is None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        # The interactive original offered 'unlink' and 'replace' choices;
        # here the choice was hard-coded to 'y', so the unlink branch was
        # unreachable and 'replaceit' was always False. Keep that behavior.
        replaceit = False

        # remove preceding ":"
        if link_text[0] == ':':
            link_text = link_text[1:]
        # preserve the capitalization of the existing link text
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove preceding ":"
        if new_page_title[0] == ':':
            new_page_title = new_page_title[1:]

        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif len(new_page_title) <= len(link_text) and \
             firstcap(link_text[:len(new_page_title)]) == \
             firstcap(new_page_title) and \
             trailR.sub('', link_text[len(new_page_title):]) == '' and \
             not section:
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text
Ejemplo n.º 24
0
    genFactory = pagegenerators.GeneratorFactory(site=site)
    for ns in (0, 14, 100):
        if family != 'wikisource' and ns == 100: # fixme: cswikiquote
            continue
        if family == 'wikisource' and ns == 0:
            continue
        genFactory.handleArg('-ns:%i' % ns)
    genFactory.handleArg('-unconnectedpages')
    generator = genFactory.getCombinedGenerator(preload=True)

    for page in generator:
        if page.namespace() != 14 and page.isDisambig():
            continue

        for template, fields in textlib.extract_templates_and_params(page.text):
            if first_lower(template) not in tp_map[project]:
                continue

            params = tp_map[project][first_lower(template)]
            for key in fields:
                if key not in params:
                    continue

                title = fields[key].strip()
                if not title:
                    continue

                target_lang = lang
                target_family = family
                if isinstance(params[key], dict):
                    if params[key].get('namespaces', []) and page.namespace() not in params[key]['namespaces']:
Ejemplo n.º 25
0
    def treat(self, refPage, disambPage):
        """
        Treat a page.

        Interactively fixes every link on refPage that points to
        disambPage, letting the user pick a replacement from
        self.alternatives, tag the link, unlink it, edit the page, or
        skip.  Saves the page asynchronously when changes were made.

        Parameters:
            disambPage - The disambiguation page or redirect we don't want
                anything to link to
            refPage - A page linking to disambPage
        Returns False if the user pressed q to completely quit the program.
        Otherwise, returns True.

        """
        # TODO: break this function up into subroutines!

        # include is tri-state: False (skip this page), True (work on the
        # page text), or the string "redirect" (work on the redirect source;
        # replacements then always discard the original link text).
        include = False
        # count of links removed via the 'unlink' option; passed to
        # setSummaryMessage() for the edit summary
        unlink_counter = 0
        # replacement titles chosen by the user; also used for the summary
        new_targets = []
        try:
            text = refPage.get()
            # checkContents() returns a human-readable skip reason, or a
            # falsy value when the page may be edited
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output('\n\nSkipping %s because it contains %s.\n\n'
                                 % (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            # refPage is itself a redirect to the disambiguation page
            pywikibot.output(u'%s is a redirect to %s'
                             % (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                # double redirect: offer to retarget refPage straight to
                # the first alternative
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False, automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?'
                    % refPage.title(),
                    [('yes', 'y'), ('no', 'n'), ('change redirect', 'c')], 'n',
                    automatic_quit=False)
                if choice == 'y':
                    # recurse into the pages that link to this redirect
                    gen = ReferringPageGeneratorWithIgnore(refPage,
                                                           self.primary)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    # edit the redirect page itself
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.'
                % refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            # number of links to disambPage found on this page so far
            n = 0
            # search resume position within text for the link regex
            curpos = 0
            # whether a disambiguation-needed template was inserted
            dn = False
            # whether the user manually edited the page text
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s"
                                         % refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    # malformed link target; leave it untouched
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]"
                                  % (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str and
                        self.dn_template_str[:-2] in text[m.end():m.end() +
                                                          len(self.dn_template_str) + 8]):
                    continue

                # This loop will run while the user doesn't choose an option
                # that will actually change the page
                while True:
                    self.current_page = refPage

                    if not self.always:
                        # at the beginning of the link, start red color.
                        # at the end of the link, reset the color to default
                        pywikibot.output(
                            text[max(0, m.start() - context):m.start()] +
                            '\03{lightred}' + text[m.start():m.end()] +
                            '\03{default}' + text[m.end():m.end() + context])
                        options = ['#', 'r#', '[s]kip link', '[e]dit page',
                                   '[n]ext page', '[u]nlink', '[q]uit']
                        if self.dn_template_str:
                            options.append(u'[t]ag template %s' % self.dn_template_str)
                        options.append('[m]ore context')
                        if not edited:
                            options.append('show [d]isambiguation page')
                        options += ['[l]ist', '[a]dd new']
                        if edited:
                            options += ['save in this form [x]']
                        options = concat_options('Option', 72, options)
                        choice = pywikibot.input(options)
                    else:
                        # non-interactive mode: reuse the preset answer
                        choice = self.always
                    if choice in ['a', 'A']:
                        # add a new replacement candidate
                        newAlternative = pywikibot.input(u'New alternative:')
                        self.alternatives.append(newAlternative)
                        self.listAlternatives()
                    elif choice in ['e', 'E']:
                        # open the page text in an external editor
                        editor = editarticle.TextEditor()
                        newText = editor.edit(text, jumpIndex=m.start(),
                                              highlight=disambPage.title())
                        # if user didn't press Cancel
                        if newText and newText != text:
                            text = newText
                            break
                    elif choice in ['d', 'D']:
                        # show the disambiguation page (or its redirect
                        # target) in an editor, read-only for our purposes
                        editor = editarticle.TextEditor()
                        if disambPage.isRedirectPage():
                            disambredir = disambPage.getRedirectTarget()
                            editor.edit(
                                disambredir.get(),
                                jumpIndex=m.start(),
                                highlight=disambredir.title())
                        else:
                            editor.edit(
                                disambPage.get(),
                                jumpIndex=m.start(),
                                highlight=disambPage.title())
                    elif choice in ['l', 'L']:
                        self.listAlternatives()
                    elif choice in ['m', 'M']:
                        # show more text around the link we're working on
                        context *= 2
                    else:
                        break

                if choice in ['e', 'E']:
                    # user has edited the page and then pressed 'OK'
                    edited = True
                    # rescan from the top: edit may have shifted offsets
                    curpos = 0
                    continue
                elif choice in ['n', 'N']:
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True
                elif choice in ['q', 'Q']:
                    # quit the program
                    self.quit()
                elif choice in ['s', 'S']:
                    # Next link on this page
                    n -= 1
                    continue
                elif choice in ['x', 'X'] and edited:
                    # Save the page as is
                    break

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                # '?', '/' for old choice
                if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif choice in ['u', 'U']:
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # 'r' prefix means replace the link text with the new
                    # title as well (only when the label equals the title)
                    if len(choice) > 0 and choice[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                        choice = choice[1:]
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    try:
                        # remaining input must be the index of an alternative
                        choice = int(choice)
                    except ValueError:
                        pywikibot.output(u"Unknown option")
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    if choice >= len(self.alternatives) or choice < 0:
                        pywikibot.output(
                            u"Choice out of range. Please select a number "
                            u"between 0 and %i." % (len(self.alternatives) - 1))
                        # show list of possible choices
                        self.listAlternatives()
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    new_page_title = self.alternatives[choice]
                    repPl = pywikibot.Page(pywikibot.Link(new_page_title,
                                                          disambPage.site))
                    # preserve the capitalization of the existing link text
                    if (new_page_title[0].isupper() or
                            link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title,
                                                  section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text and
                                       not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (
                        (len(new_page_title) <= len(link_text)) and
                        (firstcap(link_text[:len(new_page_title)]) == firstcap(new_page_title)) and
                        (re.sub(self.trailR, '', link_text[len(new_page_title):]) == '') and
                        (not section)
                    ):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Ejemplo n.º 26
0
    def treat(self, refPage, disambPage):
        """
        Treat a page.

        Parameters:
            disambPage - The disambiguation page or redirect we don't want
                anything to link to
            refPage - A page linking to disambPage
        Returns False if the user pressed q to completely quit the program.
        Otherwise, returns True.

        """
        # TODO: break this function up into subroutines!

        include = False
        unlink_counter = 0
        new_targets = []
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False,
                                      automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put_async(redir_text, summary=self.comment)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), [('yes', 'y'), ('no', 'n'),
                                      ('change redirect', 'c')],
                    'n',
                    automatic_quit=False)
                if choice == 'y':
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        if not self.treat(refPage2, refPage):
                            break
                elif choice == 'c':
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # make a backup of the original text so we can show the changes later
            original_text = text
            n = 0
            curpos = 0
            dn = False
            edited = False
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        pywikibot.output(u"No changes necessary in %s" %
                                         refPage.title())
                        return True
                    else:
                        # stop loop and save page
                        break
                # Make sure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str
                        and self.dn_template_str[:-2]
                        in text[m.end():m.end() + len(self.dn_template_str) +
                                8]):
                    continue

                # This loop will run while the user doesn't choose an option
                # that will actually change the page
                while True:
                    self.current_page = refPage

                    if not self.always:
                        # at the beginning of the link, start red color.
                        # at the end of the link, reset the color to default
                        pywikibot.output(text[max(0,
                                                  m.start() -
                                                  context):m.start()] +
                                         '\03{lightred}' +
                                         text[m.start():m.end()] +
                                         '\03{default}' +
                                         text[m.end():m.end() + context])
                        options = [
                            '#', 'r#', '[s]kip link', '[e]dit page',
                            '[n]ext page', '[u]nlink', '[q]uit'
                        ]
                        if self.dn_template_str:
                            options.append(u'[t]ag template %s' %
                                           self.dn_template_str)
                        options.append('[m]ore context')
                        if not edited:
                            options.append('show [d]isambiguation page')
                        options += ['[l]ist', '[a]dd new']
                        if edited:
                            options += ['save in this form [x]']
                        options = concat_options('Option', 72, options)
                        choice = pywikibot.input(options)
                    else:
                        choice = self.always
                    if choice in ['a', 'A']:
                        newAlternative = pywikibot.input(u'New alternative:')
                        self.alternatives.append(newAlternative)
                        self.listAlternatives()
                    elif choice in ['e', 'E']:
                        editor = editarticle.TextEditor()
                        newText = editor.edit(text,
                                              jumpIndex=m.start(),
                                              highlight=disambPage.title())
                        # if user didn't press Cancel
                        if newText and newText != text:
                            text = newText
                            break
                    elif choice in ['d', 'D']:
                        editor = editarticle.TextEditor()
                        if disambPage.isRedirectPage():
                            disambredir = disambPage.getRedirectTarget()
                            editor.edit(disambredir.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambredir.title())
                        else:
                            editor.edit(disambPage.get(),
                                        jumpIndex=m.start(),
                                        highlight=disambPage.title())
                    elif choice in ['l', 'L']:
                        self.listAlternatives()
                    elif choice in ['m', 'M']:
                        # show more text around the link we're working on
                        context *= 2
                    else:
                        break

                if choice in ['e', 'E']:
                    # user has edited the page and then pressed 'OK'
                    edited = True
                    curpos = 0
                    continue
                elif choice in ['n', 'N']:
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return True
                elif choice in ['q', 'Q']:
                    # quit the program
                    self.quit()
                elif choice in ['s', 'S']:
                    # Next link on this page
                    n -= 1
                    continue
                elif choice in ['x', 'X'] and edited:
                    # Save the page as is
                    break

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                # '?', '/' for old choice
                if choice in ['t', 'T', '?', '/'] and self.dn_template_str:
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif choice in ['u', 'U']:
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    if len(choice) > 0 and choice[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                        choice = choice[1:]
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    try:
                        choice = int(choice)
                    except ValueError:
                        pywikibot.output(u"Unknown option")
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    if choice >= len(self.alternatives) or choice < 0:
                        pywikibot.output(
                            u"Choice out of range. Please select a number "
                            u"between 0 and %i." %
                            (len(self.alternatives) - 1))
                        # show list of possible choices
                        self.listAlternatives()
                        # step back to ask the user again what to do with the
                        # current link
                        curpos -= 1
                        continue
                    new_page_title = self.alternatives[choice]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue

                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put_async(text, summary=self.comment)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return True
Ejemplo n.º 27
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py.

    Rewrite every wikilink in ``text`` that points to ``linkedPage``,
    asking the user interactively whether to retarget it to
    ``targetPage``, replace its text, unlink it, or leave it alone.

    @param text: wikitext to work on
    @param linkedPage: page the links currently point to
    @param targetPage: page the links should point to instead
    @return: the (possibly) modified wikitext
    """
    # make a backup of the original text so we can show the changes later
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    # Pre-compile both patterns once; they are invariant over the loop.
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)' % linktrail)
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        # (.strip() matches the check used by the sibling replace_links)
        if not m.group('title').strip() or \
           mysite.isInterwikiLink(m.group('title')):
            continue
        actualLinkPage = pywikibot.Page(mysite, m.group('title'))
        # Check whether the link found is to linkedPage.
        if actualLinkPage != linkedPage:
            continue

        # how many bytes should be displayed around the current link
        context = 30
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0,
                                  m.start() - context):m.start()] +
                         '\03{lightred}' + text[m.start():m.end()] +
                         '\03{default}' + text[m.end():m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}' %
              targetPage.title(), 'y'), ('Change and replace text', 'r'),
             ('Unlink', 'u')),
            default='n',
            automatic_quit=False)

        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        section = m.group('section') or ''
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        # Preserve the first-letter case of the existing link text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())
        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (len(new_page_title) <= len(link_text)
              and firstcap(link_text[:len(new_page_title)])
              == firstcap(new_page_title)
              and trailR.sub('', link_text[len(new_page_title):]) == ''
              and not section):
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
    return text
Ejemplo n.º 28
0
    def replace_links(self, text, linkedPage, targetPage):
        """Replace all source links by target.

        Rewrite every wikilink in ``text`` that points to ``linkedPage``
        so it points to ``targetPage`` instead, preserving label,
        trailing characters and the first-letter case of the link text.

        @param text: wikitext to work on
        @param linkedPage: page the links currently point to
        @param targetPage: page the links should point to instead
        @return: the (possibly) modified wikitext
        """
        mysite = pywikibot.Site()
        linktrail = mysite.linktrail()

        # Pre-compile both patterns once; they are invariant over the loop.
        linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                           r'(\|(?P<label>[^\]]*))?\]\]'
                           r'(?P<linktrail>' + linktrail + ')')
        trailR = re.compile(linktrail)
        curpos = 0
        # This loop will run until we have finished the current page
        while True:
            m = linkR.search(text, pos=curpos)
            if not m:
                break
            # Make sure that next time around we will not find this same hit.
            curpos = m.start() + 1
            # ignore interwiki links and links to sections of the same page
            # NOTE(review): the interwiki test uses mysite while the Page
            # below uses targetPage.site -- confirm these are the same site.
            if m.group('title').strip() == '' or \
               mysite.isInterwikiLink(m.group('title')):
                continue
            actualLinkPage = pywikibot.Page(targetPage.site,
                                            m.group('title'))
            # Check whether the link found is to linkedPage.
            if actualLinkPage != linkedPage:
                continue

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            page_title = m.group('title')
            link_text = m.group('label')

            if not link_text:
                # or like this: [[page_title]]trailing_chars
                link_text = page_title
            section = m.group('section') or ''
            if section and targetPage.section():
                pywikibot.warning(
                    'Source section {0} and target section {1} found. '
                    'Skipping.'.format(section, targetPage))
                continue
            trailing_chars = m.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # remove preleading ":"
            if link_text[0] == ':':
                link_text = link_text[1:]
            if not link_text:
                # the label was just ':'; indexing below would fail
                continue
            if link_text[0].isupper() or link_text[0].isdigit():
                new_page_title = targetPage.title()
            else:
                new_page_title = first_lower(targetPage.title())

            # remove preleading ":"
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            if new_page_title == link_text and not section:
                newlink = "[[%s]]" % new_page_title
            # check if we can create a link with trailing characters instead
            # of a pipelink
            elif (len(new_page_title) <= len(link_text) and
                  firstcap(link_text[:len(new_page_title)]) ==
                  firstcap(new_page_title) and
                  trailR.sub('', link_text[len(new_page_title):]) == '' and
                  not section):
                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                        link_text[len(new_page_title):])
            else:
                newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
            text = text[:m.start()] + newlink + text[m.end():]
        return text
Ejemplo n.º 29
0
        def handleOneLink(match):
            """Return the cleaned-up replacement text for one wikilink match.

            Normalizes a mainspace link: collapses underscores and repeated
            spaces in the title, moves leading/trailing spaces outside the
            link, decodes URL-encoded characters, and drops a redundant pipe
            when the label equals the title (up to first-letter case on
            'first-letter' sites) or differs only by a valid linktrail.
            Interwiki links, links outside namespace 0, invalid titles and
            empty titles are returned unchanged.
            """
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            # Malformed titles can make the interwiki test raise; treat
            # those as interwiki so the link is left untouched (T111513).
            try:
                is_interwiki = self.site.isInterwikiLink(titleWithSection)
            except ValueError:  # T111513
                is_interwiki = True

            if not is_interwiki:
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                page = pywikibot.Page(
                    pywikibot.Link(titleWithSection, self.site))
                try:
                    namespace = page.namespace()
                except pywikibot.InvalidTitle:
                    return match.group()
                if namespace == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    from pywikibot.page import url2unicode
                    titleWithSection = url2unicode(titleWithSection,
                                                   encodings=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars

                    # On 'first-letter' sites the first character of a title
                    # is case-insensitive, so compare lowercased variants.
                    if self.site.siteinfo['case'] == 'first-letter':
                        firstcase_title = first_lower(titleWithSection)
                        firstcase_label = first_lower(label)
                    else:
                        firstcase_title = titleWithSection
                        firstcase_label = label

                    if firstcase_label == firstcase_title:
                        newLink = '[[%s]]' % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (firstcase_label.startswith(firstcase_title) and
                          trailR.sub('', label[len(titleWithSection):]) == ''):
                        newLink = '[[%s]]%s' % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])

                    else:
                        # Try to capitalize the first letter of the title.
                        # Not useful for languages that don't capitalize nouns.
                        # TODO: Add a configuration variable for each site,
                        # which determines if the link target is written in
                        # uppercase
                        if self.site.sitename == 'wikipedia:de':
                            titleWithSection = first_upper(titleWithSection)
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()
        def handleOneLink(match):
            """Return the cleaned-up replacement text for one wikilink match.

            Variant of the preceding handleOneLink in this file: it calls
            isInterwikiLink without a try/except guard and compares label to
            title directly (exact match or first-lowered title), instead of
            consulting self.site.siteinfo['case'].  Otherwise the same
            normalization: collapse underscores/spaces in the title, move
            leading/trailing spaces outside the link, decode URL-encoded
            characters, and drop a redundant pipe where possible.
            """
            titleWithSection = match.group('titleWithSection')
            label = match.group('label')
            trailingChars = match.group('linktrail')
            newline = match.group('newline')

            if not self.site.isInterwikiLink(titleWithSection):
                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                # We only work on namespace 0 because pipes and linktrails work
                # differently for images and categories.
                page = pywikibot.Page(pywikibot.Link(titleWithSection,
                                                     self.site))
                try:
                    namespace = page.namespace()
                except pywikibot.InvalidTitle:
                    return match.group()
                if namespace == 0:
                    # Replace underlines by spaces, also multiple underlines
                    titleWithSection = re.sub('_+', ' ', titleWithSection)
                    # Remove double spaces
                    titleWithSection = re.sub('  +', ' ', titleWithSection)
                    # Remove unnecessary leading spaces from title,
                    # but remember if we did this because we eventually want
                    # to re-add it outside of the link later.
                    titleLength = len(titleWithSection)
                    titleWithSection = titleWithSection.lstrip()
                    hadLeadingSpaces = (len(titleWithSection) != titleLength)
                    hadTrailingSpaces = False
                    # Remove unnecessary trailing spaces from title,
                    # but remember if we did this because it may affect
                    # the linktrail and because we eventually want to
                    # re-add it outside of the link later.
                    if not trailingChars:
                        titleLength = len(titleWithSection)
                        titleWithSection = titleWithSection.rstrip()
                        hadTrailingSpaces = (len(titleWithSection) !=
                                             titleLength)

                    # Convert URL-encoded characters to unicode
                    from pywikibot.page import url2unicode
                    titleWithSection = url2unicode(titleWithSection,
                                                   encodings=self.site)

                    if titleWithSection == '':
                        # just skip empty links.
                        return match.group()

                    # Remove unnecessary initial and final spaces from label.
                    # Please note that some editors prefer spaces around pipes.
                    # (See [[en:Wikipedia:Semi-bots]]). We remove them anyway.
                    if label is not None:
                        # Remove unnecessary leading spaces from label,
                        # but remember if we did this because we want
                        # to re-add it outside of the link later.
                        labelLength = len(label)
                        label = label.lstrip()
                        hadLeadingSpaces = (len(label) != labelLength)
                        # Remove unnecessary trailing spaces from label,
                        # but remember if we did this because it affects
                        # the linktrail.
                        if not trailingChars:
                            labelLength = len(label)
                            label = label.rstrip()
                            hadTrailingSpaces = (len(label) != labelLength)
                    else:
                        label = titleWithSection
                    if trailingChars:
                        label += trailingChars

                    # Drop the pipe when label and title coincide (allowing
                    # a lowercased first letter in the label).
                    if titleWithSection == label or \
                       first_lower(titleWithSection) == label:
                        newLink = "[[%s]]" % label
                    # Check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif (len(titleWithSection) <= len(label) and
                          label[:len(titleWithSection)] == titleWithSection and
                          re.sub(trailR, '',
                                 label[len(titleWithSection):]) == ''):
                        newLink = "[[%s]]%s" % (label[:len(titleWithSection)],
                                                label[len(titleWithSection):])
                    else:
                        # Try to capitalize the first letter of the title.
                        # Not useful for languages that don't capitalize nouns.
                        # TODO: Add a configuration variable for each site,
                        # which determines if the link target is written in
                        # uppercase
                        if self.site.sitename == 'wikipedia:de':
                            titleWithSection = first_upper(titleWithSection)
                        newLink = "[[%s|%s]]" % (titleWithSection, label)
                    # re-add spaces that were pulled out of the link.
                    # Examples:
                    #   text[[ title ]]text        -> text [[title]] text
                    #   text[[ title | name ]]text -> text [[title|name]] text
                    #   text[[ title |name]]text   -> text[[title|name]]text
                    #   text[[title| name]]text    -> text [[title|name]]text
                    if hadLeadingSpaces and not newline:
                        newLink = ' ' + newLink
                    if hadTrailingSpaces:
                        newLink = newLink + ' '
                    if newline:
                        newLink = newline + newLink
                    return newLink
            # don't change anything
            return match.group()
Ejemplo n.º 31
0
    genFactory = pagegenerators.GeneratorFactory(site=site)
    for ns in (0, 14, 100):
        if family != 'wikisource' and ns == 100: # fixme: cswikiquote
            continue
        if family == 'wikisource' and ns == 0:
            continue
        genFactory.handleArg('-ns:%i' % ns)
    genFactory.handleArg('-unconnectedpages')
    generator = genFactory.getCombinedGenerator(preload=True)

    for page in generator:
        if page.namespace() != 14 and page.isDisambig():
            continue

        for template, fields in textlib.extract_templates_and_params(page.text):
            if first_lower(template) not in tp_map[project].keys():
                continue

            params = tp_map[project][first_lower(template)]
            for key in fields.keys():
                if key not in params.keys():
                    continue

                title = fields[key].strip()
                if not title:
                    continue

                target_lang = lang
                target_family = family
                if isinstance(params[key], dict):
                    if params[key].get('namespaces', []) and page.namespace() not in params[key]['namespaces']:
Ejemplo n.º 32
0
    def treat_disamb_only(self, refPage, disambPage):
        """Resolve the links to disambPage but don't look for its redirects.

        @param disambPage: the disambiguation page or redirect we don't want
            anything to link to
        @type disambPage: pywikibot.Page
        @param refPage: a page linking to disambPage
        @type refPage: pywikibot.Page
        @return: "nextpage" if the user enters "n" to skip this page,
            "nochange" if the page needs no change, and
            "done" if the page is processed successfully
        @rtype: str

        """
        # TODO: break this function up into subroutines!

        self.current_page = refPage
        include = False          # True/"redirect" once refPage's text should be edited
        unlink_counter = 0       # how many links were removed via the 'unlink' option
        new_targets = []         # replacement titles chosen, for the edit summary
        # Fetch the page text; decide whether to process it at all.
        try:
            text = refPage.get()
            ignoreReason = self.checkContents(text)
            if ignoreReason:
                pywikibot.output(
                    '\n\nSkipping %s because it contains %s.\n\n' %
                    (refPage.title(), ignoreReason))
            else:
                include = True
        except pywikibot.IsRedirectPage:
            # refPage is itself a redirect to the disambiguation page.
            pywikibot.output(u'%s is a redirect to %s' %
                             (refPage.title(), disambPage.title()))
            if disambPage.isRedirectPage():
                # Offer to retarget the redirect at the first alternative.
                target = self.alternatives[0]
                if pywikibot.input_yn(u'Do you want to make redirect %s point '
                                      'to %s?' % (refPage.title(), target),
                                      default=False,
                                      automatic_quit=False):
                    redir_text = '#%s [[%s]]' \
                                 % (self.mysite.redirect(), target)
                    try:
                        refPage.put(redir_text,
                                    summary=self.comment,
                                    asynchronous=True)
                    except pywikibot.PageNotSaved as error:
                        pywikibot.output(u'Page not saved: %s' % error.args)
            else:
                choice = pywikibot.input_choice(
                    u'Do you want to work on pages linking to %s?' %
                    refPage.title(), [('yes', 'y'), ('no', 'n'),
                                      ('change redirect', 'c')],
                    'n',
                    automatic_quit=False)
                if choice == 'y':
                    # Recurse into the pages that link to this redirect.
                    gen = ReferringPageGeneratorWithIgnore(
                        refPage, self.primary, main_only=self.main_only)
                    preloadingGen = pagegenerators.PreloadingGenerator(gen)
                    for refPage2 in preloadingGen:
                        # run until the user selected 'quit'
                        self.treat(refPage2, refPage)
                elif choice == 'c':
                    # Edit the redirect page itself.
                    text = refPage.get(get_redirect=True)
                    include = "redirect"
        except pywikibot.NoPage:
            pywikibot.output(
                u'Page [[%s]] does not seem to exist?! Skipping.' %
                refPage.title())
            include = False
        if include in (True, "redirect"):
            # save the original text so we can show the changes later
            original_text = text
            n = 0            # number of links to disambPage found so far
            curpos = 0       # scan position within text
            dn = False       # whether a dab-needed template was inserted
            edited = False   # whether the user manually edited the text
            # This loop will run until we have finished the current page
            while True:
                m = self.linkR.search(text, pos=curpos)
                if not m:
                    if n == 0:
                        # No changes necessary for this disambiguation title.
                        return 'nochange'
                    else:
                        # stop loop and save page
                        break
                # Ensure that next time around we will not find this same hit.
                curpos = m.start() + 1
                try:
                    foundlink = pywikibot.Link(m.group('title'),
                                               disambPage.site)
                    foundlink.parse()
                except pywikibot.Error:
                    continue
                # ignore interwiki links
                if foundlink.site != disambPage.site:
                    continue
                # Check whether the link found is to disambPage.
                try:
                    if foundlink.canonical_title() != disambPage.title():
                        continue
                except pywikibot.Error:
                    # must be a broken link
                    pywikibot.log(u"Invalid link [[%s]] in page [[%s]]" %
                                  (m.group('title'), refPage.title()))
                    continue
                n += 1
                # how many bytes should be displayed around the current link
                context = 60
                # check if there's a dn-template here already
                if (self.dnSkip and self.dn_template_str
                        and self.dn_template_str[:-2]
                        in text[m.end():m.end() + len(self.dn_template_str) +
                                8]):
                    continue

                # Build the interactive menu for this occurrence.
                edit = EditOption('edit page', 'e', text, m.start(),
                                  disambPage.title())
                context_option = HighlightContextOption('more context',
                                                        'm',
                                                        text,
                                                        60,
                                                        start=m.start(),
                                                        end=m.end())
                context_option.before_question = True

                options = [
                    ListOption(self.alternatives, ''),
                    ListOption(self.alternatives, 'r'),
                    StandardOption('skip link', 's'), edit,
                    StandardOption('next page', 'n'),
                    StandardOption('unlink', 'u')
                ]
                if self.dn_template_str:
                    # '?', '/' for old choice
                    options += [
                        AliasOption('tag template %s' % self.dn_template_str,
                                    ['t', '?', '/'])
                    ]
                options += [context_option]
                if not edited:
                    options += [
                        ShowPageOption('show disambiguation page', 'd',
                                       m.start(), disambPage)
                    ]
                options += [
                    OutputProxyOption('list', 'l',
                                      SequenceOutputter(self.alternatives)),
                    AddAlternativeOption('add new', 'a',
                                         SequenceOutputter(self.alternatives))
                ]
                if edited:
                    options += [StandardOption('save in this form', 'x')]

                # TODO: Output context on each question
                answer = pywikibot.input_choice('Option',
                                                options,
                                                default=self.always,
                                                force=bool(self.always))
                if answer == 'x':
                    assert edited, 'invalid option before editing'
                    break
                elif answer == 's':
                    n -= 1  # TODO what's this for?
                    continue
                elif answer == 'e':
                    # Replace the working text with the user's manual edit
                    # and restart the scan from the beginning.
                    text = edit.new_text
                    edited = True
                    curpos = 0
                    continue
                elif answer == 'n':
                    # skip this page
                    if self.primary:
                        # If run with the -primary argument, skip this
                        # occurrence next time.
                        self.primaryIgnoreManager.ignore(refPage)
                    return 'nextpage'

                # The link looks like this:
                # [[page_title|link_text]]trailing_chars
                page_title = m.group('title')
                link_text = m.group('label')

                if not link_text:
                    # or like this: [[page_title]]trailing_chars
                    link_text = page_title
                if m.group('section') is None:
                    section = ''
                else:
                    section = m.group('section')
                trailing_chars = m.group('linktrail')
                if trailing_chars:
                    link_text += trailing_chars
                if answer == 't':
                    assert self.dn_template_str
                    # small chunk of text to search
                    search_text = text[m.end():m.end() + context]
                    # figure out where the link (and sentance) ends, put note
                    # there
                    end_of_word_match = re.search(r'\s', search_text)
                    if end_of_word_match:
                        position_split = end_of_word_match.start(0)
                    else:
                        position_split = 0
                    # insert dab needed template
                    text = (text[:m.end() + position_split] +
                            self.dn_template_str +
                            text[m.end() + position_split:])
                    dn = True
                    continue
                elif answer == 'u':
                    # unlink - we remove the section if there's any
                    text = text[:m.start()] + link_text + text[m.end():]
                    unlink_counter += 1
                    continue
                else:
                    # Check that no option from above was missed
                    assert isinstance(answer, tuple), 'only tuple answer left.'
                    assert answer[0] in ['r', ''], 'only valid tuple answers.'
                    if answer[0] == 'r':
                        # we want to throw away the original link text
                        replaceit = link_text == page_title
                    elif include == "redirect":
                        replaceit = True
                    else:
                        replaceit = False

                    new_page_title = answer[1]
                    repPl = pywikibot.Page(
                        pywikibot.Link(new_page_title, disambPage.site))
                    # Preserve the capitalization of the existing link text:
                    # only lower-case the new title when both the chosen title
                    # and the old link text start lower-case.
                    if (new_page_title[0].isupper() or link_text[0].isupper()):
                        new_page_title = repPl.title()
                    else:
                        new_page_title = repPl.title()
                        new_page_title = first_lower(new_page_title)
                    if new_page_title not in new_targets:
                        new_targets.append(new_page_title)
                    if replaceit and trailing_chars:
                        newlink = "[[%s%s]]%s" % (new_page_title, section,
                                                  trailing_chars)
                    elif replaceit or (new_page_title == link_text
                                       and not section):
                        newlink = "[[%s]]" % new_page_title
                    # check if we can create a link with trailing characters
                    # instead of a pipelink
                    elif ((len(new_page_title) <= len(link_text))
                          and (firstcap(link_text[:len(new_page_title)])
                               == firstcap(new_page_title))
                          and (re.sub(self.trailR, '',
                                      link_text[len(new_page_title):]) == '')
                          and (not section)):
                        newlink = "[[%s]]%s" \
                                  % (link_text[:len(new_page_title)],
                                     link_text[len(new_page_title):])
                    else:
                        newlink = "[[%s%s|%s]]" \
                                  % (new_page_title, section, link_text)
                    text = text[:m.start()] + newlink + text[m.end():]
                    continue
                # Todo: This line is unreachable (T155337)
                pywikibot.output(text[max(0, m.start() - 30):m.end() + 30])
            if text == original_text:
                pywikibot.output(u'\nNo changes have been made:\n')
            else:
                pywikibot.output(u'\nThe following changes have been made:\n')
                pywikibot.showDiff(original_text, text)
                pywikibot.output(u'')
                # save the page
                self.setSummaryMessage(disambPage, new_targets, unlink_counter,
                                       dn)
                try:
                    refPage.put(text, summary=self.comment, asynchronous=True)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Page not saved: page is locked')
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Page not saved: %s' % error.args)
        return 'done'
Ejemplo n.º 33
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py.

    Rewrite every wikilink in text that points to linkedPage so it points
    to targetPage instead, preserving the visible label, the section
    anchor and any trailing link characters.

    @param text: the wikitext to process
    @param linkedPage: the page currently being linked to
    @param targetPage: the page the links should point to afterwards
    @return: the modified wikitext
    @rtype: str
    """
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()

    # make a backup of the original text so we can show the changes later
    linkR = re.compile(
        r"\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?" r"(\|(?P<label>[^\]]*))?\]\](?P<linktrail>" + linktrail + ")"
    )
    # Compile the linktrail pattern once; the original recompiled it on
    # every matching link inside the loop.
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group("title").strip() == "" or mysite.isInterwikiLink(m.group("title")):
            continue
        else:
            actualLinkPage = pywikibot.Page(targetPage.site, m.group("title"))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        # Non-interactive variant: always behave as if the user chose 'yes'.
        choice = "y"

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group("title")
        link_text = m.group("label")

        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group("section") is None:
            section = ""
        else:
            section = m.group("section")
        trailing_chars = m.group("linktrail")
        if trailing_chars:
            link_text += trailing_chars

        if choice in "uU":
            # unlink - we remove the section if there's any
            text = text[: m.start()] + link_text + text[m.end() :]
            continue
        replaceit = choice in "rR"

        # remove leading ":"
        if link_text[0] == ":":
            link_text = link_text[1:]
        # Preserve the capitalization of the original link text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())

        # remove leading ":"
        if new_page_title[0] == ":":
            new_page_title = new_page_title[1:]

        if replaceit and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif replaceit or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (
            len(new_page_title) <= len(link_text)
            and firstcap(link_text[: len(new_page_title)]) == firstcap(new_page_title)
            and trailR.sub("", link_text[len(new_page_title) :]) == ""
            and not section
        ):
            newlink = "[[%s]]%s" % (link_text[: len(new_page_title)], link_text[len(new_page_title) :])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[: m.start()] + newlink + text[m.end() :]
        continue
    return text
Ejemplo n.º 34
0
    def replace_links(self, text, linkedPage, targetPage):
        """Replace all source links by target.

        Rewrite every wikilink in text that points to linkedPage so it
        points to targetPage, preserving the label, section anchor and
        trailing link characters. Links with a section are skipped when
        the target itself carries a section.

        @param text: the wikitext to process
        @param linkedPage: the page currently being linked to
        @param targetPage: the page the links should point to afterwards
        @return: the modified wikitext
        @rtype: str
        """
        mysite = pywikibot.Site()
        linktrail = mysite.linktrail()

        # make a backup of the original text so we can show the changes later
        linkR = re.compile(r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
                           r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' +
                           linktrail + ')')
        # Compile the linktrail pattern once; the original recompiled it on
        # every matching link inside the loop.
        trailR = re.compile(linktrail)
        curpos = 0
        # This loop will run until we have finished the current page
        while True:
            m = linkR.search(text, pos=curpos)
            if not m:
                break
            # Make sure that next time around we will not find this same hit.
            curpos = m.start() + 1
            # ignore interwiki links and links to sections of the same page
            if m.group('title').strip() == '' or \
               mysite.isInterwikiLink(m.group('title')):
                continue
            else:
                actualLinkPage = pywikibot.Page(targetPage.site,
                                                m.group('title'))
                # Check whether the link found is to page.
                if actualLinkPage != linkedPage:
                    continue

            # The link looks like this:
            # [[page_title|link_text]]trailing_chars
            page_title = m.group('title')
            link_text = m.group('label')

            if not link_text:
                # or like this: [[page_title]]trailing_chars
                link_text = page_title
            if m.group('section') is None:
                section = ''
            else:
                section = m.group('section')
            if section and targetPage.section():
                pywikibot.warning(
                    'Source section {0} and target section {1} found. '
                    'Skipping.'.format(section, targetPage))
                continue
            trailing_chars = m.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # remove leading ":"
            if link_text[0] == ':':
                link_text = link_text[1:]
            # Preserve the capitalization of the original link text.
            if link_text[0].isupper() or link_text[0].isdigit():
                new_page_title = targetPage.title()
            else:
                new_page_title = first_lower(targetPage.title())

            # remove leading ":"
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            if (new_page_title == link_text and not section):
                newlink = "[[%s]]" % new_page_title
            # check if we can create a link with trailing characters instead of a
            # pipelink
            elif (len(new_page_title) <= len(link_text)
                  and firstcap(link_text[:len(new_page_title)])
                  == firstcap(new_page_title)
                  and trailR.sub('',
                                 link_text[len(new_page_title):]) == ''
                  and not section):
                newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                        link_text[len(new_page_title):])
            else:
                newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
            text = text[:m.start()] + newlink + text[m.end():]
            continue
        return text
Ejemplo n.º 35
0
def treat(text, linkedPage, targetPage):
    """Based on the method of the same name in solve_disambiguation.py.

    Interactively rewrite wikilinks in text that point to linkedPage so
    they point to targetPage instead. For each matching link the user is
    asked whether to keep it, change it, change-and-replace the label, or
    unlink it.

    @param text: the wikitext to process
    @param linkedPage: the page currently being linked to
    @param targetPage: the page the links should point to afterwards
    @return: the modified wikitext
    @rtype: str
    """
    # make a backup of the original text so we can show the changes later
    mysite = pywikibot.Site()
    linktrail = mysite.linktrail()
    linkR = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?(\|(?P<label>[^\]]*))?\]\](?P<linktrail>%s)'
        % linktrail)
    # Compile the linktrail pattern once; the original recompiled it on
    # every matching link inside the loop.
    trailR = re.compile(linktrail)
    curpos = 0
    # This loop will run until we have finished the current page
    while True:
        m = linkR.search(text, pos=curpos)
        if not m:
            break
        # Make sure that next time around we will not find this same hit.
        curpos = m.start() + 1
        # ignore interwiki links and links to sections of the same page
        if m.group('title') == '' or mysite.isInterwikiLink(m.group('title')):
            continue
        else:
            actualLinkPage = pywikibot.Page(mysite, m.group('title'))
            # Check whether the link found is to page.
            if actualLinkPage != linkedPage:
                continue

        # how many bytes should be displayed around the current link
        context = 30
        # at the beginning of the link, start red color.
        # at the end of the link, reset the color to default
        pywikibot.output(text[max(0, m.start() - context): m.start()] +
                         '\03{lightred}' + text[m.start(): m.end()] +
                         '\03{default}' + text[m.end(): m.end() + context])
        choice = pywikibot.input_choice(
            'What should be done with the link?',
            (('Do not change', 'n'),
             ('Change link to \03{lightpurple}%s\03{default}'
              % targetPage.title(), 'y'),
             ('Change and replace text', 'r'), ('Unlink', 'u')),
            default='n', automatic_quit=False)

        if choice == 'n':
            continue

        # The link looks like this:
        # [[page_title|link_text]]trailing_chars
        page_title = m.group('title')
        link_text = m.group('label')
        if not link_text:
            # or like this: [[page_title]]trailing_chars
            link_text = page_title
        if m.group('section') is None:
            section = ''
        else:
            section = m.group('section')
        trailing_chars = m.group('linktrail')
        if trailing_chars:
            link_text += trailing_chars

        if choice == 'u':
            # unlink - we remove the section if there's any
            text = text[:m.start()] + link_text + text[m.end():]
            continue

        # Preserve the capitalization of the original link text.
        if link_text[0].isupper():
            new_page_title = targetPage.title()
        else:
            new_page_title = first_lower(targetPage.title())
        if choice == 'r' and trailing_chars:
            newlink = "[[%s%s]]%s" % (new_page_title, section, trailing_chars)
        elif choice == 'r' or (new_page_title == link_text and not section):
            newlink = "[[%s]]" % new_page_title
        # check if we can create a link with trailing characters instead of a
        # pipelink
        elif (len(new_page_title) <= len(link_text)
              and firstcap(link_text[:len(new_page_title)])
              == firstcap(new_page_title)
              and trailR.sub('', link_text[len(new_page_title):]) == ''
              and not section):
            newlink = "[[%s]]%s" % (link_text[:len(new_page_title)],
                                    link_text[len(new_page_title):])
        else:
            newlink = "[[%s%s|%s]]" % (new_page_title, section, link_text)
        text = text[:m.start()] + newlink + text[m.end():]
        continue
    return text