Beispiel #1
0
    def addReferences(self, oldText):
        """
        Add a references tag into an existing section where it fits into.

        A malformed references tag is repaired first if one is found.
        Otherwise, if there is no fitting section, creates a new section
        containing the references tag.

        @param oldText: page text to be modified
        @type oldText: str
        @return: The modified pagetext
        @rtype: str
        """
        # Do we have a malformed <reference> tag which could be repaired?

        # Repair two opening tags or a opening and an empty tag
        pattern = re.compile(
            r'< *references *>(.*?)'
            r'< */?\s*references */? *>', re.DOTALL)
        if pattern.search(oldText):
            # The replacement has to be a raw string: in a plain string
            # literal '\1' is the control character chr(1), not a
            # backreference to the text captured between the two tags.
            return pattern.sub(r'<references>\1</references>', oldText)
        # Repair single unclosed references tag
        pattern = re.compile(r'< *references *>')
        if pattern.search(oldText):
            return pattern.sub('<references />', oldText)

        # Is there an existing section where we can add the references tag?
        for section in i18n.translate(self.site, referencesSections):
            # NOTE: section is interpolated unescaped, so the configured
            # section names are treated as regular expressions.
            sectionR = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
            index = 0
            while index < len(oldText):
                match = sectionR.search(oldText, index)
                if match:
                    if textlib.isDisabled(oldText, match.start()):
                        # Heading is reported as disabled; keep searching
                        # behind it for another heading of the same name.
                        index = match.end()
                    else:
                        # Put the references text directly below the heading
                        newText = (oldText[:match.end()] + u'\n' +
                                   self.referencesText + u'\n' +
                                   oldText[match.end():])
                        return newText
                else:
                    break

        # Create a new section for the references tag
        for section in i18n.translate(self.site, placeBeforeSections):
            # Find out where to place the new section
            # The named group forces the same number of '=' on both sides
            # and is handed on as the heading level of the new section.
            sectionR = re.compile(
                r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
            index = 0
            while index < len(oldText):
                match = sectionR.search(oldText, index)
                if match:
                    if textlib.isDisabled(oldText, match.start()):
                        index = match.end()
                    else:
                        # Insert in front of the heading that was found
                        index = match.start()
                        ident = match.group('ident')
                        return self.createReferenceSection(
                            oldText, index, ident)
                else:
                    break
        # This gets complicated: we want to place the new references
        # section over the interwiki links and categories, but also
        # over all navigation bars, persondata, and other templates
        # that are at the bottom of the page. So we need some advanced
        # regex magic.
        # The strategy is: create a temporary copy of the text. From that,
        # keep removing interwiki links, templates etc. from the bottom.
        # At the end, look at the length of the temp text. That's the position
        # where we'll insert the references section.
        catNamespaces = '|'.join(self.site.category_namespaces())
        categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
        interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
        # won't work with nested templates
        # the negative lookahead assures that we'll match the last template
        # occurrence in the temp text.
        # FIXME:
        # {{commons}} or {{commonscat}} are part of Weblinks section
        # * {{template}} is mostly part of a section
        # so templatePattern must be fixed
        templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
        commentPattern = r'<!--((?!-->).)*?-->\s*'
        # Anchored with '$' so that only an element at the very end of the
        # temporary text is matched and cut off per iteration.
        metadataR = re.compile(
            r'(\r?\n)?(%s|%s|%s|%s)$' % (categoryPattern, interwikiPattern,
                                         templatePattern, commentPattern),
            re.DOTALL)
        tmpText = oldText
        while True:
            match = metadataR.search(tmpText)
            if match:
                tmpText = tmpText[:match.start()]
            else:
                break
        index = len(tmpText)
        return self.createReferenceSection(oldText, index)
    def addReferences(self, oldText):
        """
        Insert the references tag at the best fitting place of the page.

        An existing references section is preferred. If there is none, a
        new section holding the references tag is created, either in front
        of one of the configured sections or above the trailing metadata.

        @param oldText: page text to be modified
        @return: The modified pagetext
        """
        # First choice: put the tag below an existing references heading.
        for section in i18n.translate(self.site, referencesSections):
            heading = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
            pos = 0
            while pos < len(oldText):
                found = heading.search(oldText, pos)
                if not found:
                    break
                if not textlib.isDisabled(oldText, found.start()):
                    pywikibot.output(
                        u'Adding references tag to existing %s section...\n'
                        % section)
                    cut = found.end()
                    return (oldText[:cut] + u'\n' + self.referencesText
                            + u'\n' + oldText[cut:])
                # Disabled heading: report it and look further down.
                pywikibot.output(
                    'Existing  %s section is commented out, skipping.'
                    % section)
                pos = found.end()

        # Second choice: open a new section in front of a known heading.
        for section in i18n.translate(self.site, placeBeforeSections):
            # The named group requires matching '=' runs on both sides.
            heading = re.compile(
                r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
            pos = 0
            while pos < len(oldText):
                found = heading.search(oldText, pos)
                if not found:
                    break
                if not textlib.isDisabled(oldText, found.start()):
                    pywikibot.output(
                        u'Adding references section before %s section...\n'
                        % section)
                    return self.createReferenceSection(
                        oldText, found.start(), found.group('ident'))
                pywikibot.output(
                    'Existing %s section is commented out, won\'t add '
                    'the references in front of it.' % section)
                pos = found.end()

        # Last resort: the new section goes above everything that sits at
        # the bottom of the page (categories, interwiki links, templates
        # and comments). Work on a copy of the text and keep cutting such
        # elements off its end; the length that is left over is the
        # insertion position.
        catNamespaces = '|'.join(self.site.category_namespaces())
        categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
        interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
        # Nested templates are not supported. The negative lookahead makes
        # sure that the last template occurrence in the copy is matched.
        # FIXME: {{commons}} or {{commonscat}} are part of the Weblinks
        # section and "* {{template}}" is mostly part of a section, so
        # templatePattern must be fixed.
        templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
        commentPattern = r'<!--((?!-->).)*?-->\s*'
        trailing = re.compile(
            r'(\r?\n)?(%s|%s|%s|%s)$' % (
                categoryPattern, interwikiPattern,
                templatePattern, commentPattern),
            re.DOTALL)
        remaining = oldText
        tail = trailing.search(remaining)
        while tail:
            remaining = remaining[:tail.start()]
            tail = trailing.search(remaining)
        pywikibot.output(
            u'Found no section that can be preceeded by a new references '
            u'section.\nPlacing it before interwiki links, categories, and '
            u'bottom templates.')
        return self.createReferenceSection(oldText, len(remaining))
Beispiel #3
0
    def addReferences(self, oldText):
        """
        Place the references tag where it fits best in the page text.

        The tag is added to an existing references section when there is
        one; otherwise a new section containing the tag is created.

        @param oldText: page text to be modified
        @return: The modified pagetext
        """
        # Step 1: try every known references heading.
        for section in i18n.translate(self.site, referencesSections):
            section_re = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
            offset = 0
            while offset < len(oldText):
                hit = section_re.search(oldText, offset)
                if hit is None:
                    break
                after_heading = hit.end()
                if textlib.isDisabled(oldText, hit.start()):
                    pywikibot.output(
                        'Existing  %s section is commented out, skipping.'
                        % section)
                    # Continue the search behind the disabled heading.
                    offset = after_heading
                    continue
                pywikibot.output(
                    u'Adding references tag to existing %s section...\n'
                    % section)
                return (oldText[:after_heading] + u'\n'
                        + self.referencesText + u'\n'
                        + oldText[after_heading:])

        # Step 2: try to create the section in front of a known heading.
        for section in i18n.translate(self.site, placeBeforeSections):
            # 'ident' captures the '=' run, which must repeat on the right
            section_re = re.compile(
                r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
            offset = 0
            while offset < len(oldText):
                hit = section_re.search(oldText, offset)
                if hit is None:
                    break
                if textlib.isDisabled(oldText, hit.start()):
                    pywikibot.output(
                        'Existing %s section is commented out, won\'t add '
                        'the references in front of it.' % section)
                    offset = hit.end()
                    continue
                pywikibot.output(
                    u'Adding references section before %s section...\n'
                    % section)
                insert_at = hit.start()
                level = hit.group('ident')
                return self.createReferenceSection(oldText, insert_at, level)

        # Step 3: the new section has to go above the interwiki links and
        # categories, and also above navigation bars, persondata and other
        # templates found at the bottom of the page. To find that spot,
        # take a temporary copy of the text and repeatedly strip such
        # elements from its end; what remains tells us the position.
        catNamespaces = '|'.join(self.site.category_namespaces())
        categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
        interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
        # This does not work with nested templates. The negative lookahead
        # assures that the last template occurrence in the copy is matched.
        # FIXME:
        # {{commons}} or {{commonscat}} are part of Weblinks section
        # * {{template}} is mostly part of a section
        # so templatePattern must be fixed
        templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
        commentPattern = r'<!--((?!-->).)*?-->\s*'
        alternatives = (categoryPattern, interwikiPattern,
                        templatePattern, commentPattern)
        bottom_re = re.compile(r'(\r?\n)?(%s|%s|%s|%s)$' % alternatives,
                               re.DOTALL)
        stripped = oldText
        while True:
            bottom = bottom_re.search(stripped)
            if not bottom:
                break
            stripped = stripped[:bottom.start()]
        pywikibot.output(
            u'Found no section that can be preceeded by a new references '
            u'section.\nPlacing it before interwiki links, categories, and '
            u'bottom templates.')
        return self.createReferenceSection(oldText, len(stripped))
    def replace_links(self, text, linkedPage, targetPage):
        """Rewrite every wikilink to linkedPage so it points to targetPage.

        @param text: wikitext that is scanned for links
        @param linkedPage: page whose links are to be replaced
        @param targetPage: page the replaced links point to
        @return: the text with all matching links replaced
        """
        site = pywikibot.Site()
        linktrail = site.linktrail()

        # [[title#section|label]]trail, with section and label optional
        link_re = re.compile(
            r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
            r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + linktrail + ')')
        # Used below to test whether a text remainder is pure linktrail
        trail_re = re.compile(linktrail)

        search_from = 0
        # Walk through the links until the text holds no further one
        while True:
            hit = link_re.search(text, pos=search_from)
            if hit is None:
                break
            # Start behind this hit next time so it is not found again
            search_from = hit.start() + 1

            page_title = hit.group('title')
            # Skip links with an empty title (section links to the same
            # page), interwiki links and links inside a disabled area
            if (page_title.strip() == ''
                    or site.isInterwikiLink(page_title)
                    or isDisabled(text, hit.start())):
                continue

            # Only links that really lead to linkedPage are of interest
            candidate = pywikibot.Page(targetPage.site, page_title)
            try:
                candidate.title()
            except InvalidTitle:
                pywikibot.exception()
                continue
            if candidate != linkedPage:
                continue

            # [[page_title|link_text]]trailing_chars, or without a label
            # [[page_title]]trailing_chars
            link_text = hit.group('label') or page_title
            section = hit.group('section') or ''
            if section and targetPage.section():
                pywikibot.warning(
                    'Source section {0} and target section {1} found. '
                    'Skipping.'.format(section, targetPage))
                continue
            trailing_chars = hit.group('linktrail')
            if trailing_chars:
                link_text += trailing_chars

            # Drop a leading ":" from the visible text
            if link_text[0] == ':':
                link_text = link_text[1:]
            # An uppercase or digit initial keeps the target title as is;
            # otherwise the title is passed through first_lower
            initial = link_text[0]
            new_page_title = targetPage.title()
            if not (initial.isupper() or initial.isdigit()):
                new_page_title = first_lower(new_page_title)

            # Drop a leading ":" from the new title as well
            if new_page_title[0] == ':':
                new_page_title = new_page_title[1:]

            cut = len(new_page_title)
            head, tail = link_text[:cut], link_text[cut:]
            if new_page_title == link_text and not section:
                # Plain link, no label required
                newlink = '[[{}]]'.format(new_page_title)
            elif (cut <= len(link_text)
                  and firstcap(head) == firstcap(new_page_title)
                  and trail_re.sub('', tail) == ''
                  and not section):
                # The label is the title plus linktrail characters only,
                # so trailing characters can replace the pipelink
                newlink = '[[{}]]{}'.format(head, tail)
            else:
                newlink = '[[{}{}|{}]]'.format(new_page_title,
                                               section, link_text)
            text = text[:hit.start()] + newlink + text[hit.end():]
        return text
    def addReferences(self, oldText):
        """
        Repair or insert the references tag of a page text.

        A malformed references tag is repaired if one is present.
        Otherwise the tag is added to an existing section where it fits
        into, or a new section containing the tag is created. The edit
        summary in self.comment is set according to the case that applies.

        @param oldText: page text to be modified
        @type oldText: str
        @return: The modified pagetext
        @rtype: str
        """
        # Case 1: a malformed <references> tag that can be repaired.
        # The edit summary is set for this case first.
        self.comment = i18n.twtranslate(self.site, 'noreferences-fix-tag')

        # Two opening tags, or an opening tag followed by an empty tag
        double_tag = re.compile(r'< *references *>(.*?)'
                                r'< */?\s*references */? *>', re.DOTALL)
        if double_tag.search(oldText):
            pywikibot.output('Repairing references tag')
            return double_tag.sub(r'<references>\1</references>', oldText)
        # A single references tag that is never closed
        open_tag = re.compile(r'< *references *>')
        if open_tag.search(oldText):
            pywikibot.output('Repairing references tag')
            return open_tag.sub('<references />', oldText)

        # Case 2: an existing section that can take the references tag.
        # From here on the edit summary is the one for adding a tag.
        self.comment = i18n.twtranslate(self.site, 'noreferences-add-tag')
        for section in i18n.translate(self.site, referencesSections):
            heading = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
            pos = 0
            while pos < len(oldText):
                found = heading.search(oldText, pos)
                if not found:
                    break
                if textlib.isDisabled(oldText, found.start()):
                    pywikibot.output(
                        'Existing %s section is commented out, skipping.'
                        % section)
                    pos = found.end()
                    continue
                pywikibot.output(
                    'Adding references tag to existing %s section...\n'
                    % section)
                # Split one character before the end of the heading match
                # and put the tag behind any run of templates and comments
                # that starts the remainder.
                leading_block = re.compile(
                    r'^((?:\s*(?:\{\{[^\{\}]*?\}\}|<!--.*?-->))*)',
                    flags=re.DOTALL)
                split_at = found.end() - 1
                replacement = r'\1\n{0}\n'.format(self.referencesText)
                return (oldText[:split_at]
                        + leading_block.sub(replacement, oldText[split_at:]))

        # Case 3: create a new section in front of a known heading.
        for section in i18n.translate(self.site, placeBeforeSections):
            # 'ident' captures the '=' run, which must repeat on the right
            heading = re.compile(
                r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
            pos = 0
            while pos < len(oldText):
                found = heading.search(oldText, pos)
                if not found:
                    break
                if textlib.isDisabled(oldText, found.start()):
                    pywikibot.output(
                        'Existing %s section is commented out, won\'t add '
                        'the references in front of it.' % section)
                    pos = found.end()
                    continue
                pywikibot.output(
                    u'Adding references section before %s section...\n'
                    % section)
                return self.createReferenceSection(
                    oldText, found.start(), found.group('ident'))

        # Case 4: the new section goes above the interwiki links and
        # categories, and also above navigation bars, persondata and other
        # templates at the bottom of the page. A temporary copy of the
        # text is stripped of such elements from the end; the length of
        # what remains is the position for the new section.
        catNamespaces = '|'.join(self.site.namespaces.CATEGORY)
        categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
        interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
        # Nested templates are not handled. The negative lookahead assures
        # that the last template occurrence in the copy is matched.
        # FIXME:
        # {{commons}} or {{commonscat}} are part of Weblinks section
        # * {{template}} is mostly part of a section
        # so templatePattern must be fixed
        templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
        commentPattern = r'<!--((?!-->).)*?-->\s*'
        trailing = re.compile(
            r'(\r?\n)?(%s|%s|%s|%s)$' % (
                categoryPattern, interwikiPattern,
                templatePattern, commentPattern),
            re.DOTALL)
        remaining = oldText
        tail = trailing.search(remaining)
        while tail:
            remaining = remaining[:tail.start()]
            tail = trailing.search(remaining)
        pywikibot.output(
            u'Found no section that can be preceeded by a new references '
            u'section.\nPlacing it before interwiki links, categories, and '
            u'bottom templates.')
        return self.createReferenceSection(oldText, len(remaining))