def addReferences(self, oldText):
    """
    Add a references tag into an existing section where it fits into.

    A malformed references tag is repaired first, if one is found; in
    that case the repaired text is returned right away. Otherwise the
    tag is inserted directly after the heading of an existing references
    section. If there is no such section, a new section containing the
    references tag is created.

    @param oldText: page text to be modified
    @return: The modified pagetext
    """
    # Do we have a malformed <references> tag which could be repaired?
    # Repair two opening tags or an opening and an empty tag
    pattern = re.compile(
        r'< *references *>(.*?)'
        r'< */?\s*references */? *>', re.DOTALL)
    if pattern.search(oldText):
        # The replacement has to be a raw string: in a normal string
        # literal '\1' is the control character U+0001, not a
        # backreference, and the text between the two tags would be lost.
        return pattern.sub(r'<references>\1</references>', oldText)

    # Repair single unclosed references tag
    pattern = re.compile(r'< *references *>')
    if pattern.search(oldText):
        return pattern.sub('<references />', oldText)

    # Is there an existing section where we can add the references tag?
    for section in i18n.translate(self.site, referencesSections):
        sectionR = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if textlib.isDisabled(oldText, match.start()):
                # This heading does not count; look for a later one.
                index = match.end()
            else:
                # Put the tag on its own line right below the heading.
                return (oldText[:match.end()] + u'\n'
                        + self.referencesText + u'\n'
                        + oldText[match.end():])

    # Create a new section for the references tag
    for section in i18n.translate(self.site, placeBeforeSections):
        # Find out where to place the new section. The named group
        # 'ident' captures the run of '=' so that the new heading can be
        # created with the same number of '=' as the one it precedes.
        sectionR = re.compile(
            r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if textlib.isDisabled(oldText, match.start()):
                index = match.end()
            else:
                index = match.start()
                ident = match.group('ident')
                return self.createReferenceSection(oldText, index, ident)

    # This gets complicated: we want to place the new references
    # section over the interwiki links and categories, but also
    # over all navigation bars, persondata, and other templates
    # that are at the bottom of the page. So we need some advanced
    # regex magic.
    # The strategy is: create a temporary copy of the text. From that,
    # keep removing interwiki links, templates etc. from the bottom.
    # At the end, look at the length of the temp text. That's the position
    # where we'll insert the references section.
    catNamespaces = '|'.join(self.site.category_namespaces())
    categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # won't work with nested templates
    # the negative lookahead assures that we'll match the last template
    # occurrence in the temp text.
    # FIXME:
    # {{commons}} or {{commonscat}} are part of Weblinks section
    # * {{template}} is mostly part of a section
    # so templatePattern must be fixed
    templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    # The trailing '$' anchors every alternative at the end of the text,
    # so each match is the last metadata item still left in tmpText.
    metadataR = re.compile(
        r'(\r?\n)?(%s|%s|%s|%s)$' % (categoryPattern, interwikiPattern,
                                     templatePattern, commentPattern),
        re.DOTALL)
    tmpText = oldText
    while True:
        match = metadataR.search(tmpText)
        if not match:
            break
        tmpText = tmpText[:match.start()]
    index = len(tmpText)
    return self.createReferenceSection(oldText, index)
def addReferences(self, oldText):
    """
    Insert the references tag into the page text.

    Three placements are tried in order: right below the heading of an
    existing references section, in a new section in front of one of
    the sections the new one has to precede, or in a new section above
    the metadata (categories, interwiki links, templates, comments)
    found at the bottom of the page.

    @param oldText: page text to be modified
    @return: The modified pagetext
    """
    # First choice: an existing section that can hold the tag.
    for heading in i18n.translate(self.site, referencesSections):
        heading_re = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % heading)
        pos = 0
        while pos < len(oldText):
            found = heading_re.search(oldText, pos)
            if not found:
                break
            if textlib.isDisabled(oldText, found.start()):
                pywikibot.output(
                    'Existing %s section is commented out, skipping.'
                    % heading)
                # Keep looking behind the heading that does not count.
                pos = found.end()
                continue
            pywikibot.output(
                u'Adding references tag to existing %s section...\n'
                % heading)
            cut = found.end()
            # The tag goes on a line of its own right below the heading.
            return (oldText[:cut] + u'\n' + self.referencesText + u'\n'
                    + oldText[cut:])

    # Second choice: a new section in front of a known later section.
    for heading in i18n.translate(self.site, placeBeforeSections):
        # 'ident' captures the run of '=' of the heading; it is handed
        # on to createReferenceSection together with the position.
        heading_re = re.compile(
            r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % heading)
        pos = 0
        while pos < len(oldText):
            found = heading_re.search(oldText, pos)
            if not found:
                break
            if textlib.isDisabled(oldText, found.start()):
                pywikibot.output(
                    'Existing %s section is commented out, won\'t add '
                    'the references in front of it.' % heading)
                pos = found.end()
                continue
            pywikibot.output(
                u'Adding references section before %s section...\n'
                % heading)
            return self.createReferenceSection(
                oldText, found.start(), found.group('ident'))

    # Last resort: the new section has to end up above the interwiki
    # links and categories, and also above navigation bars, persondata
    # and other templates sitting at the bottom of the page. To find
    # that spot, a copy of the text is trimmed from its end, one
    # metadata item at a time; what is left over marks the position.
    category_names = '|'.join(self.site.category_namespaces())
    category_part = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % category_names
    interwiki_part = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # Does not cope with nested templates. The negative lookahead makes
    # sure the last template occurrence in the remaining text is matched.
    # FIXME: {{commons}} or {{commonscat}} are part of the Weblinks
    # section and "* {{template}}" is mostly part of a section, so this
    # template pattern needs fixing.
    template_part = r'\r?\n{{((?!}}).)+?}}\s*'
    comment_part = r'<!--((?!-->).)*?-->\s*'
    trailing_re = re.compile(
        r'(\r?\n)?(%s|%s|%s|%s)$' % (category_part, interwiki_part,
                                     template_part, comment_part),
        re.DOTALL)

    remaining = oldText
    tail = trailing_re.search(remaining)
    while tail:
        remaining = remaining[:tail.start()]
        tail = trailing_re.search(remaining)

    pywikibot.output(
        u'Found no section that can be preceeded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    return self.createReferenceSection(oldText, len(remaining))
def addReferences(self, oldText):
    """
    Return the page text with a references tag added.

    The tag is placed below the heading of an existing references
    section when there is one. Failing that, a new references section
    is created, either in front of a section it has to precede or above
    the categories, interwiki links, templates and comments that close
    the page.

    @param oldText: page text to be modified
    @return: The modified pagetext
    """
    # Step 1: look for an existing section that can take the tag.
    for name in i18n.translate(self.site, referencesSections):
        section_re = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % name)
        start_at = 0
        while start_at < len(oldText):
            hit = section_re.search(oldText, start_at)
            if not hit:
                break
            if not textlib.isDisabled(oldText, hit.start()):
                pywikibot.output(
                    u'Adding references tag to existing %s section...\n'
                    % name)
                before = oldText[:hit.end()]
                after = oldText[hit.end():]
                # Tag on its own line, directly below the heading.
                return before + u'\n' + self.referencesText + u'\n' + after
            pywikibot.output(
                'Existing %s section is commented out, skipping.'
                % name)
            # Continue the search after the heading that was skipped.
            start_at = hit.end()

    # Step 2: look for a section the new one has to be placed before.
    for name in i18n.translate(self.site, placeBeforeSections):
        # The 'ident' group holds the '=' run of the matched heading and
        # is passed on to createReferenceSection.
        section_re = re.compile(
            r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % name)
        start_at = 0
        while start_at < len(oldText):
            hit = section_re.search(oldText, start_at)
            if not hit:
                break
            if not textlib.isDisabled(oldText, hit.start()):
                pywikibot.output(
                    u'Adding references section before %s section...\n'
                    % name)
                insert_at = hit.start()
                ident = hit.group('ident')
                return self.createReferenceSection(
                    oldText, insert_at, ident)
            pywikibot.output(
                'Existing %s section is commented out, won\'t add '
                'the references in front of it.' % name)
            start_at = hit.end()

    # Step 3: place the section over the interwiki links and categories,
    # and over navigation bars, persondata and other templates at the
    # bottom of the page. A scratch copy of the text is shortened from
    # the bottom until no such item is left at its end; its final length
    # is the insert position.
    catNamespaces = '|'.join(self.site.category_namespaces())
    categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # Nested templates are not supported. Thanks to the negative
    # lookahead the last template occurrence in the scratch text matches.
    # FIXME: {{commons}} or {{commonscat}} are part of the Weblinks
    # section and "* {{template}}" is mostly part of a section, so
    # templatePattern must be fixed.
    templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    alternatives = (categoryPattern, interwikiPattern,
                    templatePattern, commentPattern)
    metadataR = re.compile(r'(\r?\n)?(%s|%s|%s|%s)$' % alternatives,
                           re.DOTALL)

    scratch = oldText
    while True:
        hit = metadataR.search(scratch)
        if hit is None:
            break
        scratch = scratch[:hit.start()]

    pywikibot.output(
        u'Found no section that can be preceeded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    insert_at = len(scratch)
    return self.createReferenceSection(oldText, insert_at)
def replace_links(self, text, linkedPage, targetPage):
    """
    Replace all links to linkedPage found in text by links to targetPage.

    Each wikilink of the form [[title#section|label]]trail is examined.
    Links that resolve to linkedPage are rewritten; where possible the
    shortest form is produced (a plain link, or a link followed by
    trailing characters) instead of a piped link.

    @param text: wikitext to work on
    @param linkedPage: page the links to be replaced point to
    @param targetPage: page the rewritten links point to
    @return: the text with the matching links replaced
    """
    site = pywikibot.Site()
    trail_pattern = site.linktrail()
    link_regex = re.compile(
        r'\[\[(?P<title>[^\]\|#]*)(?P<section>#[^\]\|]*)?'
        r'(\|(?P<label>[^\]]*))?\]\](?P<linktrail>' + trail_pattern + ')')

    search_from = 0
    # Walk through the links until the search finds nothing more.
    while True:
        hit = link_regex.search(text, pos=search_from)
        if hit is None:
            break
        # Resume one character behind the start of this hit, so the
        # same link is never found twice.
        search_from = hit.start() + 1

        raw_title = hit.group('title')
        # Skip links with an empty title (links to sections of the same
        # page), interwiki links and links in a disabled area.
        if (raw_title.strip() == ''
                or site.isInterwikiLink(raw_title)
                or isDisabled(text, hit.start())):
            continue

        # Only links that resolve to linkedPage are of interest.
        candidate = pywikibot.Page(targetPage.site, raw_title)
        try:
            candidate.title()
        except InvalidTitle:
            pywikibot.exception()
            continue
        if candidate != linkedPage:
            continue

        anchor = hit.group('section') or ''
        if anchor and targetPage.section():
            pywikibot.warning(
                'Source section {0} and target section {1} found. '
                'Skipping.'.format(anchor, targetPage))
            continue

        # The link is either [[title|label]]trail or [[title]]trail; the
        # visible text is the label (or the title) plus the trail.
        shown = hit.group('label') or raw_title
        trail = hit.group('linktrail')
        if trail:
            shown = shown + trail

        # Drop a leading ":" from the visible text.
        if shown.startswith(':'):
            shown = shown[1:]

        # Follow the case of the visible text's first character.
        initial = shown[0]
        if initial.isupper() or initial.isdigit():
            new_title = targetPage.title()
        else:
            new_title = first_lower(targetPage.title())

        # Drop a leading ":" from the new title as well.
        if new_title[0] == ':':
            new_title = new_title[1:]

        cut = len(new_title)
        head, rest = shown[:cut], shown[cut:]
        if new_title == shown and not anchor:
            replacement = '[[{}]]'.format(new_title)
        # A link with trailing characters is possible when the visible
        # text starts with the new title and the remainder consists of
        # link trail characters only.
        elif (cut <= len(shown)
              and firstcap(head) == firstcap(new_title)
              and re.sub(re.compile(trail_pattern), '', rest) == ''
              and not anchor):
            replacement = '[[{}]]{}'.format(head, rest)
        else:
            replacement = '[[{}{}|{}]]'.format(new_title, anchor, shown)

        text = text[:hit.start()] + replacement + text[hit.end():]
    return text
def addReferences(self, oldText):
    """
    Add a references tag into an existing section where it fits into.

    If there is no such section, creates a new section containing
    the references tag. Also repair malformed references tags.
    Set the edit summary (self.comment) accordingly.

    @param oldText: page text to be modified
    @type oldText: str
    @return: The modified pagetext
    @rtype: str
    """
    # Do we have a malformed <references> tag which could be repaired?
    # Set the edit summary for this case
    self.comment = i18n.twtranslate(self.site, 'noreferences-fix-tag')
    # Repair two opening tags or an opening and an empty tag
    pattern = re.compile(r'< *references *>(.*?)'
                         r'< */?\s*references */? *>', re.DOTALL)
    if pattern.search(oldText):
        pywikibot.output('Repairing references tag')
        return pattern.sub(r'<references>\1</references>', oldText)
    # Repair single unclosed references tag
    pattern = re.compile(r'< *references *>')
    if pattern.search(oldText):
        pywikibot.output('Repairing references tag')
        return pattern.sub('<references />', oldText)

    # Is there an existing section where we can add the references tag?
    # Set the edit summary for this case
    self.comment = i18n.twtranslate(self.site, 'noreferences-add-tag')
    for section in i18n.translate(self.site, referencesSections):
        sectionR = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if textlib.isDisabled(oldText, match.start()):
                pywikibot.output(
                    'Existing %s section is commented out, skipping.'
                    % section)
                index = match.end()
                continue
            pywikibot.output(
                'Adding references tag to existing %s section...\n'
                % section)
            # Leading run of templates and comments right below the
            # heading; the tag is inserted after that run. The pattern
            # is anchored at the start and may match the empty string,
            # so exactly one substitution takes place.
            templates_or_comments = re.compile(
                r'^((?:\s*(?:\{\{[^\{\}]*?\}\}|<!--.*?-->))*)',
                flags=re.DOTALL)
            # The split point is the last character of the heading
            # match (its final newline), which stays with the tail.
            split_at = match.end() - 1
            references_text = self.referencesText

            def insert_tag(leading):
                # A function replacement inserts the tag verbatim; in a
                # replacement template any backslash in the tag text
                # would be interpreted as an escape sequence.
                return leading.group(1) + '\n' + references_text + '\n'

            return (oldText[:split_at]
                    + templates_or_comments.sub(insert_tag,
                                                oldText[split_at:]))

    # Create a new section for the references tag
    for section in i18n.translate(self.site, placeBeforeSections):
        # Find out where to place the new section. The 'ident' group
        # captures the '=' run of the heading and is handed on to
        # createReferenceSection.
        sectionR = re.compile(r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n'
                              % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if textlib.isDisabled(oldText, match.start()):
                pywikibot.output(
                    'Existing %s section is commented out, won\'t add '
                    'the references in front of it.' % section)
                index = match.end()
                continue
            pywikibot.output(
                u'Adding references section before %s section...\n'
                % section)
            index = match.start()
            ident = match.group('ident')
            return self.createReferenceSection(oldText, index, ident)

    # This gets complicated: we want to place the new references
    # section over the interwiki links and categories, but also
    # over all navigation bars, persondata, and other templates
    # that are at the bottom of the page. So we need some advanced
    # regex magic.
    # The strategy is: create a temporary copy of the text. From that,
    # keep removing interwiki links, templates etc. from the bottom.
    # At the end, look at the length of the temp text. That's the position
    # where we'll insert the references section.
    catNamespaces = '|'.join(self.site.namespaces.CATEGORY)
    categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # won't work with nested templates
    # the negative lookahead assures that we'll match the last template
    # occurrence in the temp text.
    # FIXME:
    # {{commons}} or {{commonscat}} are part of Weblinks section
    # * {{template}} is mostly part of a section
    # so templatePattern must be fixed
    templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    # The trailing '$' anchors every alternative at the end of the text.
    metadataR = re.compile(r'(\r?\n)?(%s|%s|%s|%s)$'
                           % (categoryPattern, interwikiPattern,
                              templatePattern, commentPattern), re.DOTALL)
    tmpText = oldText
    while True:
        match = metadataR.search(tmpText)
        if not match:
            break
        tmpText = tmpText[:match.start()]
    pywikibot.output(
        u'Found no section that can be preceeded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    index = len(tmpText)
    return self.createReferenceSection(oldText, index)