def parse3(self, text):
    """Split the page text around its first section and first stop section.

    The text is viewed as three consecutive parts:

    A: everything before the first line matched by ``sectionpattern``
    B: from that section title line up to (not including) the first line
       matched by ``stopsectionregex``
    C: from that stop line to the end of the text

    A line only counts as a boundary when ``pywikibot.isDisabled`` returns
    a false value for the character offset at which the line starts
    (presumably meaning the line is not inside a commented-out region --
    confirm against pywikibot).

    @param text: the page text
    @return: the tuple (A, B); part C is discarded
    """
    pieces = text.splitlines(True)  # keep the line endings
    total = len(pieces)
    offset = 0      # character offset of the current line within text
    pos = 0         # index of the current line within pieces
    head = []
    body = []

    # Phase 1: collect part A until the first active section title shows up.
    while pos < total:
        current = pieces[pos]
        is_title = (sectionpattern.match(current)
                    and not pywikibot.isDisabled(text, offset))
        # The title line itself is consumed here, so phase 2 resumes on the
        # line that follows it.
        offset += len(current)
        pos += 1
        if is_title:
            body.append(current)
            break
        head.append(current)

    # Phase 2: collect part B until the first active stop section.
    while pos < total:
        current = pieces[pos]
        if (stopsectionregex.match(current)
                and not pywikibot.isDisabled(text, offset)):
            break
        body.append(current)
        offset += len(current)
        pos += 1

    return (''.join(head), ''.join(body))
def addReferences(self, oldText):
    """
    Insert the references tag into the page text.

    Three strategies are tried in order; the first one that applies wins:

    1. Put ``self.referencesText`` directly below the heading of an existing
       section whose title is listed in ``referencesSections``.
    2. Create a new references section in front of the first section whose
       title is listed in ``placeBeforeSections``.
    3. Create a new references section above the trailing block of
       categories, interwiki links, templates and comments.

    Headings for which ``pywikibot.isDisabled`` returns a true value are
    reported as commented out and skipped.

    * Returns : The modified pagetext
    """
    # Strategy 1: reuse an existing references-like section.
    for heading in i18n.translate(self.site, referencesSections):
        heading_re = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % heading)
        pos = 0
        while pos < len(oldText):
            found = heading_re.search(oldText, pos)
            if not found:
                break
            if not pywikibot.isDisabled(oldText, found.start()):
                pywikibot.output(
                    u'Adding references tag to existing %s section...\n'
                    % heading)
                cut = found.end()
                return (oldText[:cut] + u'\n' + self.referencesText
                        + u'\n' + oldText[cut:])
            pywikibot.output(
                'Existing %s section is commented out, skipping.' % heading)
            # Resume the search behind the disabled heading.
            pos = found.end()

    # Strategy 2: open a new section in front of a well-known later section.
    for heading in i18n.translate(self.site, placeBeforeSections):
        # The backreference forces the same number of '=' on both sides;
        # the captured markup is handed on to createReferenceSection.
        heading_re = re.compile(
            r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % heading)
        pos = 0
        while pos < len(oldText):
            found = heading_re.search(oldText, pos)
            if not found:
                break
            if not pywikibot.isDisabled(oldText, found.start()):
                pywikibot.output(
                    u'Adding references section before %s section...\n'
                    % heading)
                return self.createReferenceSection(
                    oldText, found.start(), found.group('ident'))
            pywikibot.output(
                'Existing %s section is commented out, won\'t add the '
                'references in front of it.' % heading)
            pos = found.end()

    # Strategy 3: the new section has to go above the interwiki links and
    # categories, and also above navigation bars, persondata and other
    # templates sitting at the bottom of the page. To find that spot, work
    # on a copy of the text and repeatedly chop one such element off its
    # end; the length of what remains is the insertion position.
    namespace_alternatives = '|'.join(self.site.category_namespaces())
    # The template alternative won't work with nested templates; its
    # negative lookahead keeps a match from running across a '}}'.
    # FIXME: {{commons}} or {{commonscat}} are part of the Weblinks section
    # and "* {{template}}" is mostly part of a section, so the template
    # alternative needs refinement.
    tail_alternatives = [
        r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % namespace_alternatives,  # category
        r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*',               # interwiki
        r'\r?\n{{((?!}}).)+?}}\s*',                                # template
        r'<!--((?!-->).)*?-->\s*',                                 # comment
    ]
    tail_re = re.compile(
        r'(\r?\n)?(%s)$' % '|'.join(tail_alternatives), re.DOTALL)

    remaining = oldText
    tail = tail_re.search(remaining)
    while tail:
        remaining = remaining[:tail.start()]
        tail = tail_re.search(remaining)

    pywikibot.output(
        u'Found no section that can be preceeded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    return self.createReferenceSection(oldText, len(remaining))
def addReferences(self, oldText):
    """
    Tries to add a references tag into an existing section where it fits
    into. If there is no such section, creates a new section containing
    the references tag.

    The lookup proceeds in three steps:

    1. Search for a section titled like one of ``referencesSections`` and
       insert ``self.referencesText`` right below its heading.
    2. Otherwise search for a section titled like one of
       ``placeBeforeSections`` and create the references section in front
       of it, reusing that heading's '=' markup.
    3. Otherwise create the references section above the trailing
       categories, interwiki links, templates and comments.

    Headings for which ``pywikibot.isDisabled`` returns a true value are
    reported as commented out and skipped.

    All patterns accept both LF and CRLF line endings. Requiring CRLF, as
    this method used to do, meant that steps 1 and 2 could never match on
    LF-only text.

    * Returns : The modified pagetext
    """
    # Is there an existing section where we can add the references tag?
    for section in pywikibot.translate(self.site, referencesSections):
        # The carriage return is optional so LF-only text matches as well.
        sectionR = re.compile(r'\r?\n=+ *%s *=+ *\r?\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if pywikibot.isDisabled(oldText, match.start()):
                pywikibot.output(
                    'Existing %s section is commented out, skipping.'
                    % section)
                # Continue searching behind the disabled heading.
                index = match.end()
            else:
                pywikibot.output(
                    u'Adding references tag to existing %s section...\n'
                    % section)
                newText = (
                    oldText[:match.end()] + u'\n'
                    + self.referencesText + u'\n'
                    + oldText[match.end():]
                )
                return newText

    # Create a new section for the references tag
    for section in pywikibot.translate(self.site, placeBeforeSections):
        # Find out where to place the new section. The backreference makes
        # sure the heading has the same number of '=' on both sides.
        sectionR = re.compile(
            r'\r?\n(?P<ident>=+) *%s *(?P=ident) *\r?\n' % section)
        index = 0
        while index < len(oldText):
            match = sectionR.search(oldText, index)
            if not match:
                break
            if pywikibot.isDisabled(oldText, match.start()):
                pywikibot.output(
                    'Existing %s section is commented out, won\'t add '
                    'the references in front of it.' % section)
                index = match.end()
            else:
                pywikibot.output(
                    u'Adding references section before %s section...\n'
                    % section)
                index = match.start()
                ident = match.group('ident')
                return self.createReferenceSection(oldText, index, ident)

    # This gets complicated: we want to place the new references
    # section over the interwiki links and categories, but also
    # over all navigation bars, persondata, and other templates
    # that are at the bottom of the page. So we need some advanced
    # regex magic.
    # The strategy is: create a temporary copy of the text. From that,
    # keep removing interwiki links, templates etc. from the bottom.
    # At the end, look at the length of the temp text. That's the position
    # where we'll insert the references section.
    catNamespaces = '|'.join(self.site.category_namespaces())
    categoryPattern = r'\[\[\s*(%s)\s*:[^\n]*\]\]\s*' % catNamespaces
    interwikiPattern = r'\[\[([a-zA-Z\-]+)\s?:([^\[\]\n]*)\]\]\s*'
    # won't work with nested templates
    # the negative lookahead assures that we'll match the last template
    # occurrence in the temp text.
    # FIXME:
    # {{commons}} or {{commonscat}} are part of Weblinks section
    # * {{template}} is mostly part of a section
    # so templatePattern must be fixed
    templatePattern = r'\r?\n{{((?!}}).)+?}}\s*'
    commentPattern = r'<!--((?!-->).)*?-->\s*'
    metadataR = re.compile(
        r'(\r?\n)?(%s|%s|%s|%s)$'
        % (categoryPattern, interwikiPattern, templatePattern,
           commentPattern),
        re.DOTALL)
    tmpText = oldText
    while True:
        # Strip one trailing metadata element per iteration.
        match = metadataR.search(tmpText)
        if match:
            tmpText = tmpText[:match.start()]
        else:
            break
    pywikibot.output(
        u'Found no section that can be preceeded by a new references '
        u'section.\nPlacing it before interwiki links, categories, and '
        u'bottom templates.')
    index = len(tmpText)
    return self.createReferenceSection(oldText, index)