def __init__(self, generator, acceptall=False, limit=None, ignorepdf=False): """ - generator : Page generator - acceptall : boolean, is -always on ? - limit : int, stop after n modified pages - ignorepdf : boolean """ self.generator = generator self.acceptall = acceptall self.limit = limit self.ignorepdf = ignorepdf self.site = pywikibot.getSite() # Check manual = 'mw:Manual:Pywikibot/refLinks' code = None for alt in [self.site.code] + i18n._altlang(self.site.code): if alt in localized_msg: code = alt break if code: manual += '/%s' % code self.msg = i18n.twtranslate(self.site, 'reflinks-msg', locals()) self.stopPage = pywikibot.Page( self.site, pywikibot.translate(self.site, stopPage)) local = pywikibot.translate(self.site, badtitles) if local: bad = '(' + globalbadtitles + '|' + local + ')' else: bad = globalbadtitles self.titleBlackList = re.compile(bad, re.I | re.S | re.X) self.norefbot = noreferences.NoReferencesBot(None) self.deduplicator = DuplicateReferences() try: self.stopPageRevId = self.stopPage.latestRevision() except pywikibot.NoPage: pywikibot.output(u'The stop page %s does not exist' % self.stopPage.title(asLink=True)) raise # Regex to grasp content-type meta HTML tag in HTML source self.META_CONTENT = re.compile(ur'(?i)<meta[^>]*content\-type[^>]*>') # Extract the encoding from a charset property (from content-type !) self.CHARSET = re.compile(ur'(?i)charset\s*=\s*(?P<enc>[^\'";>/]*)') # Extract html title from page self.TITLE = re.compile(ur'(?is)(?<=<title>).*?(?=</title>)') # Matches content inside <script>/<style>/HTML comments self.NON_HTML = re.compile( ur'(?is)<script[^>]*>.*?</script>|<style[^>]*>.*?</style>|<!--.*?-->|<!\[CDATA\[.*?\]\]>' ) # Authorized mime types for HTML pages self.MIME = re.compile( ur'application/(?:xhtml\+xml|xml)|text/(?:ht|x)ml')
elif cols: text = text.replace(m.group(), '{{reflist|%s}}' % cols.group(2)) else: text = text.replace(m.group(), '{{reflist}}') # If more than 30 refs, make sure the reference section is multi column if text.count('</ref>') > 30: text = re.sub( r'(?is)(=\s+(<!--.*?-->)*\s*)(<references />|\{\{reflist\|?3?\}\})', r'\1{{reflist|colwidth=30em}}', text) elif text.count('</ref>') < 5: text = re.sub(r'(?is)(=\s+)\{\{reflist\|(\d+|colwidth=\d+\w+)\}\}', r'\1{{reflist}}', text) if noreferences: norefbot = noreferences.NoReferencesBot(None) if norefbot.lacksReferences(text, verbose=False): text = norefbot.addReferences(text) return text def test(): tests = (""" see, <ref /> after,\t<ref > class, <ref /> sdf her <ref /> <ref /> , \t but would... """, )