def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     override=u'', addCategory=u'', removeCategories=False):
    """Assemble the finished file description from the flinfo output.

    The raw flinfo text is wrapped in a {{int:filedesc}} header and then,
    depending on the flags, stripped of category links, cleaned of the
    detected license/review templates (when an overriding license is
    given), marked as reviewed, and extended with an extra category.
    Windows line endings are normalised at the end.
    """
    text = u'== {{int:filedesc}} ==\n%s' % flinfoDescription
    if removeCategories:
        text = pywikibot.removeCategoryLinks(
            text, pywikibot.Site('commons', 'commons'))
    if override:
        # Drop the auto-detected license and review markers, then slot the
        # overriding license in right under the license header.
        for unwanted in (
            u'{{cc-by-sa-2.0}}\n',
            u'{{cc-by-2.0}}\n',
            u'{{flickrreview}}\n',
            u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n',
        ):
            text = text.replace(unwanted, u'')
        text = text.replace(u'=={{int:license}}==',
                            u'=={{int:license}}==\n' + override)
    elif flickrreview and reviewer:
        # Mark the review as done by this reviewer, dated via subst.
        text = text.replace(
            u'{{flickrreview}}',
            u'{{flickrreview|' + reviewer +
            '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}')
    if addCategory:
        # The page is no longer uncategorized once we add a category.
        text = text.replace(u'{{subst:unc}}\n', u'')
        text += u'\n[[Category:' + addCategory + ']]\n'
    return text.replace(u'\r\n', u'\n')
def buildDescription(flinfoDescription=u'', flickrreview=False, reviewer=u'',
                     override=u'', addCategory=u'', removeCategories=False):
    """Build the final on-wiki description for the image.

    Starts from the flinfo output, wraps it in a {{int:filedesc}} header
    and cleans it up according to the flags.

    @param flinfoDescription: raw description produced by flinfo
    @param flickrreview: add reviewer info to the {{flickrreview}} tag
    @param reviewer: name recorded in the review tag
    @param override: license text that replaces whatever flinfo detected
    @param addCategory: category name appended to the description
    @param removeCategories: strip all category links (Commons context)
    @return: the cleaned-up description text
    """
    description = u'== {{int:filedesc}} ==\n%s' % flinfoDescription
    if removeCategories:
        description = pywikibot.removeCategoryLinks(
            description, pywikibot.Site('commons', 'commons'))
    if override:
        # A caller-supplied license replaces the detected ones, so drop
        # the license and review markers flinfo may have produced.
        description = description.replace(u'{{cc-by-sa-2.0}}\n', u'')
        description = description.replace(u'{{cc-by-2.0}}\n', u'')
        description = description.replace(u'{{flickrreview}}\n', u'')
        description = description.replace(
            u'{{copyvio|Flickr, licensed as "All Rights Reserved" which is not a free license --~~~~}}\n',
            u'')
        # Slot the override directly under the license header.
        description = description.replace(u'=={{int:license}}==',
                                          u'=={{int:license}}==\n' + override)
    elif flickrreview:
        if reviewer:
            # Record the reviewer and a subst'ed review date.
            description = description.replace(
                u'{{flickrreview}}',
                u'{{flickrreview|' + reviewer +
                '|{{subst:CURRENTYEAR}}-{{subst:CURRENTMONTH}}-{{subst:CURRENTDAY2}}}}'
            )
    if addCategory:
        # No longer uncategorized once we append a category link.
        description = description.replace(u'{{subst:unc}}\n', u'')
        description = description + u'\n[[Category:' + addCategory + ']]\n'
    # Normalise Windows line endings.
    description = description.replace(u'\r\n', u'\n')
    return description
def putAfterTemplate(oldtext, template, toadd, loose=True):
    """Try to insert `toadd` directly after the first `template` transclusion.

    The end of the template is located by scanning character pairs and
    counting '{{'/'}}' (and '[['/']]') nesting.  If the template is not
    found and `loose` is True, the text is appended instead: interwiki
    links and categories are removed first and restored afterwards so the
    addition lands before them.

    NOTE(review): the original docstring claimed False is returned when
    the template is missing and `loose` is False; the code actually
    returns the initial empty string in that case — confirm which is the
    intended contract.

    Based on cc-by-sa-3.0 code by Dschwen.
    """
    newtext = u''
    templatePosition = oldtext.find(u'{{%s' % (template,))
    if templatePosition >= 0:
        previousChar = u''
        currentChar = u''
        # Skip the opening '{{' of the template itself; it is already
        # accounted for by starting with curly = 1.
        templatePosition += 2
        curly = 1
        square = 0
        while templatePosition < len(oldtext):
            currentChar = oldtext[templatePosition]
            if currentChar == u'[' and previousChar == u'[':
                square += 1
                previousChar = u''
            if currentChar == u']' and previousChar == u']':
                square -= 1
                previousChar = u''
            if currentChar == u'{' and previousChar == u'{':
                curly += 1
                previousChar = u''
            if currentChar == u'}' and previousChar == u'}':
                curly -= 1
                previousChar = u''
            # NOTE(review): this assignment overwrites the u'' resets
            # above, so runs like '}}}' may be double-counted — confirm
            # whether the resets were meant to take effect.
            previousChar = currentChar
            templatePosition += 1
            if curly == 0 and square <= 0:
                # Found end of template
                break
        # Insert the new text right after the template's closing braces.
        newtext = oldtext[:templatePosition] + \
            u'\n' + toadd + oldtext[templatePosition:]
    else:
        if loose:
            # Template missing: append at the end, but keep category and
            # interwiki links last by removing and re-adding them.
            newtext = oldtext
            cats = pywikibot.getCategoryLinks(newtext)
            ll = pywikibot.getLanguageLinks(newtext)
            newtext = pywikibot.removeLanguageLinks(newtext)
            newtext = pywikibot.removeCategoryLinks(newtext)
            newtext += u'\n' + toadd
            newtext = pywikibot.replaceCategoryLinks(newtext, cats)
            newtext = pywikibot.replaceLanguageLinks(newtext, ll)
    return newtext
def rem(text):
    """Strip tables, templates and other non-prose wikitext from *text*."""
    # Tables first, then templates -- TODO: ^\{\{ or ^[\*\:\#]*\{\{
    stripped = lre.rmsym(r"\{\|", r"\|\}", text)
    stripped = lre.rmsym(r"\{\{", r"\}\}", stripped)
    stripped = subst.process(stripped)
    # Hand the remainder to the pywikibot cleanup helpers in sequence.
    for cleaner in (pywikibot.removeDisabledParts,
                    pywikibot.removeLanguageLinks,
                    pywikibot.removeCategoryLinks,
                    pywikibot.removeHTMLParts):
        stripped = cleaner(stripped)
    return stripped
def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
    """Swap the image page's category links for *newcats* and save the page."""
    # Start from the current wikitext minus its category links.
    text = pywikibot.removeCategoryLinks(imagepage.get(), imagepage.site())
    if not onlyFilter:
        # Full categorisation run: also drop the old templates and append
        # the check-categories marker sized to the new category count.
        text = removeTemplates(text)
        text += getCheckCategoriesTemplate(usage, galleries, len(newcats))
    text += u'\n'
    text += u''.join(u'[[Category:' + cat + u']]\n' for cat in newcats)
    comment = (u'Filtering categories'
               if onlyFilter else
               u'Image is categorized by a bot using data from [[Commons:Tools#CommonSense|CommonSense]]')
    pywikibot.showDiff(imagepage.get(), text)
    imagepage.put(text, comment)
    return
def getNewFieldsFromFreetext(self, imagepage):
    """Derive description, date, source and author from the page's free text."""
    lang = imagepage.site.lang
    text = imagepage.get()
    # Strip the per-language boilerplate fragments first, then the known
    # license templates (the replacement half of each pair is unused here).
    patterns = list(sourceGarbage[lang])
    patterns.extend(regex for regex, repl in licenseTemplates[lang])
    for pattern in patterns:
        text = re.sub(pattern, '', text, flags=re.IGNORECASE)
    text = pywikibot.removeCategoryLinks(text, imagepage.site())
    return (self.convertLinks(text.strip(), imagepage.site()),
            self.getUploadDate(imagepage),
            self.getSource(imagepage),
            self.getAuthorText(imagepage))
def getNewFieldsFromFreetext(self, imagepage):
    """Extract fields from free text for the new information template.

    Returns a (description, date, source, author) tuple.  The page's free
    text is cleaned of known boilerplate and license templates (looked up
    in per-language tables) and of category links before being converted
    into the description field.
    """
    text = imagepage.get()
    # text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE)
    # text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE)
    # text = re.sub('\{\{(self|self2)\|[^\}]+\}\}', '', text, re.IGNORECASE)
    # Remove per-language boilerplate source fragments.
    for toRemove in sourceGarbage[imagepage.site.language()]:
        text = re.sub(toRemove, u'', text, flags=re.IGNORECASE)
    # Remove the known license templates; the replacement half of each
    # (regex, repl) pair is not used here.
    for (regex, repl) in licenseTemplates[imagepage.site.language()]:
        text = re.sub(regex, u'', text, flags=re.IGNORECASE)
    # NOTE(review): the text is stripped here and again on the next line;
    # the second strip is redundant but harmless.
    text = pywikibot.removeCategoryLinks(text, imagepage.site()).strip()
    description = self.convertLinks(text.strip(), imagepage.site())
    date = self.getUploadDate(imagepage)
    source = self.getSource(imagepage)
    author = self.getAuthorText(imagepage)
    return (description, date, source, author)
def getNewFieldsFromFreetext(self, imagepage):
    """Try to extract fields from free text for the new information template.

    Returns a (description, date, source, author) tuple built from the
    page text after removing known boilerplate, license templates and
    category links.
    """
    text = imagepage.get()
    # text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE)
    # text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE)
    # text = re.sub(u'\{\{(self|self2)\|[^\}]+\}\}', u'', text, re.IGNORECASE)
    # Remove per-language boilerplate source fragments.
    for toRemove in sourceGarbage[imagepage.site.language()]:
        text = re.sub(toRemove, u'', text, flags=re.IGNORECASE)
    # Remove the known license templates; the replacement half of each
    # (regex, repl) pair is not used here.
    for (regex, repl) in licenseTemplates[imagepage.site.language()]:
        text = re.sub(regex, u'', text, flags=re.IGNORECASE)
    # NOTE(review): the text is stripped here and again on the next line;
    # the second strip is redundant but harmless.
    text = pywikibot.removeCategoryLinks(text, imagepage.site()).strip()
    description = self.convertLinks(text.strip(), imagepage.site())
    date = self.getUploadDate(imagepage)
    source = self.getSource(imagepage)
    author = self.getAuthorText(imagepage)
    return (description, date, source, author)
def add_text(page=None, addText=None, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """Add `addText` to `page` and optionally save the result.

    Returns a 3-tuple:
      * (False, False, always) when the page was skipped or the save failed,
      * (True, True, always) after a successful save,
      * (text, newtext, always) when `putText` is False, so the caller can
        inspect or save the change itself.

    `regexSkip`/`regexSkipUrl` skip pages whose text/rendered page matches;
    `up` prepends instead of appends; `reorderEnabled` keeps categories,
    interwiki links and "featured article" star templates at the bottom;
    `create` allows creating a missing page.
    """
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    if not summary:
        summary = i18n.twtranslate(pywikibot.Site(), 'add_text-adding',
                                   {'adding': addText[:200]})
    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    errorCount = 0
    site = pywikibot.Site()
    pathWiki = site.family.nicepath(site.code)
    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = '%s%s' % (pathWiki, page.title(asUrl=True))
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                     categoriesInside, site,
                                                     True)
            # Dealing the stars' issue: star templates found in the page
            # are pulled out and re-appended, sorted, before the interwikis.
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile('(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All', 'open in Browser'],
                    ['y', 'n', 'a', 'b'], 'n')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site.hostname(),
                        page.site.nice_get_address(page.title())
                    ))
                    pywikibot.input("Press Enter when finished in browser.")
            # NOTE(review): 'choice' is unbound when 'always' was passed in
            # as True, but the 'or' short-circuits before reading it.
            if always or choice == 'y':
                try:
                    if always:
                        # User talk pages (ns 3) are never marked minor.
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        # Retry the save after waiting.
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)
def add_text(page=None, addText=None, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """Add `addText` to `page` and optionally save it (compat variant).

    Returns a 3-tuple: (False, False, always) when the page was skipped or
    the save failed, (True, True, always) otherwise.  `up` prepends
    instead of appends; `reorderEnabled` keeps categories, interwiki links
    and "featured article" star templates at the bottom; `create` allows
    creating a missing page.  `regexSkip`/`regexSkipUrl` and `summary` are
    accepted for interface compatibility; the skip regexes are not
    evaluated in this variant.

    Fixes applied to the previous revision of this function:
    * ``except E, e`` (Python-2-only syntax) replaced by ``except E as e``;
    * a stray ``pass`` removed from the ``try`` body;
    * ``choice`` was read without ever being assigned (NameError whenever
      ``always`` was False) — the confirmation prompt is restored;
    * ``putText=False`` looped forever — the loop now returns;
    * a duplicate ``pywikibot.getSite()`` call removed;
    * the star-template regex is a raw string (no invalid escapes).
    """
    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    starsList = [
        u'bueno',
        u'bom interwiki',
        u'cyswllt[ _]erthygl[ _]ddethol', u'dolen[ _]ed',
        u'destacado', u'destaca[tu]',
        u'enllaç[ _]ad',
        u'enllaz[ _]ad',
        u'leam[ _]vdc',
        u'legătură[ _]a[bcf]',
        u'liamm[ _]pub',
        u'lien[ _]adq',
        u'lien[ _]ba',
        u'liên[ _]kết[ _]bài[ _]chất[ _]lượng[ _]tốt',
        u'liên[ _]kết[ _]chọn[ _]lọc',
        u'ligam[ _]adq',
        u'ligoelstara',
        u'ligoleginda',
        u'link[ _][afgu]a', u'link[ _]adq', u'link[ _]f[lm]',
        u'link[ _]km', u'link[ _]sm', u'linkfa',
        u'na[ _]lotura',
        u'nasc[ _]ar',
        u'tengill[ _][úg]g',
        u'ua',
        u'yüm yg',
        u'רא',
        u'وصلة مقالة جيدة',
        u'وصلة مقالة مختارة',
    ]
    errorCount = 0
    site = pywikibot.getSite()
    pathWiki = site.family.nicepath(site.lang)
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', '\n')
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"\n%s" % addText
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                     categoriesInside, site,
                                                     True)
            # Dealing the stars' issue: star templates found in the page
            # are pulled out and re-appended, sorted, before the interwikis.
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(r'(\{\{(?:template:|)%s\|.*?\}\}[\s]*)'
                                   % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + '\r\n\r\n'
                allstars.sort()
                for element in allstars:
                    newtext += '%s\r\n' % element.strip()
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            newtext += u"\n%s" % addText
    else:
        newtext = addText + '\n' + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        #pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    choice = ''
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                # FIX: 'choice' used to be read without ever being
                # assigned; ask the user as the newer variants do.
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'n', 'a'], 'n')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
            if always or choice == 'y':
                try:
                    if always:
                        page.put(newtext, summary, minorEdit=False)
                    else:
                        page.put_async(newtext, summary, minorEdit=False)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < 5:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(5)
                        # Retry the save after waiting.
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
        # FIX: previously a putText=False call never left this loop.
        return (True, True, always)
def snap(self, date):
    """Create a dated snapshot (archive copy) of the main page.

    Picks the newest main-page revision older than `date`, substitutes
    the date and article-count placeholders and the transcluded
    sub-templates, expands and cleans the result, and saves it under the
    archive title.

    NOTE(review): assumes self.mainversions is ordered newest first and
    contains a revision older than `date`; otherwise the scan below runs
    off the end of the list (IndexError).
    """
    # German locale so strftime('%A') yields German day names.
    locale.setlocale(locale.LC_ALL, 'de_DE.utf8')
    monthName = pywikibot.date.monthName(self.site.language(), date.month)
    pywikibot.output(u'\n\ncreating snapshot for '
                     + self.format_date(date.day, monthName, date.year))
    # Localised titles/texts for this wiki (fallback disabled on purpose).
    l_mainPageTitle = pywikibot.translate(self.site, mainPageTitle,
                                          fallback=False)
    l_archiveTitlePrefix = pywikibot.translate(self.site, archiveTitlePrefix,
                                               fallback=False)
    l_archivePageIntro = pywikibot.translate(self.site, archivePageIntro,
                                             fallback=False)
    l_archiveComment = pywikibot.translate(self.site, archiveComment,
                                           fallback=False)
    l_archivePageIntro = l_archivePageIntro.format(day=date.strftime('%d'),
                                                   month=date.strftime('%m'),
                                                   year=date.strftime('%Y'))
    title = self.format_date(date.day, monthName, date.year)
    self.archivePageTitle = l_archiveTitlePrefix + title
    archivePage = pywikibot.Page(self.site, self.archivePageTitle)
    # Find the newest main-page revision saved before `date`.
    i = -1
    while True:
        i += 1
        laststamp = self.mainversions[i][1]
        # Revision timestamps are UTC; compare in the local timezone.
        laststamp = laststamp.replace(
            tzinfo=tz.tzutc()).astimezone(self.localtz)
        if laststamp < date:
            revid = self.mainversions[i][0]
            text = self.mainPage.getOldVersion(revid)
            break
    # Freeze the date-dependent magic words to the snapshot date.
    text = text.replace(u'{{LOCALDAY}}', unicode(date.day))
    text = text.replace(u'{{LOCALDAYNAME}}', date.strftime('%A'))
    text = text.replace(u'{{LOCALMONTHNAME}}', monthName)
    text = text.replace(u'{{LOCALYEAR}}', unicode(date.year))
    text = text.replace(u'{{/Interwikis}}', u'')
    # Freeze the article counter to the value recorded for that day.
    text = text.replace(u'{{FormatZahlLokal|{{ARTIKELANZAHL:R}}}}',
                        self.numberofarticles[self.format_date(date.day,
                                                               monthName,
                                                               date.year)])
    text = text.replace(u'{{ARTIKELANZAHL}}',
                        self.numberofarticles[self.format_date(date.day,
                                                               monthName,
                                                               date.year)])
    # templates in main page subspace
    replaced_templates = [l_mainPageTitle]
    templates = re.findall(u'{{/([^}]*?)}}', text)
    for t in templates:
        t_text = self.replace_template(l_mainPageTitle + u'/' + t, date)
        text = text.replace(u'{{/' + t + u'}}', t_text)
        replaced_templates.append(l_mainPageTitle + u'/' + t)
    templates = [u'Hauptseite Verstorbene']
    # templates in Template: namespace
    for t in templates:
        t_text = self.replace_template(u'Vorlage:' + t, date)
        text = text.replace(u'{{' + t + u'}}', t_text)
        replaced_templates.append(t)
    pywikibot.output(u'Replaced templates: ' + unicode(replaced_templates))
    # Second pass for placeholders introduced by the replaced templates.
    text = text.replace(u'{{LOCALDAY}}', unicode(date.day))
    text = text.replace(u'{{LOCALDAYNAME}}', date.strftime('%A'))
    text = text.replace(u'{{LOCALMONTHNAME}}', monthName)
    text = text.replace(u'{{LOCALYEAR}}', unicode(date.year))
    # Report any templates that are still transcluded after substitution.
    remaining = []
    code = mwparserfromhell.parse(text)
    for template in code.filter_templates():
        remaining.append(unicode(template.name))
    pywikibot.output(u'\nRemaining templates: ' + unicode(remaining))
    archivePage.text = text
    # Expand what is left, then strip interwiki and category links.
    archivePage.text = pywikibot.removeLanguageLinks(
        archivePage.expand_text())
    archivePage.text = pywikibot.removeCategoryLinks(archivePage.text)
    archivePage.text = l_archivePageIntro + archivePage.text
    # print archivePage.text
    archivePage.save(comment=l_archiveComment, botflag=True, minor=False)
    self.redlinks(archivePage, date, monthName)
    # Throttle before the next snapshot.
    time.sleep(15)
def add_text(page=None, addText=None, summary=None, regexSkip=None,
             regexSkipUrl=None, always=False, up=False, putText=True,
             oldTextGiven=None, reorderEnabled=True, create=False):
    """Add `addText` to `page` and optionally save the result.

    Returns a 3-tuple:
      * (False, False, always) when the page was skipped or the save failed,
      * (True, True, always) after a successful save,
      * (text, newtext, always) when `putText` is False, so the caller can
        inspect or save the change itself.

    `regexSkip`/`regexSkipUrl` skip pages whose text/rendered page matches;
    `up` prepends instead of appends; `reorderEnabled` keeps categories,
    interwiki links and "featured article" star templates at the bottom;
    `create` allows creating a missing page.
    """
    if not addText:
        raise NoEnoughData('You have to specify what text you want to add!')
    site = page.site
    if not summary:
        summary = i18n.twtranslate(site, 'add_text-adding',
                                   {'adding': addText[:200]})
    # When a page is tagged as "really well written" it has a star in the
    # interwiki links. This is a list of all the templates used (in regex
    # format) to make the stars appear.
    errorCount = 0
    if putText:
        pywikibot.output(u'Loading %s...' % page.title())
    if oldTextGiven is None:
        try:
            text = page.get()
        except pywikibot.NoPage:
            if create:
                pywikibot.output(u"%s doesn't exist, creating it!"
                                 % page.title())
                text = u''
            else:
                pywikibot.output(u"%s doesn't exist, skip!" % page.title())
                return (False, False, always)
        except pywikibot.IsRedirectPage:
            pywikibot.output(u"%s is a redirect, skip!" % page.title())
            return (False, False, always)
    else:
        text = oldTextGiven
    # Understand if the bot has to skip the page or not
    # In this way you can use both -except and -excepturl
    if regexSkipUrl is not None:
        url = site.nice_get_address(page.title(asUrl=True))
        result = re.findall(regexSkipUrl, site.getUrl(url))
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -exceptUrl is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    if regexSkip is not None:
        result = re.findall(regexSkip, text)
        if result != []:
            pywikibot.output(
u'''Exception! regex (or word) used with -except is in the page. Skip!
Match was: %s''' % result)
            return (False, False, always)
    # If not up, text put below
    if not up:
        newtext = text
        # Translating the \\n into binary \n
        addText = addText.replace('\\n', config.line_separator)
        if (reorderEnabled):
            # Getting the categories
            categoriesInside = pywikibot.getCategoryLinks(newtext, site)
            # Deleting the categories
            newtext = pywikibot.removeCategoryLinks(newtext, site)
            # Getting the interwiki
            interwikiInside = pywikibot.getLanguageLinks(newtext, site)
            # Removing the interwiki
            newtext = pywikibot.removeLanguageLinks(newtext, site)
            # Adding the text
            newtext += u"%s%s" % (config.line_separator, addText)
            # Reputting the categories
            newtext = pywikibot.replaceCategoryLinks(newtext,
                                                     categoriesInside, site,
                                                     True)
            # Dealing the stars' issue: star templates found in the page
            # are pulled out and re-appended, sorted, before the interwikis.
            allstars = []
            starstext = pywikibot.removeDisabledParts(text)
            for star in starsList:
                regex = re.compile(
                    '(\{\{(?:template:|)%s\|.*?\}\}[\s]*)' % star, re.I)
                found = regex.findall(starstext)
                if found != []:
                    newtext = regex.sub('', newtext)
                    allstars += found
            if allstars != []:
                newtext = newtext.strip() + config.line_separator * 2
                allstars.sort()
                for element in allstars:
                    newtext += '%s%s' % (element.strip(), config.LS)
            # Adding the interwiki
            newtext = pywikibot.replaceLanguageLinks(newtext, interwikiInside,
                                                     site)
        else:
            newtext += u"%s%s" % (config.line_separator, addText)
    else:
        newtext = addText + config.line_separator + text
    if putText and text != newtext:
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(text, newtext)
    # Let's put the changes.
    while True:
        # If someone load it as module, maybe it's not so useful to put the
        # text in the page
        if putText:
            if not always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All', 'open in Browser'],
                    ['y', 'n', 'a', 'b'], 'n')
                if choice == 'a':
                    always = True
                elif choice == 'n':
                    return (False, False, always)
                elif choice == 'b':
                    webbrowser.open(
                        "http://%s%s" % (site.hostname(),
                                         site.nice_get_address(
                                             page.title(asUrl=True))))
                    pywikibot.input("Press Enter when finished in browser.")
            # NOTE(review): 'choice' is unbound when 'always' was passed in
            # as True, but the 'or' short-circuits before reading it.
            if always or choice == 'y':
                try:
                    if always:
                        # User talk pages (ns 3) are never marked minor.
                        page.put(newtext, summary,
                                 minorEdit=page.namespace() != 3)
                    else:
                        page.put_async(newtext, summary,
                                       minorEdit=page.namespace() != 3)
                except pywikibot.EditConflict:
                    pywikibot.output(u'Edit conflict! skip!')
                    return (False, False, always)
                except pywikibot.ServerError:
                    errorCount += 1
                    if errorCount < config.max_retries:
                        pywikibot.output(u'Server Error! Wait..')
                        time.sleep(config.retry_wait)
                        # Retry the save after waiting.
                        continue
                    else:
                        raise pywikibot.ServerError(u'Fifth Server Error!')
                except pywikibot.SpamfilterError as e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                    return (False, False, always)
                except pywikibot.PageNotSaved as error:
                    pywikibot.output(u'Error putting page: %s' % error.args)
                    return (False, False, always)
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % page.title())
                    return (False, False, always)
                else:
                    # Break only if the errors are one after the other...
                    errorCount = 0
                    return (True, True, always)
        else:
            return (text, newtext, always)