def main():
    """Entry point: build a page generator from the arguments and run the bot."""
    # page generator
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitle = []
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # Never ask before changing a page
    always = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    for arg in pywikibot.handleArgs():
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            gen = XmlDumpNoReferencesPageGenerator(xmlFilename)
        elif arg.startswith('-namespace:'):
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                # Namespace given by name rather than number.
                namespaces.append(arg[11:])
        elif arg == '-always':
            always = True
        else:
            if not genFactory.handleArg(arg):
                pageTitle.append(arg)
    if pageTitle:
        # Remaining free-form arguments name one single page to work on.
        page = pywikibot.Page(pywikibot.getSite(), ' '.join(pageTitle))
        gen = iter([page])
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if not gen:
        # Fall back to the per-wiki maintenance category, when one is known.
        site = pywikibot.getSite()
        try:
            cat = maintenance_category[site.family.name][site.lang]
        except KeyError:
            # Fix: was a bare ``except:`` — only the missing-mapping case
            # should be ignored, not KeyboardInterrupt/SystemExit etc.
            pass
        else:
            import catlib
            if not namespaces:
                namespaces = [0]
            cat = catlib.Category(site, "%s:%s" % (site.category_namespace(),
                                                   cat))
            gen = pagegenerators.CategorizedPageGenerator(cat)
    if not gen:
        pywikibot.showHelp('noreferences')
    else:
        if namespaces:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        bot = NoReferencesBot(preloadingGen, always)
        bot.run()
def main(*args):
    """Run the hewiki replace bot over an XML dump.

    Requires ``-xml:<dump file>``; optional ``-xmlstart`` and ``-summary``
    arguments are honoured.

    @param args: command line arguments
    """
    pywikibot.output('Starting hewiki-replacebot')
    editSummary = replaceConfig.defaultSummary
    xmlFilename = None
    xmlStart = None
    for arg in pywikibot.handle_args(*args):
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input('Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
    if xmlFilename is None:
        pywikibot.output('no xml dump specified. please fill -xml and the xml file to be used')
        return
    replaceDict = fillReplementsDict()
    # Fix: copy the configured list; the original aliased it, so the
    # ``+=`` below silently extended replaceConfig.safeTemplates in place.
    safe_templates = list(replaceConfig.safeTemplates)
    # add external links templates
    site = pywikibot.Site()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        cite_templates = pywikibot.Category(site, safeCategory).articles(
            namespaces=10, recurse=True)
        cite_templates = [page.title(withNamespace=False)
                          for page in cite_templates]
        safe_templates += cite_templates
    file_usage_rgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    # Fix: raw strings — the originals relied on invalid escape sequences
    # ('\[', '\{'), a DeprecationWarning/SyntaxWarning in modern Python.
    # The compiled patterns are byte-identical to before.
    yiRgx = re.compile(r'\[\[yi:.*?\]\]')
    safeTemplatesRgx = re.compile(
        r'\{\{(' + '|'.join(safe_templates) + r').*?\}\}', re.I)
    exceptions = {
        'title': [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside': [file_usage_rgx, safeTemplatesRgx, yiRgx],
        'inside-tags': ['nowiki', 'math', 'comment', 'pre', 'source',
                        'hyperlink', 'gallery'],
        'require-title': [],
    }
    # avoid searching in other namespaces in the xml
    exceptions_with_title_ns = dict(exceptions)
    exceptions_with_title_ns['title'] = [
        re.compile('^' + re.escape(ns_name) + ':')
        for ns_index, ns in site.namespaces.items()
        if ns_index not in replaceConfig.namespaces
        for ns_name in ns]
    gen = XmlDumpReplacePageGeneratorHe(replaceDict, xmlFilename, xmlStart,
                                        exceptions_with_title_ns, site)
    gen_factory = pywikibot.pagegenerators.GeneratorFactory()
    gen = gen_factory.getCombinedGenerator(gen)
    gen = pywikibot.pagegenerators.NamespaceFilterPageGenerator(
        gen, replaceConfig.namespaces, site)
    gen = pywikibot.pagegenerators.PreloadingGenerator(gen)
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replaceDict, exceptions, editSummary, site=site)
    site.login()
    bot.run()
    pywikibot.output('finished all replacements')
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(
                XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    if not gen:
        # No generator was given: fall back to the wiki's maintenance
        # category of pages lacking references, when it exists.
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # Fix: narrowed from a bare ``except:`` — keep the best-effort
            # fallback but stop swallowing KeyboardInterrupt/SystemExit.
            pass
        else:
            cat = pywikibot.Category(site, "%s:%s"
                                     % (site.namespaces.CATEGORY, cat))
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def getCategoryGen(self, arg, length, recurse=False):
    """Build a CategorizedPageGenerator from a ``-cat`` style argument."""
    site = pywikibot.getSite()
    # Everything after the option prefix is the category name; prompt
    # for one when the argument carries no payload.
    if len(arg) == length:
        name = i18n.input("pywikibot-enter-category-name")
    else:
        name = arg[length + 1 :]
    # '#' is accepted as a synonym for '|' when separating the start page.
    name = name.replace("#", "|")
    start = None
    sep = name.find("|")
    if sep > 0:
        name, start = name[:sep], name[sep + 1 :]
    category = catlib.Category(site, "%s:%s" % (site.namespace(14), name))
    return CategorizedPageGenerator(category, start=start, recurse=recurse)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    if not gen:
        # Fall back to the wiki's maintenance category when no generator
        # was specified on the command line.
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # Fix: narrowed from a bare ``except:`` — the fallback stays
            # best-effort, but KeyboardInterrupt/SystemExit now propagate.
            pass
        else:
            cat = pywikibot.Category(site, "%s:%s" % (
                site.category_namespace(), cat))
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def getCategoryGen(self, arg, length, recurse=False):
    """Return a CategorizedPageGenerator for a category argument."""
    site = pywikibot.getSite()
    # Use the payload after the option prefix, or ask the user for one.
    name = (i18n.input('pywikibot-enter-category-name')
            if len(arg) == length else arg[length + 1:])
    # Allow '#' as an alternative separator for the optional start title.
    name = name.replace('#', '|')
    start = None
    pos = name.find('|')
    if pos > 0:
        start = name[pos + 1:]
        name = name[:pos]
    category = catlib.Category(site, "%s:%s" % (site.namespace(14), name))
    return CategorizedPageGenerator(category, start=start, recurse=recurse)
def setSubCategoriesGen(self, arg, length, recurse=False, content=False):
    """Return a SubCategoriesPageGenerator for a ``-subcats`` argument."""
    # Category name follows the option prefix; prompt when it is missing.
    if len(arg) == length:
        name = i18n.input('pywikibot-enter-category-name')
    else:
        name = arg[length + 1:]
    # An optional '|suffix' names the subcategory to start from.
    sep = name.find('|')
    if sep > 0:
        name, start = name[:sep], name[sep + 1:]
    else:
        start = None
    category = pywikibot.Category(pywikibot.Link(name, defaultNamespace=14))
    return SubCategoriesPageGenerator(category, start=start,
                                      recurse=recurse, content=content)
def setSubCategoriesGen(self, arg, length, recurse=False):
    """Return a SubCategoriesPageGenerator built from a command line option."""
    site = pywikibot.getSite()
    # Take the category name after the colon, or ask for one interactively.
    name = (i18n.input('pywikibot-enter-category-name')
            if len(arg) == length else arg[length + 1:])
    # Split off an optional start title given after '|'.
    pos = name.find('|')
    if pos > 0:
        start = name[pos + 1:]
        name = name[:pos]
    else:
        start = None
    category = catlib.Category(site, "%s:%s" % (site.namespace(14), name))
    return SubCategoriesPageGenerator(category, start=start, recurse=recurse)
def getCategoryGen(self, arg, length, recurse=False, content=False):
    """Return a CategorizedPageGenerator for a ``-cat`` style argument."""
    if len(arg) == length:
        name = i18n.input('pywikibot-enter-category-name')
    else:
        name = arg[length + 1:]
    # '#' may be used instead of '|' to separate the start page title.
    name = name.replace('#', '|')
    start = None
    sep = name.find('|')
    if sep > 0:
        start = name[sep + 1:]
        name = name[:sep]
    # Link constructor automatically prepends the localized namespace
    # if it is not included in the user's input.
    category = pywikibot.Category(pywikibot.Link(name, defaultNamespace=14))
    return CategorizedPageGenerator(category, start=start,
                                    recurse=recurse, content=content)
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    :type args: str
    """
    options = {}
    dump_gen = None
    # Process global args and prepare generator args parser
    factory = pagegenerators.GeneratorFactory()
    for argument in pywikibot.handle_args(args):
        option, _, value = argument.partition(':')
        if option == '-xml':
            filename = value or i18n.input('pywikibot-enter-xml-filename')
            dump_gen = XmlDumpNoReferencesPageGenerator(filename)
        elif option == '-always':
            options['always'] = True
        elif option == '-quiet':
            options['verbose'] = False
        else:
            factory.handle_arg(argument)
    page_gen = factory.getCombinedGenerator(dump_gen, preload=True)
    if not page_gen:
        # Fall back to the wiki's maintenance category, if one is known
        # on the repository.
        repo_cat = pywikibot.Site().page_from_repository(maintenance_category)
        if repo_cat:
            page_gen = repo_cat.articles(namespaces=factory.namespaces or [0])
    if page_gen:
        NoReferencesBot(generator=page_gen, **options).run()
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
def main():
    """Entry point: parse arguments, build a generator and run the bot."""
    options = {}
    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
            genFactory.gens.append(XmlDumpNoReferencesPageGenerator(xmlFilename))
        elif arg == '-always':
            options['always'] = True
        elif arg == '-quiet':
            options['verbose'] = False
        else:
            genFactory.handleArg(arg)
    gen = genFactory.getCombinedGenerator()
    if not gen:
        # No generator given: fall back to the wiki's maintenance category.
        site = pywikibot.Site()
        try:
            cat = site.expand_text(
                site.mediawiki_message(maintenance_category))
        except Exception:
            # Fix: narrowed from a bare ``except:`` so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            pass
        else:
            cat = pywikibot.Category(site, "%s:%s" % (
                site.category_namespace(), cat))
            gen = cat.articles(namespaces=genFactory.namespaces or [0])
    if gen:
        bot = NoReferencesBot(gen, **options)
        bot.run()
    else:
        pywikibot.showHelp()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        # Generator-related arguments are consumed by the factory first.
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            # Anything unrecognised is treated as old/new replacement text.
            commandline_replacements.append(arg)
    site = pywikibot.Site()
    # Replacements come in old/new pairs, so an odd count is an error.
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error('require even number of replacements.')
    if not commandline_replacements:
        # No pairs on the command line: optionally collect them
        # interactively (always, unless a -fix was given).
        if fixes_set:
            manual = pywikibot.input_yn('Replacements via -fix: set. Apply '
                                        'also manual replacements?',
                                        default=False)
        else:
            manual = True
        if manual:
            old = pywikibot.input(
                u'Please enter the text that should be replaced:')
            while old:
                new = pywikibot.input(u'Please enter the new text:')
                commandline_replacements += [old, new]
                old = pywikibot.input(
                    u'Please enter another text that should be replaced,'
                    + u'\nor press Enter to start:')
    # Build Replacement objects from the collected old/new pairs; the
    # first pair also seeds a default edit summary.
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-%s +%s)' % (replacement.old,
                                                replacement.new)}
            )
        replacements.append(replacement)
    if not edit_summary:
        # No -summary given: offer the generated one and let the user edit it.
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s'
                             % single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            u'Press Enter to use this automatic message, or enter a '
            + u'description of the\nchanges your bot will make:')
    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % ', '.join(fixes.fixes.keys()))
            return
        # A fix may define its own summary, either as a single i18n key
        # or as a per-language mapping.
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        for replacement in fix['replacements']:
            # A 3rd tuple element, when present, overrides the fix summary.
            summary = set_summary if len(replacement) < 3 else replacement[2]
            replacements.append(Replacement(
                old=replacement[0],
                new=replacement[1],
                use_regex=fix.get('regex'),
                edit_summary=summary,
                exceptions=fix.get('exceptions'),
                case_insensitive=fix.get('nocase')
            ))
    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE
    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)
    precompile_exceptions(exceptions, regex, flags)
    if xmlFilename:
        # xmlStart is only bound when -xmlstart was parsed above.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        # Build a direct database query matching any of the old patterns.
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
             for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()
    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is
    # unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true. Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.

    @param arg: a single command line argument, e.g. '-cat:Foo'
    @return: True when the argument was consumed, False otherwise
    """
    site = pywikibot.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = i18n.input(
                'pywikibot-enter-file-links-processing')
        # Accept the title with or without the localized File: prefix.
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = pywikibot.ImagePage(site,
                                                'Image:' + fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        gen = UserContributionsGenerator(arg[14:])
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = i18n.input('pywikibot-enter-page-processing')
        page = pywikibot.Page(pywikibot.Link(title, pywikibot.Site()))
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-recentchanges'):
        # Fix: was ``>= 15`` which crashed with int('') on a bare
        # '-recentchanges:' ('-recentchanges:' itself is 15 chars long).
        if len(arg) > 15:
            gen = RecentChangesPageGenerator(total=int(arg[15:]))
        else:
            gen = RecentChangesPageGenerator(total=60)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = pywikibot.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        return True
    elif arg.startswith('-ns'):
        if len(arg) == len('-ns'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-ns:'):].split(","))
        return True
    elif arg.startswith('-step'):
        if len(arg) == len('-step'):
            self.step = int(pywikibot.input("What is the step value?"))
        else:
            self.step = int(arg[len('-step:'):])
        return True
    elif arg.startswith('-limit'):
        if len(arg) == len('-limit'):
            self.limit = int(pywikibot.input("What is the limit value?"))
        else:
            self.limit = int(arg[len('-limit:'):])
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    elif arg.startswith('-page'):
        if len(arg) == len('-page'):
            gen = [pywikibot.Page(
                pywikibot.Link(
                    pywikibot.input(
                        u'What page do you want to use?'),
                    pywikibot.getSite())
            )]
        else:
            gen = [pywikibot.Page(pywikibot.Link(arg[len('-page:'):],
                                                 pywikibot.getSite())
                                  )]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = pywikibot.input(
                u'Links to which page should be processed?')
        referredPage = pywikibot.Page(pywikibot.Link(referredPageTitle,
                                                     pywikibot.Site()))
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = pywikibot.input(
                u'Links from which page should be processed?')
        linkingPage = pywikibot.Page(pywikibot.Link(linkingPageTitle,
                                                    pywikibot.Site()))
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = pywikibot.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = pywikibot.input(
                u'Pages that transclude which page should be processed?')
        transclusionPage = pywikibot.Page(
            pywikibot.Link(transclusionPageTitle,
                           defaultNamespace=10,
                           source=pywikibot.Site()))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-start'):
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        firstpagelink = pywikibot.Link(firstPageTitle, pywikibot.Site())
        namespace = firstpagelink.namespace
        firstPageTitle = firstpagelink.title
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = pywikibot.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or pywikibot.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(total=int(limit))
    elif arg.startswith('-newpages'):
        # Fix: was ``>= 10`` which crashed with int('') on a bare
        # '-newpages:' ('-newpages:' itself is 10 chars long).
        if len(arg) > 10:
            gen = NewpagesPageGenerator(total=int(arg[10:]))
        else:
            gen = NewpagesPageGenerator(total=60)
    elif arg.startswith('-imagesused'):
        imagelinkstitle = arg[len('-imagesused:'):]
        if not imagelinkstitle:
            imagelinkstitle = pywikibot.input(
                u'Images on which page should be processed?')
        imagelinksPage = pywikibot.Page(pywikibot.Link(imagelinkstitle,
                                                       pywikibot.Site()))
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = pywikibot.input(
                u'What do you want to search for?')
        # In order to be useful, all namespaces are required
        gen = SearchPageGenerator(mediawikiQuery, namespaces=[])
    elif arg.startswith('-google'):
        gen = GoogleSearchPageGenerator(arg[8:])
    elif arg.startswith('-titleregex'):
        # Fix: the option length was hard-coded as 6/7 (the length of
        # '-regex'?), so the prompt branch was unreachable and the slice
        # chopped the leading characters of the supplied pattern.
        if len(arg) == len('-titleregex'):
            regex = pywikibot.input(
                u'What page names are you looking for?')
        else:
            regex = arg[len('-titleregex:'):]
        gen = RegexFilterPageGenerator(pywikibot.Site().allpages(), regex)
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    else:
        pass
    if gen:
        self.gens.append(gen)
        return True
    else:
        return False
def main(*args):
    """Parse command line arguments, build a generator and run ReplaceRobot.

    NOTE(review): this is Python 2 code (``raise E, msg``, ``xrange``,
    ``basestring``); keep it on a Python 2 interpreter.

    @param args: command line arguments passed on to pywikibot.handleArgs
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = i18n.twtranslate(pywikibot.getSite(), 'replace-replacing',
                                   {'description': u''})
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    filename = None  # The name of the file to save titles
    titlefile = None  # The file object itself
    # If we save, primary behaviour is append rather then new file
    append = True
    # Default: don't write titles to exception file and don't read them.
    excoutfilename = None  # The name of the file to save exceptions
    excoutfile = None  # The file object itself
    # excinfilename: reserved for later use (reading back exceptions)
    # If we save exceptions, primary behaviour is append
    excappend = True
    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(pywikibot.input(
                    u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-saveexcnew'):
            # 'new' variants truncate the output file instead of appending.
            excappend = False
            if len(arg) == 11:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles'
                    + u'\n(will be deleted if exists):')
            else:
                excoutfilename = arg[12:]
        elif arg.startswith('-saveexc'):
            if len(arg) == 8:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles:')
            else:
                excoutfilename = arg[9:]
        elif arg.startswith('-savenew'):
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles'
                    + u'\n(will be deleted if exists):')
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-replacementfile'):
            if len(arg) == len('-replacementfile'):
                replacefile = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacefile = arg[len('-replacementfile')+1:]
            try:
                # Strip a possible BOM and trailing line ends from each line.
                commandline_replacements.extend(
                    [x.lstrip(u'\uFEFF').rstrip('\r\n')
                     for x in codecs.open(replacefile, 'r', 'utf-8')])
            except IOError:
                raise pywikibot.Error(
                    '\n%s cannot be opened. Try again :-)' % replacefile)
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            # Anything unrecognised is treated as old/new replacement text.
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)
    if pywikibot.verbose:
        pywikibot.output(u"commandline_replacements: "
                         + ', '.join(commandline_replacements))
    # Replacements come in old/new pairs, so an odd count is an error.
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error, 'require even number of replacements.'
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if not summary_commandline:
            editSummary = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': ' (-%s +%s)'
                 % (commandline_replacements[0],
                    commandline_replacements[1])})
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            for i in xrange(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                editSummary = i18n.twtranslate(pywikibot.getSite(),
                                               'replace-replacing',
                                               {'description':
                                                replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # No pairs and no fix: collect replacements interactively.
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,'
                + u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a '
                + u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message
    else:
        # Perform one of the predefined actions.
        fixname = fix  # Save the name for passing to exceptions function.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            # The fix summary is either one i18n key or a per-language dict.
            if isinstance(fix['msg'], basestring):
                editSummary = i18n.twtranslate(pywikibot.getSite(),
                                               str(fix['msg']))
            else:
                editSummary = pywikibot.translate(pywikibot.getSite(),
                                                  fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
            # Try to append common extensions for multiple fixes.
            # It must be either a dictionary or a function that returns a dict.
            if 'include' in exceptions:
                incl = exceptions['include']
                if callable(incl):
                    baseExcDict = incl(fixname)
                else:
                    try:
                        baseExcDict = incl
                    except NameError:
                        pywikibot.output(
                            u'\nIncluded exceptions dictionary does not exist.'
                            + u' Continuing with the exceptions\ngiven in fix.\n')
                        baseExcDict = None
                if baseExcDict:
                    # Merge the shared exception lists into this fix's lists.
                    for l in baseExcDict:
                        try:
                            exceptions[l].extend(baseExcDict[l])
                        except KeyError:
                            exceptions[l] = baseExcDict[l]
        if "recursive" in fix:
            recursive = fix['recursive']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        try:
            replacements = fix['replacements']
            # enable regex/replacements as a dictionary for different langs
            if isinstance(replacements, dict):
                replacements = replacements[pywikibot.getSite().lang]
        except KeyError:
            pywikibot.output(u"No replacements given in fix.")
            return
    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE
    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new
    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns
    if xmlFilename:
        # xmlStart is only bound when -xmlstart was parsed above.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        # Build a direct database query matching any of the old patterns.
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    preloadingGen = pagegenerators.PreloadingGenerator(
        gen, pageNumber=maxquerysize)
    # Finally we open the file for page titles or set parameter article to
    # None
    if filename:
        try:
            # This opens in strict error mode, that means bot will stop
            # on encoding errors with ValueError.
            # See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(
                filename, encoding='utf-8',
                mode=(lambda x: x and 'a' or 'w')(append))
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return
    # The same process with exceptions file:
    if excoutfilename:
        try:
            excoutfile = codecs.open(
                excoutfilename, encoding='utf-8',
                mode=(lambda x: x and 'a' or 'w')(excappend))
        except IOError:
            pywikibot.output("%s cannot be opened for writing."
                             % excoutfilename)
            return
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, editSummary,
                       titlefile, excoutfile)
    try:
        bot.run()
    finally:
        # Just for the spirit of programming (they were flushed)
        if titlefile:
            titlefile.close()
        if excoutfile:
            excoutfile.close()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Keyword options forwarded to RedirectRobot (always / delete).
    options = {}
    # what the bot should do (either resolve double redirs, or delete broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    pagename = None
    for arg in pywikibot.handle_args(args):
        # NOTE: the action keywords deliberately carry no leading dash.
        if arg == 'double' or arg == 'do':
            action = 'double'
        elif arg == 'broken' or arg == 'br':
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                # bare "-xml": prompt for the dump filename
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[11:]
            if ns == '':
                # "-namespace:" does NOT yield -namespace:0 further down the road!
                ns = i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[8:])
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg.startswith('-until:'):
            until = arg[7:]
        elif arg.startswith('-total:'):
            number = int(arg[7:])
        elif arg.startswith('-step:'):
            step = int(arg[6:])
        elif arg.startswith('-page:'):
            pagename = arg[6:]
        elif arg == '-always':
            options['always'] = True
        elif arg == '-delete':
            options['delete'] = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)
    # An XML dump is mutually exclusive with the API-based -moves/-fullscan
    # sources; without an action there is nothing to do at all.
    if (
        not action
        or xmlFilename and moved_pages
        or fullscan and xmlFilename
    ):
        problems = []
        if xmlFilename and moved_pages:
            problems += ['Either use a XML file or the moved pages from the API']
        if xmlFilename and fullscan:
            problems += ['Either use a XML file or do a full scan using the API']
        pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                                   missing_action=not action)
    else:
        pywikibot.Site().login()
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, step, pagename)
        bot = RedirectRobot(action, gen, number=number, **options)
        bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith(('-sql', '-mysqlquery')):
            if arg.startswith('-sql'):
                issue_deprecation_warning('The usage of "-sql"', '-mysqlquery',
                                          1, ArgumentDeprecationWarning,
                                          since='20180617')
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            issue_deprecation_warning('-replacementfile', '-pairsfile', 2,
                                      ArgumentDeprecationWarning,
                                      since='20160304')
        elif arg.startswith('-pairsfile'):
            # -pairsfile may not split an old/new pair given on the CLI
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True
            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line.
        # BUG FIX: str.lstrip() returns a new string and does not modify in
        # place; the previous code discarded the result, so a UTF-8 BOM was
        # never removed and the first pattern could silently fail to match.
        file_replacements[0] = file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    # No replacements given at all (and no fix): ask interactively.
    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input('Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-{0} +{1})'.format(replacement.old,
                                                      replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, basestring):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handleArg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{0}" (replacement #{1})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    # Show the example summary and/or let the user type a custom one.
    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: '
                             '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        # xmlStart is only bound if -xmlstart was seen; the NameError probe
        # defaults it to None.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements])
            # NOTE(review): `exceptions` is a dict and is always truthy here;
            # iterating it yields key strings, which have no `.pattern`.
            # This branch looks broken for bare `-mysqlquery` with CLI
            # exception options — confirm against the tracker before relying
            # on it. Left unchanged to preserve behavior.
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join([
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions])
            else:
                exceptClause = ''
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    bot = ReplaceRobot(gen, replacements, exceptions, allowoverlap,
                       recursive, add_cat, sleep, edit_summary,
                       always=acceptall, site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}        # keyword options for RedirectRobot
    gen_options = {}    # keyword options for RedirectGenerator
    # what the bot should do (either resolve double redirs, or process broken
    # redirs)
    action = None
    namespaces = []
    # page sources seen on the command line (-fullscan / -moves / -xml);
    # they are mutually exclusive, so more than one is an error.
    source = set()

    for arg in pywikibot.handle_args(args):
        # split "-opt:value" into the flag and its value; option is the
        # flag name without the leading dash
        arg, sep, value = arg.partition(':')
        option = arg.partition('-')[2]
        # bot options
        if arg == 'do':
            action = 'double'
        elif arg == 'br':
            action = 'broken'
        elif arg in ('both', 'broken', 'double'):
            action = arg
        elif option in ('always', 'delete'):
            options[option] = True
        elif option == 'total':
            options[option] = gen_options[option] = int(value)
        elif option == 'sdtemplate':
            options['sdtemplate'] = value or pywikibot.input(
                'Which speedy deletion template to use?')
        # generator options
        elif option in ('fullscan', 'moves'):
            gen_options[option] = True
            source.add(arg)
        elif option == 'xml':
            gen_options[option] = value or i18n.input(
                'pywikibot-enter-xml-filename')
            source.add(arg)
        elif option == 'namespace':
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            ns = value or i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == 'offset':
            gen_options[option] = int(value)
        elif option in ('page', 'start', 'until'):
            gen_options[option] = value
        # deprecated or unknown options
        elif option == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(arg), 2,
                                      ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    gen_options['namespaces'] = namespaces

    if len(source) > 1:
        problem = 'You can only use one of {0} options.'.format(
            ' or '.join(source))
        pywikibot.bot.suggest_help(additional_text=problem,
                                   missing_action=not action)
        return
    if not action:
        pywikibot.bot.suggest_help(missing_action=True)
    else:
        pywikibot.Site().login()
        options['generator'] = RedirectGenerator(action, **gen_options)
        bot = RedirectRobot(action, **options)
        bot.run()
def main():
    """Process command line arguments and run the weblink checker bot."""
    gen = None
    singlePageTitle = []
    xmlFilename = None
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    # HTTP status codes the checker should not report as dead
    HTTPignore = []
    # days a link must stay dead before it is reported
    day = 7

    # Process global args and prepare generator args parser
    local_args = pywikibot.handleArgs()
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg.startswith('-namespace:'):
            # keep non-numeric namespace values as strings
            try:
                namespaces.append(int(arg[11:]))
            except ValueError:
                namespaces.append(arg[11:])
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            # anything the generator factory does not recognize is part
            # of a single page title
            if not genFactory.handleArg(arg):
                singlePageTitle.append(arg)

    if singlePageTitle:
        singlePageTitle = ' '.join(singlePageTitle)
        page = pywikibot.Page(pywikibot.Site(), singlePageTitle)
        gen = iter([page])

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default it here
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart, namespaces)

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if namespaces != []:
            gen = pagegenerators.NamespaceFilterPageGenerator(gen, namespaces)
        # fetch at least 240 pages simultaneously from the wiki, but more if
        # a high thread number is set.
        pageNumber = max(240, config.max_external_links * 2)
        gen = pagegenerators.PreloadingGenerator(gen, step=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, day)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output(u"Waiting for remaining %i threads to "
                                     u"finish, please wait..."
                                     % countLinkCheckThreads())
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output(u'Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(u'Remaining %i threads will be killed.'
                                 % countLinkCheckThreads())
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output(u'Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output(u'Saving history...')
            bot.history.save()
    else:
        pywikibot.showHelp()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    # Keyword options forwarded to RedirectRobot.
    options = {}
    # what the bot should do (either resolve double redirs, or process broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    pagename = None

    for arg in pywikibot.handle_args(args):
        # split "-opt:value"; option is the flag without its leading dash
        arg, sep, value = arg.partition(':')
        option = arg[1:]
        # bot options
        if arg == 'do':
            action = 'double'
        elif arg == 'br':
            action = 'broken'
        elif arg in ('both', 'broken', 'double'):
            action = arg
        elif option in ('always', 'delete'):
            options[option] = True
        elif option == 'total':
            options['number'] = number = int(value)
        # generator options
        elif option == 'fullscan':
            fullscan = True
        elif option == 'xml':
            xmlFilename = value or i18n.input('pywikibot-enter-xml-filename')
        elif option == 'moves':
            moved_pages = True
        elif option == 'namespace':
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            ns = value or i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == 'offset':
            offset = int(value)
        elif option == 'start':
            start = value
        elif option == 'until':
            until = value
        elif option == 'page':
            pagename = value
        # deprecated or unknown options
        elif option == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(arg), 2,
                                      ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # An XML dump is mutually exclusive with the API-based sources.
    if not action or xmlFilename and (moved_pages or fullscan):
        problems = []
        if xmlFilename and moved_pages:
            problems += [
                'Either use a XML file or the moved pages from the API'
            ]
        if xmlFilename and fullscan:
            problems += [
                'Either use a XML file or do a full scan using the API'
            ]
        pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                                   missing_action=not action)
    else:
        pywikibot.Site().login()
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, pagename)
        bot = RedirectRobot(action, gen, **options)
        bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    gen = None
    xmlFilename = None
    # HTTP status codes the checker should not report as dead
    HTTPignore = []

    # memento_client is imported optionally elsewhere; if the import failed
    # the module object is the ImportError itself.
    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: {0}'.format(memento_client),
             ImportWarning)

    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            config.weblink_dead_days = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default it here
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart,
                                   genFactory.namespaces)
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        if not genFactory.nopreload:
            # fetch at least 240 pages simultaneously from the wiki, but more
            # if a high thread number is set.
            pageNumber = max(240, config.max_external_links * 2)
            gen = pagegenerators.PreloadingGenerator(gen, groupsize=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, config.weblink_dead_days)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output('Waiting for remaining {0} threads to '
                                     'finish, please wait...'.format(
                                         countLinkCheckThreads()))
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output('Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(
                    'Remaining {0} threads will be killed.'.format(
                        countLinkCheckThreads()))
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output('Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output('Saving history...')
            bot.history.save()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def run(self):
    """
    Start the robot.

    Iterates self.generator, applies the configured replacements to each
    page and either saves the result, asks the user interactively, or
    records the title to self.articles for later offline processing.
    (Python 2 code: uses the old `except Exc, name` syntax.)
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping %s because the title is on the exceptions list.'
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            # editability only matters when actually writing to the wiki
            # (self.articles means titles are written to a file instead)
            if not (self.articles or page.canBeEdited()):
                pywikibot.output(u"You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found' % page.title(asLink=True))
            continue
        new_text = original_text
        # interactive loop: repeats after 'Edit' / 'open in Browser' choices
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(u'Skipping %s because it contains text '
                                 u'that is on the exceptions list.'
                                 % page.title(asLink=True))
                break
            new_text = self.doReplacements(new_text)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                break
            if self.recursive:
                # keep replacing until the text reaches a fixed point
                newest_text = self.doReplacements(new_text)
                while newest_text != new_text:
                    new_text = newest_text
                    newest_text = self.doReplacements(new_text)
            if hasattr(self, "addedCat"):
                cats = page.categories()
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = pywikibot.replaceCategoryLinks(new_text, cats)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.showDiff(original_text, new_text)
            if self.acceptall:
                break
            # offer the extra 'no+eXcept' choice only when an exception
            # title file is configured
            if self.exctitles:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'no+eXcept', 'Edit', 'open in Browser',
                     'All', 'Quit'],
                    ['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
            else:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'Edit', 'open in Browser', 'All', 'Quit'],
                    ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                webbrowser.open("http://%s%s" % (
                    page.site.hostname(),
                    page.site.nice_get_address(page.title())))
                i18n.input('pywikibot-enter-finished-browser')
                try:
                    # reload: the user may have edited in the browser
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s has been deleted.'
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == 'q':
                self.writeEditCounter()
                self.writeExceptCounter()
                return
            if choice == 'a':
                self.acceptall = True
            if choice == 'x':
                # May happen only if self.exctitles isn't None
                self.exctitles.write(u"ur'^%s$',\n"
                                     % re.escape(page.title()))
                self.exctitles.flush()
                self.exceptcounter += 1
            if choice == 'y':
                if not self.articles:
                    # Primary behaviour: working on wiki
                    page.put_async(new_text, self.editSummary)
                    self.editcounter += 1
                    # Bug: this increments even if put_async fails
                    # This is separately in two clauses of if for
                    # future purposes to get feedback form put_async
                else:
                    # Save the title for later processing instead of editing
                    self.editcounter += 1
                    self.articles.write(u'#%s\n%s'
                                        % (page.title(asLink=True,
                                                      textlink=True),
                                           self.splitLine()))
                    self.articles.flush()
            # For the peace of our soul :-)
            # choice must be 'N'
            break
        # 'a' (accept all) falls through to here; save synchronously
        if self.acceptall and new_text != original_text:
            if not self.articles:
                # Primary behaviour: working on wiki
                try:
                    page.put(new_text, self.editSummary)
                    self.editcounter += 1  # increment only on success
                except pywikibot.EditConflict:
                    pywikibot.output(u'Skipping %s because of edit conflict'
                                     % (page.title(),))
                except pywikibot.SpamfilterError, e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                except pywikibot.PageNotSaved, error:
                    pywikibot.error(u'putting page: %s' % (error.args,))
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % (page.title(),))
def main(*args):
    """
    Parse the command line and run the redirect bot.

    With an empty args list, sys.argv is consumed instead.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    # chosen action: 'double', 'broken' or 'both' (None -> show help)
    action = None
    # path of a local XML dump; None means work from the live wiki
    xmlFilename = None
    # namespaces to restrict processing to ([] -> all namespaces)
    namespaces = []
    # redirect offset to resume from when reading a dump
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    pagename = None

    for arg in pywikibot.handle_args(args):
        if arg in ('double', 'do'):
            action = 'double'
        elif arg in ('broken', 'br'):
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            xmlFilename = (i18n.input('pywikibot-enter-xml-filename')
                           if len(arg) == 4 else arg[5:])
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            # an empty value prompts the user; note that a bare
            # "-namespace:" does NOT become -namespace:0 automatically
            ns = arg[len('-namespace:'):] \
                or i18n.input('pywikibot-enter-namespace-number')
            if not ns:
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # e.g. -namespace:all — keep the string; only the API
                # read interface understands it
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[len('-offset:'):])
        elif arg.startswith('-start:'):
            start = arg[len('-start:'):]
        elif arg.startswith('-until:'):
            until = arg[len('-until:'):]
        elif arg.startswith('-total:'):
            number = int(arg[len('-total:'):])
        elif arg.startswith('-step:'):
            step = int(arg[len('-step:'):])
        elif arg.startswith('-page:'):
            pagename = arg[len('-page:'):]
        elif arg == '-always':
            options['always'] = True
        elif arg == '-delete':
            options['delete'] = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)

    # A dump file cannot be combined with the API-based page sources.
    conflicts = []
    if xmlFilename and moved_pages:
        conflicts.append(
            'Either use a XML file or the moved pages from the API')
    if xmlFilename and fullscan:
        conflicts.append(
            'Either use a XML file or do a full scan using the API')

    if not action or conflicts:
        pywikibot.bot.suggest_help(additional_text='\n'.join(conflicts),
                                   missing_action=not action)
    else:
        pywikibot.Site().login()
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, step,
                                pagename)
        bot = RedirectRobot(action, gen, number=number, **options)
        bot.run()
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true. Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.

    NOTE: the order of the elif branches is significant — longer option
    prefixes (e.g. '-randomredirect', '-rc-nobots', '-catr', '-subcatsr',
    '-uncattemplates') must be tested before their shorter prefixes
    ('-random', '-recentchanges', '-cat', '-subcats', '-uncat'), because
    str.startswith() is used for matching.
    """
    site = pywikibot.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        # arg[11:] strips '-filelinks:'; empty means "ask the user".
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = i18n.input(
                'pywikibot-enter-file-links-processing')
        # Accept the title with or without the File/Image namespace prefix.
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = pywikibot.ImagePage(site,
                                                'Image:' + fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        # Format: -usercontribs:username[;number]; default 250 contribs
        # when the number is missing or not an integer.
        args = arg[14:].split(';')
        number = None
        try:
            number = int(args[1])
        except:
            number = 250
        gen = UserContributionsGenerator(args[0], number,
                                         namespaces=self.getNamespaces)
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = i18n.input('pywikibot-enter-page-processing')
        page = pywikibot.Page(site, title)
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-randomredirect'):
        if len(arg) == 15:
            gen = RandomRedirectPageGenerator()
        else:
            gen = RandomRedirectPageGenerator(number=int(arg[16:]))
    elif arg.startswith('-random'):
        if len(arg) == 7:
            gen = RandomPageGenerator()
        else:
            gen = RandomPageGenerator(number=int(arg[8:]))
    elif arg.startswith('-recentchanges'):
        if len(arg) >= 15:
            gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                             nobots=False)
        else:
            gen = RecentchangesPageGenerator(nobots=False)
        # Recent changes can list a page several times; deduplicate.
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-rc-nobots'):
        if len(arg) >= 11:
            gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                             nobots=True)
        else:
            gen = RecentchangesPageGenerator(nobots=True)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = pywikibot.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        # Namespace/limit options only record state and return directly —
        # they do not produce a generator by themselves.
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        return True
    elif arg.startswith('-ns'):
        if len(arg) == len('-ns'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-ns:'):].split(","))
        return True
    elif arg.startswith('-limit'):
        if len(arg) == len('-limit'):
            self.limit = int(pywikibot.input("What is the limit value?"))
        else:
            self.limit = int(arg[len('-limit:'):])
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    elif arg.startswith('-page'):
        if len(arg) == len('-page'):
            gen = [pywikibot.Page(
                site,
                pywikibot.input(u'What page do you want to use?'))]
        else:
            gen = [pywikibot.Page(site, arg[len('-page:'):])]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncattemplates'):
        gen = UnCategorizedTemplatesGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = pywikibot.input(
                u'Links to which page should be processed?')
        referredPage = pywikibot.Page(site, referredPageTitle)
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = pywikibot.input(
                u'Links from which page should be processed?')
        linkingPage = pywikibot.Page(site, linkingPageTitle)
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = pywikibot.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = pywikibot.input(
                u'Pages that transclude which page should be processed?')
        # Template transclusions live in namespace 10 (Template:).
        transclusionPage = pywikibot.Page(
            site, "%s:%s" % (site.namespace(10), transclusionPageTitle))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-gorandom'):
        # Pick one random page, then iterate all pages starting from it.
        for firstPage in RandomPageGenerator(number=1):
            firstPageTitle = firstPage.title()
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-start'):
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        # Prefer an explicitly requested namespace over the one implied
        # by the start title.
        if self.namespaces != []:
            namespace = self.namespaces[0]
        else:
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-redirectonly'):
        firstPageTitle = arg[14:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(
            site, firstPageTitle).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects='only')
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = pywikibot.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or pywikibot.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(number=int(limit))
    elif arg == ('-new') or arg.startswith('-new:'):
        if len(arg) >= 5:
            gen = NewpagesPageGenerator(number=int(arg[5:]))
        else:
            gen = NewpagesPageGenerator(number=60)
    elif arg.startswith('-imagelinks'):
        imagelinkstitle = arg[len('-imagelinks:'):]
        if not imagelinkstitle:
            imagelinkstitle = pywikibot.input(
                u'Images on which page should be processed?')
        imagelinksPage = pywikibot.Page(site, imagelinkstitle)
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = pywikibot.input(
                u'What do you want to search for?')
        gen = SearchPageGenerator(mediawikiQuery, number=None,
                                  namespaces=self.getNamespaces)
    elif arg.startswith('-titleregex'):
        if len(arg) == 11:
            regex = pywikibot.input(
                u'What page names are you looking for?')
        else:
            regex = arg[12:]
        gen = RegexFilterPageGenerator(site.allpages(), [regex])
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    elif arg.startswith('-'):
        # Catch-all for log-page options of the form -<mode>log[:user|num].
        mode, log, user = arg.partition('log')
        # exclude -log, -nolog
        if log == 'log' and mode not in ['-', '-no']:
            number = 500
            if not user:
                user = None
            else:
                # user holds ':xxx'; a numeric value is a count, anything
                # else is a user name (optionally 'name;count').
                try:
                    number = int(user[1:])
                    user = None
                except ValueError:
                    user = user[1:]
            if user:
                result = user.split(';')
                user = result[0]
                try:
                    number = int(result[1])
                except:
                    pass
            gen = LogpagesPageGenerator(number, mode[1:], user)
    if gen:
        self.gens.append(gen)
        # NOTE(review): returns the combined generator (truthy) rather
        # than a bare True — callers only test the result for truth.
        return self.getCombinedGenerator()
    else:
        return False
def main(*args):
    """Parse command line arguments for replace.py and run the bot.

    Builds the list of (old, new) replacement pairs from the command
    line, a -replacementfile, interactive input or a predefined -fix,
    compiles them (and the exception patterns) to regular expressions,
    constructs the page generator and finally runs ReplaceRobot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    """
    add_cat = None
    gen = None
    # summary message given on the command line?
    summary_commandline = False
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        # using a separate requirements dict needs some
        # major refactoring of code.
        'require-title': [],
    }
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # This is the maximum number of pages to load per query
    maxquerysize = 60
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    editSummary = i18n.twtranslate(pywikibot.getSite(), 'replace-replacing',
                                   {'description': u''})
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Do not save the page titles, rather work on wiki
    filename = None  # The name of the file to save titles
    titlefile = None  # The file object itself
    # If we save, primary behaviour is append rather then new file
    append = True
    # Default: don't write titles to exception file and don't read them.
    excoutfilename = None  # The name of the file to save exceptions
    excoutfile = None  # The file object itself
    # excinfilename: reserved for later use (reading back exceptions)
    # If we save exceptions, primary behaviour is append
    excappend = True
    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        # '-saveexcnew' must be tested before '-saveexc', and '-savenew'
        # before '-save', because startswith() is used for matching.
        elif arg.startswith('-saveexcnew'):
            excappend = False
            if len(arg) == 11:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles' +
                    u'\n(will be deleted if exists):')
            else:
                excoutfilename = arg[12:]
        elif arg.startswith('-saveexc'):
            if len(arg) == 8:
                excoutfilename = pywikibot.input(
                    u'Please enter the filename to save the excepted titles:')
            else:
                excoutfilename = arg[9:]
        elif arg.startswith('-savenew'):
            append = False
            if len(arg) == 8:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles' +
                    u'\n(will be deleted if exists):')
            else:
                filename = arg[9:]
        elif arg.startswith('-save'):
            if len(arg) == 5:
                filename = pywikibot.input(
                    u'Please enter the filename to save the titles:')
            else:
                filename = arg[6:]
        elif arg.startswith('-replacementfile'):
            if len(arg) == len('-replacementfile'):
                replacefile = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacefile = arg[len('-replacementfile') + 1:]
            try:
                # Strip a possible BOM and line endings from each line.
                commandline_replacements.extend(
                    [x.lstrip(u'\uFEFF').rstrip('\r\n')
                     for x in codecs.open(replacefile, 'r', 'utf-8')])
            except IOError:
                raise pywikibot.Error('\n%s cannot be opened. Try again :-)'
                                      % replacefile)
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            editSummary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-query:'):
            maxquerysize = int(arg[7:])
        else:
            # Anything the generator factory does not recognise is treated
            # as a replacement text (old/new pairs in sequence).
            if not genFactory.handleArg(arg):
                commandline_replacements.append(arg)
    if pywikibot.verbose:
        pywikibot.output(u"commandline_replacements: " +
                         ', '.join(commandline_replacements))
    if (len(commandline_replacements) % 2):
        # NOTE: Python 2 raise syntax — this module predates Python 3.
        raise pywikibot.Error, 'require even number of replacements.'
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if not summary_commandline:
            editSummary = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': ' (-%s +%s)' % (commandline_replacements[0],
                                                commandline_replacements[1])})
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            for i in xrange(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                editSummary = i18n.twtranslate(
                    pywikibot.getSite(), 'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # No replacements and no fix given: ask the user interactively.
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,\n'
                u'or press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            editSummary = summary_message
    else:
        # Perform one of the predefined actions.
        fixname = fix  # Save the name for passing to exceptions function.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                editSummary = i18n.twtranslate(pywikibot.getSite(),
                                               str(fix['msg']))
            else:
                editSummary = pywikibot.translate(pywikibot.getSite(),
                                                  fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
            # Try to append common extensions for multiple fixes.
            # It must be either a dictionary or a function that returns a dict.
            if 'include' in exceptions:
                incl = exceptions['include']
                if callable(incl):
                    baseExcDict = incl(fixname)
                else:
                    try:
                        baseExcDict = incl
                    except NameError:
                        pywikibot.output(
                            u'\nIncluded exceptions dictionary does not exist.'
                            u' Continuing with the exceptions\ngiven in fix.\n')
                        baseExcDict = None
                if baseExcDict:
                    for l in baseExcDict:
                        try:
                            exceptions[l].extend(baseExcDict[l])
                        except KeyError:
                            exceptions[l] = baseExcDict[l]
        if "recursive" in fix:
            recursive = fix['recursive']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        try:
            replacements = fix['replacements']
            # enable regex/replacements as a dictionary for different langs
            if isinstance(replacements, dict):
                replacements = replacements[pywikibot.getSite().lang]
        except KeyError:
            pywikibot.output(u"No replacements given in fix.")
            return
    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE
    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new
    for exceptionCategory in ['title', 'require-title', 'text-contains',
                              'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns
    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; probe for it.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)
    gen = genFactory.getCombinedGenerator(gen)
    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    preloadingGen = pagegenerators.PreloadingGenerator(
        gen, pageNumber=maxquerysize)
    # Finally we open the file for page titles or set parameter article to None
    if filename:
        try:
            # This opens in strict error mode, that means bot will stop
            # on encoding errors with ValueError.
            # See http://docs.python.org/library/codecs.html#codecs.open
            titlefile = codecs.open(filename, encoding='utf-8',
                                    mode=(lambda x: x and 'a' or 'w')(append))
        except IOError:
            pywikibot.output("%s cannot be opened for writing." % filename)
            return
    # The same process with exceptions file:
    if excoutfilename:
        try:
            excoutfile = codecs.open(
                excoutfilename, encoding='utf-8',
                mode=(lambda x: x and 'a' or 'w')(excappend))
        except IOError:
            pywikibot.output("%s cannot be opened for writing."
                             % excoutfilename)
            return
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, editSummary,
                       titlefile, excoutfile)
    try:
        bot.run()
    finally:
        # Just for the spirit of programming (they were flushed)
        if titlefile:
            titlefile.close()
        if excoutfile:
            excoutfile.close()
def main(*args):
    """Run the hewiki replace bot over an XML dump.

    Command line arguments (besides the global pywikibot ones):
      -summary:NAME   edit summary (default: replaceConfig.defaultSummary)
      -xmlstart[:T]   dump page title to resume from (asked if omitted)
      -xml[:FILE]     filename of the XML dump to scan (required)

    @param args: command line arguments
    """
    pywikibot.output('Starting hewiki-replacebot')
    editSummary = replaceConfig.defaultSummary
    xmlFilename = None
    # Initialising xmlStart here removes the fragile
    # "try: xmlStart / except NameError" probe the old code needed.
    xmlStart = None
    for arg in pywikibot.handleArgs(*args):
        if arg.startswith('-summary:'):
            editSummary = arg[9:]
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
    # 'is None' instead of the old '== None' identity-by-equality test.
    if xmlFilename is None:
        pywikibot.output('no xml dump specified. please fill -xml and the xml file to be used')
        return
    replaceDict, exceptReplace = fillReplementsDict()
    safeTemplates = replaceConfig.safeTemplates
    # add external links templates
    genFactory = pagegenerators.GeneratorFactory()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        citeTemplates = genFactory.getCategoryGen(safeCategory, -1, True)
        citeTemplates = [page.title(withNamespace=False)
                         for page in citeTemplates]
        safeTemplates += citeTemplates
    # u'|'.join() replaces the Python-2-only string.join() helper; the
    # backslashes are doubled so the regex patterns are unchanged without
    # relying on invalid escape sequences in non-raw literals.
    fileUsageRgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    yiRgx = re.compile('\\[\\[yi:.*?\\]\\]')
    safeTemplatesRgx = re.compile(
        u'\\{\\{(' + u'|'.join(safeTemplates) + u').*?\\}\\}', re.I)
    exceptions = {
        'title': [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside': [fileUsageRgx, safeTemplatesRgx,
                   re.compile(u'(' + u'|'.join(exceptReplace) + u')'),
                   yiRgx],
        'inside-tags': ['nowiki', 'math', 'comment', 'pre', 'source',
                        'hyperlink', 'gallery'],
        'require-title': [],
    }
    gen = XmlDumpReplacePageGeneratorHe(replaceDict, xmlFilename, xmlStart,
                                        exceptions)
    genFactory.namespaces = replaceConfig.namespaces
    # For debugging purposes, uncomment to work on a specific page:
    # pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
    #          for PageTitle in [u'PAGENAME']]
    # gen = iter(pages)
    maxquerysize = 60
    gen = genFactory.getCombinedGenerator(gen)
    preloadingGen = pagegenerators.PreloadingGenerator(gen,
                                                      pageNumber=maxquerysize)
    # Skip pages edited during the last day so very recent edits can
    # settle before the bot touches them.
    gen = pagegenerators.EdittimeFilterPageGenerator(
        preloadingGen,
        endtime=datetime.datetime.utcnow() - datetime.timedelta(days=1))
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replaceDict, exceptions, editSummary)
    bot.run()
    pywikibot.output('finished all replacements')
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Each argument is split at the first ':' into an option name and a
    value; bot options are collected into ``options`` and passed through
    to RedirectRobot, generator options configure RedirectGenerator.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    # what the bot should do (either resolve double redirs, or delete broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    pagename = None
    for arg in pywikibot.handle_args(args):
        arg, sep, value = arg.partition(':')
        option = arg[1:]
        # bot options
        if arg == 'do':
            action = 'double'
        elif arg == 'br':
            action = 'broken'
        elif arg in ('both', 'broken', 'double'):
            action = arg
        elif option in ('always', 'delete'):
            options[option] = True
        elif option == 'total':
            options['number'] = number = int(value)
        # generator options
        elif option == 'fullscan':
            fullscan = True
        elif option == 'xml':
            xmlFilename = value or i18n.input('pywikibot-enter-xml-filename')
        elif option == 'moves':
            moved_pages = True
        elif option == 'namespace':
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            ns = value or i18n.input('pywikibot-enter-namespace-number')
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == 'offset':
            offset = int(value)
        elif option == 'start':
            start = value
        elif option == 'until':
            until = value
        elif option == 'page':
            pagename = value
        # deprecated or unknown options
        elif option == 'step':
            issue_deprecation_warning('The usage of "{0}"'.format(arg),
                                      2, ArgumentDeprecationWarning)
        else:
            pywikibot.output(u'Unknown argument: {0!s}'.format(arg))
    # An XML dump is mutually exclusive with -moves and -fullscan.
    if not action or xmlFilename and (moved_pages or fullscan):
        problems = []
        if xmlFilename and moved_pages:
            problems += ['Either use a XML file or the moved pages from the API']
        if xmlFilename and fullscan:
            problems += ['Either use a XML file or do a full scan using the API']
        pywikibot.bot.suggest_help(additional_text='\n'.join(problems),
                                   missing_action=not action)
    else:
        pywikibot.Site().login()
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, pagename)
        bot = RedirectRobot(action, gen, **options)
        bot.run()
def run(self):
    """Start the robot.

    Iterates over self.generator, applies the configured replacements to
    each page's text, shows a diff and — unless running with 'yes to
    all' — asks the user how to proceed.  Saving is done asynchronously
    (put_async) for interactive 'y' answers and synchronously (put) in
    accept-all mode.

    NOTE: this module uses Python 2 `except X, e` syntax.
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping %s because the title is on the exceptions list.'
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output(u"You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found' % page.title(asLink=True))
            continue
        new_text = original_text
        # Inner loop: recompute/redisplay the proposed change until the
        # user accepts, rejects or edits it (each branch breaks or
        # continues explicitly).
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(
                    u'Skipping %s because it contains text that is on the exceptions list.'
                    % page.title(asLink=True))
                break
            new_text = self.doReplacements(new_text)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                break
            if self.recursive:
                # Keep applying the replacements until a fixed point is
                # reached.
                newest_text = self.doReplacements(new_text)
                while (newest_text != new_text):
                    new_text = newest_text
                    newest_text = self.doReplacements(new_text)
            if hasattr(self, "addedCat"):
                cats = page.categories(nofollow_redirects=True)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = pywikibot.replaceCategoryLinks(new_text,
                                                              cats)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.showDiff(original_text, new_text)
            if self.acceptall:
                break
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'Edit', 'open in Browser', 'All', 'Quit'],
                ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                webbrowser.open("http://%s%s" % (
                    page.site.hostname(),
                    page.site.nice_get_address(page.title())
                ))
                i18n.input('pywikibot-enter-finished-browser')
                try:
                    # Re-fetch the page: the user may have edited it in
                    # the browser meanwhile.
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s has been deleted.'
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == 'q':
                return
            if choice == 'a':
                self.acceptall = True
            if choice == 'y':
                page.put_async(new_text, self.summary)
            # choice must be 'N'
            break
        if self.acceptall and new_text != original_text:
            try:
                page.put(new_text, self.summary)
            except pywikibot.EditConflict:
                pywikibot.output(u'Skipping %s because of edit conflict'
                                 % (page.title(),))
            except pywikibot.SpamfilterError, e:
                pywikibot.output(
                    u'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.PageNotSaved, error:
                pywikibot.output(u'Error putting page: %s' % (error.args,))
            except pywikibot.LockedPage:
                pywikibot.output(u'Skipping %s (locked page)'
                                 % (page.title(),))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Runs the weblink checker over the selected pages and, on exit, waits
    (up to 30 seconds) for the outstanding link-check threads and saves
    the link history.

    @param args: command line arguments
    @type args: list of unicode
    @return: True if the bot ran, False if no generator could be built
    """
    gen = None
    xmlFilename = None
    HTTPignore = []
    day = 7
    # memento_client is an optional dependency; the module-level import
    # stores the ImportError instance on failure.
    if isinstance(memento_client, ImportError):
        warn('memento_client not imported: %s' % memento_client,
             ImportWarning)
    # Process global args and prepare generator args parser
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if arg == '-talk':
            config.report_dead_links_on_talk = True
        elif arg == '-notalk':
            config.report_dead_links_on_talk = False
        elif arg == '-repeat':
            gen = RepeatPageGenerator()
        elif arg.startswith('-ignore:'):
            HTTPignore.append(int(arg[8:]))
        elif arg.startswith('-day:'):
            day = int(arg[5:])
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        else:
            genFactory.handleArg(arg)
    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; probe for it.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpPageGenerator(xmlFilename, xmlStart,
                                   genFactory.namespaces)
    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        # fetch at least 240 pages simultaneously from the wiki, but more if
        # a high thread number is set.
        pageNumber = max(240, config.max_external_links * 2)
        gen = pagegenerators.PreloadingGenerator(gen, step=pageNumber)
        gen = pagegenerators.RedirectFilterPageGenerator(gen)
        bot = WeblinkCheckerRobot(gen, HTTPignore, day)
        try:
            bot.run()
        finally:
            waitTime = 0
            # Don't wait longer than 30 seconds for threads to finish.
            while countLinkCheckThreads() > 0 and waitTime < 30:
                try:
                    pywikibot.output(u"Waiting for remaining %i threads to "
                                     u"finish, please wait..."
                                     % countLinkCheckThreads())
                    # wait 1 second
                    time.sleep(1)
                    waitTime += 1
                except KeyboardInterrupt:
                    pywikibot.output(u'Interrupted.')
                    break
            if countLinkCheckThreads() > 0:
                pywikibot.output(u'Remaining %i threads will be killed.'
                                 % countLinkCheckThreads())
                # Threads will die automatically because they are daemonic.
            if bot.history.reportThread:
                bot.history.reportThread.shutdown()
                # wait until the report thread is shut down; the user can
                # interrupt it by pressing CTRL-C.
                try:
                    while bot.history.reportThread.isAlive():
                        time.sleep(0.1)
                except KeyboardInterrupt:
                    pywikibot.output(u'Report thread interrupted.')
                    bot.history.reportThread.kill()
            pywikibot.output(u'Saving history...')
            bot.history.save()
        return True
    else:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # Replacement pairs loaded via -pairsfile.
    file_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    xmlStart = None
    sql_query = None  # type: Optional[str]
    # Set the default regular expression flags
    flags = 0
    # Request manual replacements even if replacements are already defined
    manual_input = False

    # Read commandline parameters.
    genFactory = pagegenerators.GeneratorFactory(
        disabled_options=['mysqlquery'])
    local_args = pywikibot.handle_args(args)
    local_args = genFactory.handle_args(local_args)
    # Exception options (-excepttitle: etc.) are split off first.
    local_args, exceptions = handle_exceptions(*local_args)
    for arg in local_args:
        opt, _, value = arg.partition(':')
        if opt == '-regex':
            regex = True
        elif opt == '-xmlstart':
            xmlStart = value or pywikibot.input(
                'Please enter the dumped article to start with:')
        elif opt == '-xml':
            xmlFilename = value or i18n.input('pywikibot-enter-xml-filename')
        elif opt == '-mysqlquery':
            sql_query = value
        elif opt == '-fix':
            fixes_set.append(value)
        elif opt == '-sleep':
            options['sleep'] = float(value)
        elif opt in ('-allowoverlap', '-always', '-recursive'):
            options[opt[1:]] = True
        elif opt == '-nocase':
            flags |= re.IGNORECASE
        elif opt == '-dotall':
            flags |= re.DOTALL
        elif opt == '-multiline':
            flags |= re.MULTILINE
        elif opt == '-addcat':
            options['addcat'] = value
        elif opt == '-summary':
            edit_summary = value
        elif opt == '-automaticsummary':
            # True (not a string) signals "use the generated summary".
            edit_summary = True
        elif opt == '-manualinput':
            manual_input = True
        elif opt == '-pairsfile':
            file_replacements = handle_pairsfile(value)
        else:
            # Non-option arguments are old/new replacement text pairs.
            commandline_replacements.append(arg)

    # handle_pairsfile returns None on error; abort in that case.
    if file_replacements is None:
        return

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return

    commandline_replacements += file_replacements
    if not(commandline_replacements or fixes_set) or manual_input:
        commandline_replacements += handle_manual()

    # The summary stored here won't be actually used but is only an example
    site = pywikibot.Site()
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-{} +{})'.format(replacement.old,
                                                    replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {}'
                             .format(', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: {}'
                                 .format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix "{}"'
                              .format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], str):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        # A fix may carry its own generator; only use it when the user
        # did not supply one on the command line.
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, str):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handle_arg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{}" (replacement #{})'.format(fix_name, index))
            # Warn about invisible formatting characters (e.g. U+200E)
            # hidden in the patterns, which are easy to miss visually.
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(
                                          replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(
                                          replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: {}'
                             .format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif sql_query is not None:
        # Only -excepttext option is considered by the query. Other
        # exceptions are taken into account by the ReplaceRobot
        gen = handle_sql(sql_query, replacements, exceptions['text-contains'])

    gen = genFactory.getCombinedGenerator(gen, preload=True)
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return

    bot = ReplaceRobot(gen, replacements, exceptions, site=site,
                       summary=edit_summary, **options)
    site.login()
    bot.run()
def main(*args):
    """Parse command line arguments and invoke the redirect bot."""
    # read command line parameters
    # what the bot should do (either resolve double redirs, or delete broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    always = False
    delete = False
    for arg in pywikibot.handleArgs(*args):
        if arg == 'double' or arg == 'do':
            action = 'double'
        elif arg == 'broken' or arg == 'br':
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[11:]
            if ns == '':
                # "-namespace:" does NOT yield -namespace:0 further down the road!
                ns = i18n.input('pywikibot-enter-namespace-number')
            # TODO! at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[8:])
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg.startswith('-until:'):
            until = arg[7:]
        elif arg.startswith('-total:'):
            number = int(arg[7:])
        elif arg.startswith('-step:'):
            step = int(arg[6:])
        elif arg == '-always':
            always = True
        elif arg == '-delete':
            delete = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)
    # An XML dump is incompatible with both -moves and -fullscan; in
    # those cases (or with no action) show the help text instead.
    if (
        not action or
        xmlFilename and moved_pages or
        fullscan and xmlFilename
    ):
        pywikibot.showHelp()
    else:
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, step)
        bot = RedirectRobot(action, gen, always, number, delete)
        bot.run()
def main(*args) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}  # type: Dict[str, Any]
    gen_options = {}  # type: Dict[str, Any]
    # what the bot should do (either resolve double redirs, or process broken
    # redirs)
    action = None
    # Page-source options are mutually exclusive; collect the ones given
    # so we can complain when more than one was used.
    source = set()
    gen_factory = pagegenerators.GeneratorFactory()
    local_args = pywikibot.handle_args(args)
    for argument in local_args:
        arg, sep, value = argument.partition(':')
        # option is the argument name without its leading '-'.
        option = arg.partition('-')[2]
        # bot options
        if arg == 'do':
            action = 'double'
        elif arg == 'br':
            action = 'broken'
        elif arg in ('both', 'broken', 'double'):
            action = arg
        elif option in ('always', 'delete'):
            options[option] = True
        elif option == 'sdtemplate':
            options['sdtemplate'] = value or pywikibot.input(
                'Which speedy deletion template to use?')
        # generator options
        elif option in ('fullscan', 'moves'):
            gen_options[option] = True
            source.add(arg)
        elif option == 'xml':
            gen_options[option] = value or i18n.input(
                'pywikibot-enter-xml-filename')
            source.add(arg)
        elif option == 'offset':
            gen_options[option] = int(value)
        elif option in ('start', 'until'):
            gen_options[option] = value
        elif option == 'limit':
            # -limit applies both to the generator and to the bot itself.
            options['limit'] = gen_options['limit'] = int(value)
        elif gen_factory.handle_arg(argument):
            pass
        else:
            pywikibot.output('Unknown argument: ' + arg)
    if len(source) > 1:
        problem = 'You can only use one of {0} options.'.format(
            ' or '.join(source))
        pywikibot.bot.suggest_help(additional_text=problem,
                                   missing_action=not action)
        return
    if not action:
        pywikibot.bot.suggest_help(missing_action=True)
        return
    gen = None
    # Only build the default RedirectGenerator when the user supplied no
    # generator of their own via the factory.
    if not gen_factory.gens:
        if gen_factory.namespaces:
            gen_options['namespaces'] = gen_factory.namespaces
        gen = RedirectGenerator(action, **gen_options)
    options['generator'] = gen_factory.getCombinedGenerator(gen=gen)
    bot = RedirectRobot(action, **options)
    bot.run()
def handleArg(self, arg):
    """Parse one argument at a time.

    If it is recognized as an argument that specifies a generator, a
    generator is created and added to the accumulation list, and the
    function returns true. Otherwise, it returns false, so that caller
    can try parsing the argument. Call getCombinedGenerator() after all
    arguments have been parsed to get the final output generator.
    """
    site = pywikibot.getSite()
    gen = None
    if arg.startswith('-filelinks'):
        fileLinksPageTitle = arg[11:]
        if not fileLinksPageTitle:
            fileLinksPageTitle = i18n.input(
                'pywikibot-enter-file-links-processing')
        if fileLinksPageTitle.startswith(site.namespace(6) + ":"):
            fileLinksPage = pywikibot.ImagePage(site, fileLinksPageTitle)
        else:
            fileLinksPage = pywikibot.ImagePage(site,
                                                'Image:' +
                                                fileLinksPageTitle)
        gen = FileLinksGenerator(fileLinksPage)
    elif arg.startswith('-unusedfiles'):
        if len(arg) == 12:
            gen = UnusedFilesGenerator()
        else:
            gen = UnusedFilesGenerator(number=int(arg[13:]))
    elif arg.startswith('-unwatched'):
        if len(arg) == 10:
            gen = UnwatchedPagesPageGenerator()
        else:
            gen = UnwatchedPagesPageGenerator(number=int(arg[11:]))
    elif arg.startswith('-usercontribs'):
        args = arg[14:].split(';')
        number = None
        try:
            number = int(args[1])
        # FIX: was a bare 'except:' which also swallowed KeyboardInterrupt
        # and SystemExit; only a missing (';'-separated) count or a
        # non-numeric one should fall back to the default.
        except (IndexError, ValueError):
            number = 250
        gen = UserContributionsGenerator(args[0], number,
                                         namespaces=self.getNamespaces)
    elif arg.startswith('-withoutinterwiki'):
        if len(arg) == 17:
            gen = WithoutInterwikiPageGenerator()
        else:
            gen = WithoutInterwikiPageGenerator(number=int(arg[18:]))
    elif arg.startswith('-interwiki'):
        title = arg[11:]
        if not title:
            title = i18n.input('pywikibot-enter-page-processing')
        page = pywikibot.Page(site, title)
        gen = InterwikiPageGenerator(page)
    elif arg.startswith('-randomredirect'):
        if len(arg) == 15:
            gen = RandomRedirectPageGenerator()
        else:
            gen = RandomRedirectPageGenerator(number=int(arg[16:]))
    elif arg.startswith('-random'):
        if len(arg) == 7:
            gen = RandomPageGenerator()
        else:
            gen = RandomPageGenerator(number=int(arg[8:]))
    elif arg.startswith('-recentchanges'):
        if len(arg) >= 15:
            gen = RecentchangesPageGenerator(number=int(arg[15:]),
                                             nobots=False)
        else:
            gen = RecentchangesPageGenerator(nobots=False)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-rc-nobots'):
        if len(arg) >= 11:
            gen = RecentchangesPageGenerator(number=int(arg[11:]),
                                             nobots=True)
        else:
            gen = RecentchangesPageGenerator(nobots=True)
        gen = DuplicateFilterPageGenerator(gen)
    elif arg.startswith('-file'):
        textfilename = arg[6:]
        if not textfilename:
            textfilename = pywikibot.input(
                u'Please enter the local file name:')
        gen = TextfilePageGenerator(textfilename)
    elif arg.startswith('-namespace'):
        if len(arg) == len('-namespace'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-namespace:'):].split(","))
        # Namespace filters are applied later; no generator is created.
        return True
    elif arg.startswith('-ns'):
        if len(arg) == len('-ns'):
            self.namespaces.append(
                pywikibot.input(u'What namespace are you filtering on?'))
        else:
            self.namespaces.extend(arg[len('-ns:'):].split(","))
        return True
    elif arg.startswith('-limit'):
        if len(arg) == len('-limit'):
            self.limit = int(pywikibot.input("What is the limit value?"))
        else:
            self.limit = int(arg[len('-limit:'):])
        return True
    elif arg.startswith('-catr'):
        gen = self.getCategoryGen(arg, len('-catr'), recurse=True)
    elif arg.startswith('-category'):
        gen = self.getCategoryGen(arg, len('-category'))
    elif arg.startswith('-cat'):
        gen = self.getCategoryGen(arg, len('-cat'))
    elif arg.startswith('-subcatsr'):
        gen = self.setSubCategoriesGen(arg, 9, recurse=True)
    elif arg.startswith('-subcats'):
        gen = self.setSubCategoriesGen(arg, 8)
    elif arg.startswith('-page'):
        if len(arg) == len('-page'):
            gen = [pywikibot.Page(site, pywikibot.input(
                u'What page do you want to use?'))]
        else:
            gen = [pywikibot.Page(site, arg[len('-page:'):])]
    elif arg.startswith('-uncatfiles'):
        gen = UnCategorizedImageGenerator()
    elif arg.startswith('-uncatcat'):
        gen = UnCategorizedCategoryGenerator()
    elif arg.startswith('-uncattemplates'):
        gen = UnCategorizedTemplatesGenerator()
    elif arg.startswith('-uncat'):
        gen = UnCategorizedPageGenerator()
    elif arg.startswith('-ref'):
        referredPageTitle = arg[5:]
        if not referredPageTitle:
            referredPageTitle = pywikibot.input(
                u'Links to which page should be processed?')
        referredPage = pywikibot.Page(site, referredPageTitle)
        gen = ReferringPageGenerator(referredPage)
    elif arg.startswith('-links'):
        linkingPageTitle = arg[7:]
        if not linkingPageTitle:
            linkingPageTitle = pywikibot.input(
                u'Links from which page should be processed?')
        linkingPage = pywikibot.Page(site, linkingPageTitle)
        gen = LinkedPageGenerator(linkingPage)
    elif arg.startswith('-weblink'):
        url = arg[9:]
        if not url:
            url = pywikibot.input(
                u'Pages with which weblink should be processed?')
        gen = LinksearchPageGenerator(url)
    elif arg.startswith('-transcludes'):
        transclusionPageTitle = arg[len('-transcludes:'):]
        if not transclusionPageTitle:
            transclusionPageTitle = pywikibot.input(
                u'Pages that transclude which page should be processed?')
        transclusionPage = pywikibot.Page(site,
                                          "%s:%s" % (site.namespace(10),
                                                     transclusionPageTitle))
        gen = ReferringPageGenerator(transclusionPage,
                                     onlyTemplateInclusion=True)
    elif arg.startswith('-gorandom'):
        # Pick one random page and start an Allpages run from its title.
        for firstPage in RandomPageGenerator(number=1):
            firstPageTitle = firstPage.title()
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(site, firstPageTitle
                                        ).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-start'):
        firstPageTitle = arg[7:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        if self.namespaces != []:
            namespace = self.namespaces[0]
        else:
            namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(site, firstPageTitle
                                        ).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects=False)
    elif arg.startswith('-redirectonly'):
        firstPageTitle = arg[14:]
        if not firstPageTitle:
            firstPageTitle = pywikibot.input(
                u'At which page do you want to start?')
        namespace = pywikibot.Page(site, firstPageTitle).namespace()
        firstPageTitle = pywikibot.Page(site, firstPageTitle
                                        ).title(withNamespace=False)
        gen = AllpagesPageGenerator(firstPageTitle, namespace,
                                    includeredirects='only')
    elif arg.startswith('-prefixindex'):
        prefix = arg[13:]
        namespace = None
        if not prefix:
            prefix = pywikibot.input(
                u'What page names are you looking for?')
        gen = PrefixingPageGenerator(prefix=prefix)
    elif arg.startswith('-newimages'):
        limit = arg[11:] or pywikibot.input(
            u'How many images do you want to load?')
        gen = NewimagesPageGenerator(number=int(limit))
    elif arg == ('-new') or arg.startswith('-new:'):
        if len(arg) >= 5:
            gen = NewpagesPageGenerator(number=int(arg[5:]))
        else:
            gen = NewpagesPageGenerator(number=60)
    elif arg.startswith('-imagelinks'):
        imagelinkstitle = arg[len('-imagelinks:'):]
        if not imagelinkstitle:
            imagelinkstitle = pywikibot.input(
                u'Images on which page should be processed?')
        imagelinksPage = pywikibot.Page(site, imagelinkstitle)
        gen = ImagesPageGenerator(imagelinksPage)
    elif arg.startswith('-search'):
        mediawikiQuery = arg[8:]
        if not mediawikiQuery:
            mediawikiQuery = pywikibot.input(
                u'What do you want to search for?')
        gen = SearchPageGenerator(mediawikiQuery, number=None,
                                  namespaces=self.getNamespaces)
    elif arg.startswith('-titleregex'):
        if len(arg) == 11:
            regex = pywikibot.input(u'What page names are you looking for?')
        else:
            regex = arg[12:]
        gen = RegexFilterPageGenerator(site.allpages(), [regex])
    elif arg.startswith('-yahoo'):
        gen = YahooSearchPageGenerator(arg[7:])
    elif arg.startswith('-'):
        # Generic '-<mode>log[:<user>][;<number>]' handling.
        mode, log, user = arg.partition('log')
        # exclude -log, -nolog
        if log == 'log' and mode not in ['-', '-no']:
            number = 500
            if not user:
                user = None
            else:
                try:
                    number = int(user[1:])
                    user = None
                except ValueError:
                    user = user[1:]
            if user:
                result = user.split(';')
                user = result[0]
                try:
                    number = int(result[1])
                # FIX: was a bare 'except:'; only a missing or non-numeric
                # count after ';' should be ignored here.
                except (IndexError, ValueError):
                    pass
            gen = LogpagesPageGenerator(number, mode[1:], user)
    if gen:
        self.gens.append(gen)
        return self.getCombinedGenerator()
    else:
        return False
def main(*args):
    """Read command line parameters and run the redirect bot.

    @param args: command line arguments passed on to pywikibot.handleArgs
    """
    # read command line parameters
    # what the bot should do (either resolve double redirs, or delete broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ''
    until = ''
    number = None
    step = None
    always = False
    delete = False
    for arg in pywikibot.handleArgs(*args):
        if arg == 'double' or arg == 'do':
            action = 'double'
        elif arg == 'broken' or arg == 'br':
            action = 'broken'
        elif arg == 'both':
            action = 'both'
        elif arg == '-fullscan':
            fullscan = True
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith('-moves'):
            moved_pages = True
        elif arg.startswith('-namespace:'):
            ns = arg[11:]
            if ns == '':
                # "-namespace:" does NOT yield -namespace:0 further down the road!
                ns = i18n.input('pywikibot-enter-namespace-number')
            # TODO! at least for some generators enter a namespace by its name
            # or number
            if ns == '':
                ns = '0'
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif arg.startswith('-offset:'):
            offset = int(arg[8:])
        elif arg.startswith('-start:'):
            start = arg[7:]
        elif arg.startswith('-until:'):
            until = arg[7:]
        elif arg.startswith('-total:'):
            number = int(arg[7:])
        elif arg.startswith('-step:'):
            step = int(arg[6:])
        elif arg == '-always':
            always = True
        elif arg == '-delete':
            delete = True
        else:
            pywikibot.output(u'Unknown argument: %s' % arg)
    # -xml is incompatible with -moves and with -fullscan; show help in
    # those cases, and when no action was specified at all.
    if (not action or xmlFilename and moved_pages
            or fullscan and xmlFilename):
        pywikibot.showHelp()
    else:
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, step)
        bot = RedirectRobot(action, gen, always, number, delete)
        bot.run()
def run(self):
    """Start the robot: iterate pages, apply replacements interactively."""
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping %s because the title is on the exceptions list.'
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not (self.articles or page.canBeEdited()):
                pywikibot.output(u"You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found' % page.title(asLink=True))
            continue
        new_text = original_text
        # Inner loop: re-shows the diff/prompt until the user accepts,
        # skips, or quits; 'break' moves on to the next page.
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(
                    u'Skipping %s because it contains text that is on the exceptions list.'
                    % page.title(asLink=True))
                break
            new_text = self.doReplacements(new_text)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                break
            if self.recursive:
                # Keep re-applying the replacements until a fixed point
                # is reached.
                newest_text = self.doReplacements(new_text)
                while (newest_text!=new_text):
                    new_text = newest_text
                    newest_text = self.doReplacements(new_text)
            if hasattr(self, "addedCat"):
                cats = page.categories()
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = pywikibot.replaceCategoryLinks(new_text,
                                                              cats)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.showDiff(original_text, new_text)
            if self.acceptall:
                break
            # The 'no+eXcept' choice is only offered when an exceptions
            # file (self.exctitles) is available to record the title in.
            if self.exctitles:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'no+eXcept', 'Edit',
                     'open in Browser', 'All', 'Quit'],
                    ['y', 'N', 'x', 'e', 'b', 'a', 'q'], 'N')
            else:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'Edit', 'open in Browser', 'All',
                     'Quit'],
                    ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                webbrowser.open("http://%s%s" % (
                    page.site.hostname(),
                    page.site.nice_get_address(page.title())
                ))
                i18n.input('pywikibot-enter-finished-browser')
                try:
                    # Reload the (possibly changed) page text from the wiki.
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s has been deleted.'
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == 'q':
                self.writeEditCounter()
                self.writeExceptCounter()
                return
            if choice == 'a':
                self.acceptall = True
            if choice == 'x':  # May happen only if self.exctitles isn't None
                self.exctitles.write(
                    u"ur'^%s$',\n" % re.escape(page.title()))
                self.exctitles.flush()
                self.exceptcounter += 1
            if choice == 'y':
                if not self.articles:
                    # Primary behaviour: working on wiki
                    page.put_async(new_text, self.editSummary)
                    self.editcounter += 1
                    # Bug: this increments even if put_async fails
                    # This is separately in two clauses of if for
                    # future purposes to get feedback from put_async
                else:
                    # Save the title for later processing instead of editing
                    self.editcounter += 1
                    self.articles.write(u'#%s\n%s'
                                        % (page.title(asLink=True,
                                                      textlink=True),
                                           self.splitLine()))
                    self.articles.flush()
            # For the peace of our soul :-)
            # choice must be 'N'
            break
        if self.acceptall and new_text != original_text:
            if not self.articles:
                # Primary behaviour: working on wiki
                try:
                    page.put(new_text, self.editSummary)
                    self.editcounter += 1  # increment only on success
                except pywikibot.EditConflict:
                    pywikibot.output(u'Skipping %s because of edit conflict'
                                     % (page.title(),))
                except pywikibot.SpamfilterError, e:
                    pywikibot.output(
                        u'Cannot change %s because of blacklist entry %s'
                        % (page.title(), e.url))
                except pywikibot.PageNotSaved, error:
                    pywikibot.output(u'Error putting page: %s'
                                     % (error.args,))
                except pywikibot.LockedPage:
                    pywikibot.output(u'Skipping %s (locked page)'
                                     % (page.title(),))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    options = {}
    # what the bot should do (either resolve double redirs, or process broken
    # redirs)
    action = None
    # where the bot should get his infos from (either None to load the
    # maintenance special page from the live wiki, or the filename of a
    # local XML dump file)
    xmlFilename = None
    # Which namespace should be processed when using a XML dump
    # default to -1 which means all namespaces will be processed
    namespaces = []
    # at which redirect shall we start searching double redirects again
    # (only with dump); default to -1 which means all redirects are checked
    offset = -1
    moved_pages = False
    fullscan = False
    start = ""
    until = ""
    number = None
    pagename = None
    for arg in pywikibot.handle_args(args):
        arg, sep, value = arg.partition(":")
        # option is the argument name without its leading '-'.
        option = arg[1:]
        # bot options
        if arg == "do":
            action = "double"
        elif arg == "br":
            action = "broken"
        elif arg in ("both", "broken", "double"):
            action = arg
        elif option in ("always", "delete"):
            options[option] = True
        elif option == "total":
            options["number"] = number = int(value)
        # generator options
        elif option == "fullscan":
            fullscan = True
        elif option == "xml":
            xmlFilename = value or i18n.input("pywikibot-enter-xml-filename")
        elif option == "moves":
            moved_pages = True
        elif option == "namespace":
            # "-namespace:" does NOT yield -namespace:0 further down the road!
            ns = value or i18n.input("pywikibot-enter-namespace-number")
            # TODO: at least for some generators enter a namespace by its name
            # or number
            if ns == "":
                ns = "0"
            try:
                ns = int(ns)
            except ValueError:
                # -namespace:all Process all namespaces.
                # Only works with the API read interface.
                pass
            if ns not in namespaces:
                namespaces.append(ns)
        elif option == "offset":
            offset = int(value)
        elif option == "start":
            start = value
        elif option == "until":
            until = value
        elif option == "page":
            pagename = value
        # deprecated or unknown options
        elif option == "step":
            issue_deprecation_warning('The usage of "{0}"'.format(arg), 2,
                                      ArgumentDeprecationWarning)
        else:
            pywikibot.output("Unknown argument: %s" % arg)
    # -xml cannot be combined with -moves or -fullscan; explain which
    # combination was rejected before showing the help.
    if not action or xmlFilename and (moved_pages or fullscan):
        problems = []
        if xmlFilename and moved_pages:
            problems += ["Either use a XML file or the moved pages from the "
                         "API"]
        if xmlFilename and fullscan:
            problems += ["Either use a XML file or do a full scan using the "
                         "API"]
        pywikibot.bot.suggest_help(additional_text="\n".join(problems),
                                   missing_action=not action)
    else:
        pywikibot.Site().login()
        gen = RedirectGenerator(xmlFilename, namespaces, offset, moved_pages,
                                fullscan, start, until, number, pagename)
        bot = RedirectRobot(action, gen, **options)
        bot.run()
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Collects replacement pairs (from the command line, a -pairsfile, manual
    input, or predefined fixes), builds summaries, precompiles all regexes,
    chooses a page generator (XML dump, SQL, or the generator factory) and
    runs ReplaceRobot.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        "title": [],
        "text-contains": [],
        "inside": [],
        "inside-tags": [],
        "require-title": [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == "-regex":
            regex = True
        elif arg.startswith("-xmlstart"):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    "Please enter the dumped article to start with:")
            else:
                xmlStart = arg[10:]
        elif arg.startswith("-xml"):
            if len(arg) == 4:
                xmlFilename = i18n.input("pywikibot-enter-xml-filename")
            else:
                xmlFilename = arg[5:]
        elif arg == "-sql":
            useSql = True
        elif arg.startswith("-excepttitle:"):
            exceptions["title"].append(arg[13:])
        elif arg.startswith("-requiretitle:"):
            exceptions["require-title"].append(arg[14:])
        elif arg.startswith("-excepttext:"):
            exceptions["text-contains"].append(arg[12:])
        elif arg.startswith("-exceptinside:"):
            exceptions["inside"].append(arg[14:])
        elif arg.startswith("-exceptinsidetag:"):
            exceptions["inside-tags"].append(arg[17:])
        elif arg.startswith("-fix:"):
            fixes_set += [arg[5:]]
        elif arg.startswith("-sleep:"):
            sleep = float(arg[7:])
        elif arg == "-always":
            acceptall = True
        elif arg == "-recursive":
            recursive = True
        elif arg == "-nocase":
            caseInsensitive = True
        elif arg == "-dotall":
            dotall = True
        elif arg == "-multiline":
            multiline = True
        elif arg.startswith("-addcat:"):
            add_cat = arg[8:]
        elif arg.startswith("-summary:"):
            edit_summary = arg[9:]
        elif arg.startswith("-automaticsummary"):
            # True (not a string) marks "use the generated summary silently"
            edit_summary = True
        elif arg.startswith("-allowoverlap"):
            allowoverlap = True
        elif arg.startswith("-manualinput"):
            manual_input = True
        elif arg.startswith("-replacementfile"):
            issue_deprecation_warning("-replacementfile", "-pairsfile", 2,
                                      ArgumentDeprecationWarning)
        elif arg.startswith("-pairsfile"):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True
            if arg == "-pairsfile":
                replacement_file = pywikibot.input(
                    "Please enter the filename to read replacements from:")
            else:
                replacement_file = arg[len("-pairsfile:"):]
        else:
            # anything unrecognised is half of an old/new replacement pair
            commandline_replacements.append(arg)
    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error("Incomplete command line pattern replacement pair.")
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error("-pairsfile used between a pattern replacement pair.")
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, "r", "utf-8") as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error("Error loading {0}: {1}".format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                "{0} contains an incomplete pattern replacement pair.".format(
                    replacement_file))
            return False

        # Strip BOM from first line
        # NOTE(review): lstrip returns a new string and the result is
        # discarded here, so the BOM is presumably not actually removed —
        # verify against upstream.
        file_replacements[0].lstrip("\uFEFF")
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input("Please enter the text that should be replaced:")
        while old:
            new = pywikibot.input("Please enter the new text:")
            commandline_replacements += [old, new]
            old = pywikibot.input(
                "Please enter another text that should be replaced,"
                "\nor press Enter to start:")

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, "replace-replacing",
                {"description": " (-%s +%s)" % (replacement.old,
                                                replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output("Available predefined fixes are: %s"
                             % ", ".join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output("The user fixes file could not be found: "
                                 "{0}".format(fixes.filename))
            return
        if not fix["replacements"]:
            pywikibot.warning("No replacements defined for fix "
                              '"{0}"'.format(fix_name))
            continue
        if "msg" in fix:
            if isinstance(fix["msg"], basestring):
                set_summary = i18n.twtranslate(site, str(fix["msg"]))
            else:
                set_summary = i18n.translate(site, fix["msg"], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get("regex"),
                                          fix.get("exceptions"),
                                          fix.get("nocase"),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix["replacements"], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{0}" (replacement #{1})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    "characters like U+200E".format(
                        chars.replace_invisible(replacement[0])))
            # replacement[1] may be a callable; only check plain strings
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    "characters like U+200E".format(
                        chars.replace_invisible(replacement[1])))
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary
                ))

        if replacement_set:
            replacements.extend(replacement_set)

        if len(fix["replacements"]) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    # Ask for (or auto-accept) a summary when none was given on the command
    # line and at least one replacement would need one.
    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output(
                "The summary message for the command line "
                "replacements will be something like: %s" % single_summary)
        if missing_fixes_summaries:
            pywikibot.output(
                "The summary will not be used when the fix has "
                "one defined but the following fix(es) do(es) not "
                "have a summary defined: "
                "{0}".format(", ".join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                "Press Enter to use this automatic message, or enter a "
                "description of the\nchanges your bot will make:")
        else:
            edit_summary = ""

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default it here
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        whereClause = "WHERE (%s)" % " OR ".join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
             for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = "AND NOT (%s)" % " OR ".join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ""
        query = """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, allowoverlap,
                       recursive, add_cat, sleep, edit_summary,
                       always=acceptall, site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is
    # unloaded.
    pywikibot.stopme()
    pywikibot.output("\n%s pages changed." % bot.changed_pages)
def run(self):
    """
    Starts the bot.

    Iterates over self.generator, applies the configured replacements to
    each page's text, shows a diff, and — unless self.acceptall is set —
    interactively asks the user to accept, edit, re-fetch (browser), skip,
    accept-all or quit. Accepted changes are saved via put_async (interactive
    'y') or put (accept-all path).
    """
    # Run the generator which will yield Pages which might need to be
    # changed.
    for page in self.generator:
        if self.isTitleExcepted(page.title()):
            pywikibot.output(
                u'Skipping %s because the title is on the exceptions list.'
                % page.title(asLink=True))
            continue
        try:
            # Load the page's text from the wiki
            original_text = page.get(get_redirect=True)
            if not page.canBeEdited():
                pywikibot.output(u"You can't edit page %s"
                                 % page.title(asLink=True))
                continue
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s not found' % page.title(asLink=True))
            continue
        new_text = original_text
        # Inner loop: repeats only when the user chooses 'e' (edit) or 'b'
        # (browser, which re-fetches the text); every other branch breaks
        # or returns.
        while True:
            if self.isTextExcepted(new_text):
                pywikibot.output(u'Skipping %s because it contains text '
                                 u'that is on the exceptions list.'
                                 % page.title(asLink=True))
                break
            new_text = self.doReplacements(new_text)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                break
            if self.recursive:
                # keep re-applying the replacements until a fixed point
                newest_text = self.doReplacements(new_text)
                while (newest_text != new_text):
                    new_text = newest_text
                    newest_text = self.doReplacements(new_text)
            if hasattr(self, "addedCat"):
                cats = page.categories(nofollow_redirects=True)
                if self.addedCat not in cats:
                    cats.append(self.addedCat)
                    new_text = pywikibot.replaceCategoryLinks(new_text,
                                                              cats)
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % page.title())
            pywikibot.showDiff(original_text, new_text)
            if self.acceptall:
                break
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'Edit', 'open in Browser', 'All', 'Quit'],
                ['y', 'N', 'e', 'b', 'a', 'q'], 'N')
            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(original_text)
                # if user didn't press Cancel
                if as_edited and as_edited != new_text:
                    new_text = as_edited
                continue
            if choice == 'b':
                webbrowser.open("http://%s%s" % (
                    page.site.hostname(),
                    page.site.nice_get_address(page.title(asUrl=True))))
                i18n.input('pywikibot-enter-finished-browser')
                try:
                    # force=True: the user may have edited it in the browser
                    original_text = page.get(get_redirect=True, force=True)
                except pywikibot.NoPage:
                    pywikibot.output(u'Page %s has been deleted.'
                                     % page.title())
                    break
                new_text = original_text
                continue
            if choice == 'q':
                return
            if choice == 'a':
                self.acceptall = True
            if choice == 'y':
                page.put_async(new_text, self.summary)
            # choice must be 'N'
            break
        # Accept-all saves happen synchronously, with explicit handling of
        # the common save failures.
        if self.acceptall and new_text != original_text:
            try:
                page.put(new_text, self.summary)
            except pywikibot.EditConflict:
                pywikibot.output(u'Skipping %s because of edit conflict'
                                 % (page.title(),))
            except pywikibot.SpamfilterError as e:
                pywikibot.output(
                    u'Cannot change %s because of blacklist entry %s'
                    % (page.title(), e.url))
            except pywikibot.PageNotSaved as error:
                pywikibot.output(u'Error putting page: %s'
                                 % (error.args,))
            except pywikibot.LockedPage:
                pywikibot.output(u'Skipping %s (locked page)'
                                 % (page.title(),))
def main(*args):
    """
    Run the hewiki replace bot.

    Parses command line arguments (-summary, -xml, -xmlstart, -titlecheck
    plus any generator-factory options), builds the replacement dictionary
    and the exception regexes (safe templates, file usage, yi interwiki),
    then runs ReplaceRobotHe over an XML dump or any other page generator.

    @param args: command line arguments
    """
    pywikibot.output('Starting hewiki-replacebot')
    edit_summary = replaceConfig.defaultSummary
    xml_filename = None
    xml_start = None
    title_check_page = None
    gen = None
    gen_factory = pywikibot.pagegenerators.GeneratorFactory()
    local_args = pywikibot.handle_args(args)
    for arg in local_args:
        if gen_factory.handle_arg(arg):
            continue
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xml_start = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xml_start = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xml_filename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xml_filename = arg[5:]
        elif arg.startswith('-titlecheck'):
            title_check_page = arg[12:]

    replace_dict = fill_replacements_dict()
    safe_templates = replaceConfig.safeTemplates

    # add external links templates
    site = pywikibot.Site()
    site.login()
    for safeCategory in replaceConfig.safeTemplatesCategories:
        cite_templates = pywikibot.Category(site, safeCategory).articles(
            namespaces=10, recurse=True)
        cite_templates = [page.title(with_ns=False)
                          for page in cite_templates]
        safe_templates += cite_templates
    # drop subpages (e.g. /doc) and duplicates
    safe_templates = list(set(a for a in safe_templates if '/' not in a))

    # BUGFIX: these patterns were previously non-raw strings; '\[' and '\{'
    # are invalid string escapes (SyntaxWarning on modern Python). Use raw
    # strings so the regex engine receives the backslashes intact.
    file_usage_rgx = re.compile(replaceConfig.fileUsageRgx, re.I)
    yi_rgx = re.compile(r'\[\[yi:.*?\]\]')
    safe_templates_rgx = re.compile(
        r'\{\{(' + '|'.join(set(safe_templates)) + r').*?\}\}', re.I)
    exceptions = {
        'title': [],
        'text-contains': [re.compile(replaceConfig.redirectRgx, re.I)],
        'inside': [file_usage_rgx, safe_templates_rgx, yi_rgx],
        'inside-tags': [
            'nowiki', 'math', 'comment', 'pre', 'source', 'hyperlink',
            'gallery', 'interwiki', 'templatedata', 'syntaxhighlight'
        ],
        'require-title': [],
    }

    # avoid searching in other namespaces in the xml
    exceptions_with_title_ns = dict(exceptions)
    exceptions_with_title_ns['title'] = [
        re.compile('^' + re.escape(ns_name) + ':')
        for ns_index, ns in site.namespaces.items()
        if ns_index not in replaceConfig.namespaces
        for ns_name in ns
    ]

    if title_check_page:
        check_titles(site, title_check_page, replace_dict)

    if xml_filename:
        gen = XmlDumpReplacePageGeneratorHe(replace_dict, xml_filename,
                                            xml_start,
                                            exceptions_with_title_ns, site)
    gen = gen_factory.getCombinedGenerator(gen)
    if not gen:
        pywikibot.output(
            'no xml dump specified. please fill -xml and the xml file to be '
            'used, or other generator'
        )
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    gen = pywikibot.pagegenerators.NamespaceFilterPageGenerator(
        gen, replaceConfig.namespaces, site)
    gen = pywikibot.pagegenerators.PreloadingGenerator(gen)
    pywikibot.output('starting replace')
    bot = ReplaceRobotHe(gen, replace_dict, exceptions, edit_summary)
    bot.run()
    pywikibot.output('finished all replacements')
def main(*args):
    """
    Process command line arguments and run the replace bot.

    Replacement pairs come either from trailing command line arguments, a
    predefined -fix, or interactive input; the edit summary is derived from
    them unless -summary was given. Pages come from an XML dump, a SQL
    query, explicit -page titles or the generator factory.

    @param args: command line arguments
    """
    add_cat = None
    gen = None
    # summary message
    summary_commandline = False
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fix = None
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    PageTitles = []
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None

    # Read commandline parameters.
    local_args = pywikibot.handleArgs(*args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(
                    pywikibot.input(u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            # anything unrecognised is half of an old/new replacement pair
            commandline_replacements.append(arg)
    pywikibot.Site().login()
    gen = genFactory.getCombinedGenerator()
    if (len(commandline_replacements) % 2):
        raise pywikibot.Error('require even number of replacements.')
    elif (len(commandline_replacements) == 2 and fix is None):
        replacements.append(
            (commandline_replacements[0], commandline_replacements[1]))
        if not summary_commandline:
            edit_summary = i18n.twtranslate(
                pywikibot.Site(),
                'replace-replacing',
                {'description': ' (-%s +%s)'
                 % (commandline_replacements[0],
                    commandline_replacements[1])})
    elif (len(commandline_replacements) > 1):
        if (fix is None):
            for i in range(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                edit_summary = i18n.twtranslate(
                    pywikibot.Site(),
                    'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # no pairs and no fix: ask interactively, building the summary
        # description ('change') as we go
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,' +
                u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(
                pywikibot.Site(), 'replace-replacing',
                {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            edit_summary = summary_message
    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                edit_summary = i18n.twtranslate(pywikibot.Site(),
                                                str(fix['msg']))
            else:
                edit_summary = i18n.translate(pywikibot.Site(),
                                              fix['msg'], fallback=True)
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default it here
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
            for (old, new) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.Site(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary)
    bot.run()
def test_pagegen_i18n_input(self):
    """Test i18n.input falls back with missing message package."""
    # With no translation available, i18n.input should use the supplied
    # fallback_prompt and return the (mocked) user input.
    rv = i18n.input('pywikibot-enter-category-name',
                    fallback_prompt='dummy output')
    self.assertEqual(rv, 'dummy input')
    # the fallback prompt must actually have been shown to the user
    self.assertIn('dummy output: ', self.output_text)
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    Collects replacement pairs (from the command line, a -replacementfile,
    manual input, or predefined fixes), derives an edit summary, precompiles
    all regexes, chooses a page generator (XML dump, SQL, or the generator
    factory) and runs ReplaceRobot.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                         # major refactoring of code.
    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.
    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()
    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True
            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            # anything unrecognised is half of an old/new replacement pair
            commandline_replacements.append(arg)
    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line
        # NOTE(review): lstrip returns a new string and the result is
        # discarded here, so the BOM is presumably not actually removed —
        # verify against upstream.
        file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary built here is only an example based on the first pair.
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description': ' (-%s +%s)' % (replacement.old,
                                                replacement.new)}
            )
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s'
                             % single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary)
        for replacement in fix['replacements']:
            summary = None if len(replacement) < 3 else replacement[2]
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    'characters like U+200E'.format(
                        chars.replace_invisible(replacement[1])))
            replacements.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default it here
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
            for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is
    # unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
def main(*args):
    """Entry point for the replace bot.

    Parse command line arguments, build the list of replacements and
    exception patterns, choose a page generator (XML dump, SQL query,
    explicit page titles, or any generator supplied by the generator
    factory) and run a ReplaceRobot over the resulting pages.

    @param args: command line arguments, forwarded to
        pywikibot.handleArgs(); when empty, sys.argv is used.
    """
    add_cat = None
    gen = None
    # True once the user supplied -summary; suppresses the interactive /
    # auto-generated edit summary below.
    summary_commandline = False
    edit_summary = u""
    # Collects positional command line parameters in pairs:
    # even index = original text, odd index = replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }                         # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fix name from the 'fixes' dictionary, set by -fix:.
    fix = None
    # The dump's path, either absolute or relative, used if -xml is present.
    xmlFilename = None
    useSql = False
    PageTitles = []
    # Will become True when the user presses 'a' ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase.
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall.
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline.
    multiline = False
    # Do all hits when they overlap.
    allowoverlap = False
    # Do not recurse replacement.
    recursive = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Between one regex and the next (using -fix) sleep some time, not to
    # waste too much CPU.
    sleep = None

    # Read commandline parameters.
    for arg in pywikibot.handleArgs(*args):
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-page'):
            if len(arg) == 5:
                PageTitles.append(pywikibot.input(
                    u'Which page do you want to change?'))
            else:
                PageTitles.append(arg[6:])
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fix = arg[5:]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
            summary_commandline = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        else:
            commandline_replacements.append(arg)

    pywikibot.Site().login()
    gen = genFactory.getCombinedGenerator()
    if len(commandline_replacements) % 2:
        # FIX: use the call form of raise; the old
        # "raise pywikibot.Error, '...'" statement is Python-2-only syntax.
        raise pywikibot.Error('require even number of replacements.')
    elif len(commandline_replacements) == 2 and fix is None:
        replacements.append((commandline_replacements[0],
                             commandline_replacements[1]))
        if not summary_commandline:
            edit_summary = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': ' (-%s +%s)' % (commandline_replacements[0],
                                                commandline_replacements[1])})
    elif len(commandline_replacements) > 1:
        if fix is None:
            for i in range(0, len(commandline_replacements), 2):
                replacements.append((commandline_replacements[i],
                                     commandline_replacements[i + 1]))
            if not summary_commandline:
                pairs = [(commandline_replacements[i],
                          commandline_replacements[i + 1])
                         for i in range(0, len(commandline_replacements), 2)]
                replacementsDescription = '(%s)' % ', '.join(
                    [('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
                edit_summary = i18n.twtranslate(
                    pywikibot.getSite(), 'replace-replacing',
                    {'description': replacementsDescription})
        else:
            raise pywikibot.Error(
                'Specifying -fix with replacements is undefined')
    elif fix is None:
        # Interactive mode: ask the user for one or more replacement pairs
        # and build the summary description while doing so.
        old = pywikibot.input(u'Please enter the text that should be replaced:')
        new = pywikibot.input(u'Please enter the new text:')
        change = '(-' + old + ' +' + new
        replacements.append((old, new))
        while True:
            old = pywikibot.input(
                u'Please enter another text that should be replaced,' +
                u'\nor press Enter to start:')
            if old == '':
                change += ')'
                break
            new = i18n.input('pywikibot-enter-new-text')
            change += ' & -' + old + ' +' + new
            replacements.append((old, new))
        if not summary_commandline:
            default_summary_message = i18n.twtranslate(
                pywikibot.getSite(), 'replace-replacing',
                {'description': change})
            pywikibot.output(u'The summary message will default to: %s'
                             % default_summary_message)
            summary_message = pywikibot.input(
                u'Press Enter to use this default message, or enter a ' +
                u'description of the\nchanges your bot will make:')
            if summary_message == '':
                summary_message = default_summary_message
            edit_summary = summary_message
    else:
        # Perform one of the predefined actions.
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % fixes.fixes.keys())
            return
        if "regex" in fix:
            regex = fix['regex']
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                edit_summary = i18n.twtranslate(pywikibot.getSite(),
                                                str(fix['msg']))
            else:
                edit_summary = pywikibot.translate(pywikibot.getSite(),
                                                   fix['msg'])
        if "exceptions" in fix:
            exceptions = fix['exceptions']
        if "nocase" in fix:
            caseInsensitive = fix['nocase']
        replacements = fix['replacements']

    # Set the regular expression flags.
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later.
    for i in range(len(replacements)):
        old, new = replacements[i]
        if not regex:
            old = re.escape(old)
        oldR = re.compile(old, flags)
        replacements[i] = oldR, new

    for exceptionCategory in [
            'title', 'require-title', 'text-contains', 'inside']:
        if exceptionCategory in exceptions:
            patterns = exceptions[exceptionCategory]
            if not regex:
                patterns = [re.escape(pattern) for pattern in patterns]
            patterns = [re.compile(pattern, flags) for pattern in patterns]
            exceptions[exceptionCategory] = patterns

    if xmlFilename:
        # xmlStart is only bound when -xmlstart was given; default to None.
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
             for (old, new) in replacements])
        if exceptions:
            # NOTE(review): 'exceptions' is a dict here, so this iterates its
            # keys (strings) and 'exc.pattern' would raise AttributeError.
            # Kept as upstream behaviour; this SQL branch looks broken with
            # exception patterns set - confirm before relying on it.
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)
    elif PageTitles:
        pages = [pywikibot.Page(pywikibot.getSite(), PageTitle)
                 for PageTitle in PageTitles]
        gen = iter(pages)

    if not gen:
        # Syntax error; show help text from the top of this file.
        pywikibot.showHelp('replace')
        return
    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary)
    bot.run()