def _generate_diff(self, hunks): """Generate a diff text for the given hunks.""" def extend_context(start, end): """Add context lines.""" return ''.join(' {0}\n'.format(line.rstrip()) for line in self.a[start:end]) context_range = self._get_context_range(hunks) output = color_format( '{aqua}{0}{default}\n{1}', Hunk.get_header_text(*context_range), extend_context(context_range[0][0], hunks[0].a_rng[0])) previous_hunk = None for hunk in hunks: if previous_hunk: output += extend_context(previous_hunk.a_rng[1], hunk.a_rng[0]) previous_hunk = hunk output += hunk.diff_text output += extend_context(hunks[-1].a_rng[1], context_range[0][1]) if self._replace_invisible: output = chars.replace_invisible(output) return output
def _generate_diff(self, hunks): """Generate a diff text for the given hunks.""" def extend_context(start, end): """Add context lines.""" return ''.join(' {0}\n'.format(line.rstrip()) for line in self.a[start:end]) context_range = self._get_context_range(hunks) output = ('\03{aqua}' + Hunk.get_header_text(*context_range) + '\03{default}\n' + extend_context(context_range[0][0], hunks[0].a_rng[0])) previous_hunk = None for hunk in hunks: if previous_hunk: output += extend_context(previous_hunk.a_rng[1], hunk.a_rng[0]) previous_hunk = hunk output += hunk.diff_text output += extend_context(hunks[-1].a_rng[1], context_range[0][1]) if self._replace_invisible: output = chars.replace_invisible(output) return output
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ add_cat = None gen = None # summary message edit_summary = "" # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { "title": [], "text-contains": [], "inside": [], "inside-tags": [], "require-title": [], # using a seperate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fixes_set = [] # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None # Request manual replacements even if replacements are already defined manual_input = False # Replacements loaded from a file replacement_file = None replacement_file_arg_misplaced = False # Read commandline parameters. local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() for arg in local_args: if genFactory.handleArg(arg): continue if arg == "-regex": regex = True elif arg.startswith("-xmlstart"): if len(arg) == 9: xmlStart = pywikibot.input("Please enter the dumped article to start with:") else: xmlStart = arg[10:] elif arg.startswith("-xml"): if len(arg) == 4: xmlFilename = i18n.input("pywikibot-enter-xml-filename") else: xmlFilename = arg[5:] elif arg == "-sql": useSql = True elif arg.startswith("-excepttitle:"): exceptions["title"].append(arg[13:]) elif arg.startswith("-requiretitle:"): exceptions["require-title"].append(arg[14:]) elif arg.startswith("-excepttext:"): exceptions["text-contains"].append(arg[12:]) elif arg.startswith("-exceptinside:"): exceptions["inside"].append(arg[14:]) elif arg.startswith("-exceptinsidetag:"): exceptions["inside-tags"].append(arg[17:]) elif arg.startswith("-fix:"): fixes_set += [arg[5:]] elif arg.startswith("-sleep:"): sleep = float(arg[7:]) elif arg == "-always": acceptall = True elif arg == "-recursive": recursive = True elif arg == "-nocase": caseInsensitive = True elif arg == "-dotall": dotall = True elif arg == "-multiline": multiline = True elif arg.startswith("-addcat:"): add_cat = arg[8:] elif arg.startswith("-summary:"): edit_summary = arg[9:] elif arg.startswith("-automaticsummary"): edit_summary = True elif arg.startswith("-allowoverlap"): allowoverlap = True elif arg.startswith("-manualinput"): manual_input = True elif arg.startswith("-replacementfile"): issue_deprecation_warning("-replacementfile", "-pairsfile", 2, ArgumentDeprecationWarning) elif arg.startswith("-pairsfile"): if len(commandline_replacements) % 2: replacement_file_arg_misplaced = True if arg == "-pairsfile": replacement_file = pywikibot.input("Please enter the filename to read replacements from:") else: replacement_file = arg[len("-pairsfile:") :] else: commandline_replacements.append(arg) site = pywikibot.Site() if len(commandline_replacements) % 2: pywikibot.error("Incomplete command line pattern replacement pair.") return False if replacement_file_arg_misplaced: pywikibot.error("-pairsfile used between a pattern replacement pair.") return False if replacement_file: try: with codecs.open(replacement_file, "r", "utf-8") as f: # strip newlines, but not other characters file_replacements = f.read().splitlines() except (IOError, OSError) as e: pywikibot.error("Error loading {0}: {1}".format(replacement_file, e)) return False if len(file_replacements) % 2: pywikibot.error("{0} contains an incomplete pattern replacement pair.".format(replacement_file)) return False # Strip BOM from first line file_replacements[0].lstrip("\uFEFF") commandline_replacements.extend(file_replacements) if not (commandline_replacements or fixes_set) or manual_input: old = pywikibot.input("Please enter the text that should be replaced:") while old: new = pywikibot.input("Please enter the new text:") commandline_replacements += [old, new] old = pywikibot.input("Please enter another text that should be replaced," "\nor press Enter to start:") # The summary stored here won't be actually used but is only an example single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1]) if not single_summary: single_summary = i18n.twtranslate( site, "replace-replacing", {"description": " (-%s +%s)" % (replacement.old, replacement.new)} ) replacements.append(replacement) # Perform one of the predefined actions. missing_fixes_summaries = [] # which a fixes/replacements miss a summary for fix_name in fixes_set: try: fix = fixes.fixes[fix_name] except KeyError: pywikibot.output("Available predefined fixes are: %s" % ", ".join(fixes.fixes.keys())) if not fixes.user_fixes_loaded: pywikibot.output("The user fixes file could not be found: " "{0}".format(fixes.filename)) return if not fix["replacements"]: pywikibot.warning("No replacements defined for fix " '"{0}"'.format(fix_name)) continue if "msg" in fix: if isinstance(fix["msg"], basestring): set_summary = i18n.twtranslate(site, str(fix["msg"])) else: set_summary = i18n.translate(site, fix["msg"], fallback=True) else: set_summary = None replacement_set = ReplacementList( fix.get("regex"), fix.get("exceptions"), fix.get("nocase"), set_summary, name=fix_name ) # Whether some replacements have a summary, if so only show which # have none, otherwise just mention the complete fix missing_fix_summaries = [] for index, replacement in enumerate(fix["replacements"], start=1): summary = None if len(replacement) < 3 else replacement[2] if not set_summary and not summary: missing_fix_summaries.append('"{0}" (replacement #{1})'.format(fix_name, index)) if chars.contains_invisible(replacement[0]): pywikibot.warning( 'The old string "{0}" contains formatting ' "characters like U+200E".format(chars.replace_invisible(replacement[0])) ) if not callable(replacement[1]) and chars.contains_invisible(replacement[1]): pywikibot.warning( 'The new string "{0}" contains formatting ' "characters like U+200E".format(chars.replace_invisible(replacement[1])) ) replacement_set.append( ReplacementListEntry( old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary ) ) if replacement_set: replacements.extend(replacement_set) if len(fix["replacements"]) == len(missing_fix_summaries): missing_fixes_summaries.append('"{0}" (all replacements)'.format(fix_name)) else: missing_fixes_summaries += missing_fix_summaries if (not edit_summary or edit_summary is True) and (missing_fixes_summaries or single_summary): if single_summary: pywikibot.output( "The summary message for the command line " "replacements will be something like: %s" % single_summary ) if missing_fixes_summaries: pywikibot.output( "The summary will not be used when the fix has " "one defined but the following fix(es) do(es) not " "have a summary defined: " "{0}".format(", ".join(missing_fixes_summaries)) ) if edit_summary is not True: edit_summary = pywikibot.input( "Press Enter to use this automatic message, or enter a " "description of the\nchanges your bot will make:" ) else: edit_summary = "" # Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Pre-compile all regular expressions here to save time later for replacement in replacements: replacement.compile(regex, flags) precompile_exceptions(exceptions, regex, flags) if xmlFilename: try: xmlStart except NameError: xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) elif useSql: whereClause = "WHERE (%s)" % " OR ".join( [ "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern) for (old_regexp, new_text) in replacements ] ) if exceptions: exceptClause = "AND NOT (%s)" % " OR ".join( ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions] ) else: exceptClause = "" query = """ SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) %s %s LIMIT 200""" % ( whereClause, exceptClause, ) gen = pagegenerators.MySQLPageGenerator(query) gen = genFactory.getCombinedGenerator(gen) if not gen: pywikibot.bot.suggest_help(missing_generator=True) return False preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = ReplaceRobot( preloadingGen, replacements, exceptions, allowoverlap, recursive, add_cat, sleep, edit_summary, always=acceptall, site=site, ) site.login() bot.run() # Explicitly call pywikibot.stopme(). # It will make sure the callback is triggered before replace.py is unloaded. pywikibot.stopme() pywikibot.output("\n%s pages changed." % bot.changed_pages)
def test_replace(self): """Test replace_invisible.""" self.assertEqual(chars.replace_invisible('Hello world!'), 'Hello world!') self.assertEqual(chars.replace_invisible('\u200eRTL\u200f'), '<200e>RTL<200f>')
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: unicode """ add_cat = None gen = None # summary message edit_summary = '' # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { 'title': [], 'text-contains': [], 'inside': [], 'inside-tags': [], 'require-title': [], # using a separate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fixes_set = [] # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False sql_query = None # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None # Request manual replacements even if replacements are already defined manual_input = False # Replacements loaded from a file replacement_file = None replacement_file_arg_misplaced = False # Read commandline parameters. local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() for arg in local_args: if genFactory.handleArg(arg): continue if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = pywikibot.input( 'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = i18n.input('pywikibot-enter-xml-filename') else: xmlFilename = arg[5:] elif arg.startswith(('-sql', '-mysqlquery')): if arg.startswith('-sql'): issue_deprecation_warning('The usage of "-sql"', '-mysqlquery', 1, ArgumentDeprecationWarning, since='20180617') useSql = True sql_query = arg.partition(':')[2] elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) elif arg.startswith('-requiretitle:'): exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): exceptions['inside'].append(arg[14:]) elif arg.startswith('-exceptinsidetag:'): exceptions['inside-tags'].append(arg[17:]) elif arg.startswith('-fix:'): fixes_set += [arg[5:]] elif arg.startswith('-sleep:'): sleep = float(arg[7:]) elif arg == '-always': acceptall = True elif arg == '-recursive': recursive = True elif arg == '-nocase': caseInsensitive = True elif arg == '-dotall': dotall = True elif arg == '-multiline': multiline = True elif arg.startswith('-addcat:'): add_cat = arg[8:] elif arg.startswith('-summary:'): edit_summary = arg[9:] elif arg.startswith('-automaticsummary'): edit_summary = True elif arg.startswith('-allowoverlap'): allowoverlap = True elif arg.startswith('-manualinput'): manual_input = True elif arg.startswith('-replacementfile'): issue_deprecation_warning('-replacementfile', '-pairsfile', 2, ArgumentDeprecationWarning, since='20160304') elif arg.startswith('-pairsfile'): if len(commandline_replacements) % 2: replacement_file_arg_misplaced = True if arg == '-pairsfile': replacement_file = pywikibot.input( 'Please enter the filename to read replacements from:') else: replacement_file = arg[len('-pairsfile:'):] else: commandline_replacements.append(arg) site = pywikibot.Site() if len(commandline_replacements) % 2: pywikibot.error('Incomplete command line pattern replacement pair.') return False if replacement_file_arg_misplaced: pywikibot.error('-pairsfile used between a pattern replacement pair.') return False if replacement_file: try: with codecs.open(replacement_file, 'r', 'utf-8') as f: # strip newlines, but not other characters file_replacements = f.read().splitlines() except (IOError, OSError) as e: pywikibot.error('Error loading {0}: {1}'.format( replacement_file, e)) return False if len(file_replacements) % 2: pywikibot.error( '{0} contains an incomplete pattern replacement pair.'.format( replacement_file)) return False # Strip BOM from first line file_replacements[0].lstrip('\uFEFF') commandline_replacements.extend(file_replacements) if not (commandline_replacements or fixes_set) or manual_input: old = pywikibot.input('Please enter the text that should be replaced:') while old: new = pywikibot.input('Please enter the new text:') commandline_replacements += [old, new] old = pywikibot.input( 'Please enter another text that should be replaced,' '\nor press Enter to start:') # The summary stored here won't be actually used but is only an example single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1]) if not single_summary: single_summary = i18n.twtranslate( site, 'replace-replacing', { 'description': ' (-{0} +{1})'.format(replacement.old, replacement.new) }) replacements.append(replacement) # Perform one of the predefined actions. missing_fixes_summaries = [] # which a fixes/replacements miss a summary generators_given = bool(genFactory.gens) for fix_name in fixes_set: try: fix = fixes.fixes[fix_name] except KeyError: pywikibot.output('Available predefined fixes are: {0}'.format( ', '.join(fixes.fixes.keys()))) if not fixes.user_fixes_loaded: pywikibot.output('The user fixes file could not be found: ' '{0}'.format(fixes.filename)) return if not fix['replacements']: pywikibot.warning('No replacements defined for fix ' '"{0}"'.format(fix_name)) continue if 'msg' in fix: if isinstance(fix['msg'], basestring): set_summary = i18n.twtranslate(site, str(fix['msg'])) else: set_summary = i18n.translate(site, fix['msg'], fallback=True) else: set_summary = None if not generators_given and 'generator' in fix: gen_args = fix['generator'] if isinstance(gen_args, basestring): gen_args = [gen_args] for gen_arg in gen_args: genFactory.handleArg(gen_arg) replacement_set = ReplacementList(fix.get('regex'), fix.get('exceptions'), fix.get('nocase'), set_summary, name=fix_name) # Whether some replacements have a summary, if so only show which # have none, otherwise just mention the complete fix missing_fix_summaries = [] for index, replacement in enumerate(fix['replacements'], start=1): summary = None if len(replacement) < 3 else replacement[2] if not set_summary and not summary: missing_fix_summaries.append('"{0}" (replacement #{1})'.format( fix_name, index)) if chars.contains_invisible(replacement[0]): pywikibot.warning('The old string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[0]))) if (not callable(replacement[1]) and chars.contains_invisible(replacement[1])): pywikibot.warning('The new string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[1]))) replacement_set.append( ReplacementListEntry( old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary, )) # Exceptions specified via 'fix' shall be merged to those via CLI. if replacement_set: replacements.extend(replacement_set) if replacement_set._exceptions is not None: for k, v in replacement_set._exceptions.items(): if k in exceptions: exceptions[k] = list(set(exceptions[k]) | set(v)) else: exceptions[k] = v if len(fix['replacements']) == len(missing_fix_summaries): missing_fixes_summaries.append( '"{0}" (all replacements)'.format(fix_name)) else: missing_fixes_summaries += missing_fix_summaries if ((not edit_summary or edit_summary is True) and (missing_fixes_summaries or single_summary)): if single_summary: pywikibot.output('The summary message for the command line ' 'replacements will be something like: ' + single_summary) if missing_fixes_summaries: pywikibot.output('The summary will not be used when the fix has ' 'one defined but the following fix(es) do(es) ' 'not have a summary defined: ' '{0}'.format(', '.join(missing_fixes_summaries))) if edit_summary is not True: edit_summary = pywikibot.input( 'Press Enter to use this automatic message, or enter a ' 'description of the\nchanges your bot will make:') else: edit_summary = '' # Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Pre-compile all regular expressions here to save time later for replacement in replacements: replacement.compile(regex, flags) precompile_exceptions(exceptions, regex, flags) if xmlFilename: try: xmlStart except NameError: xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) elif useSql: if not sql_query: whereClause = 'WHERE (%s)' % ' OR '.join([ "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern) for (old_regexp, new_text) in replacements ]) if exceptions: exceptClause = 'AND NOT (%s)' % ' OR '.join([ "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions ]) else: exceptClause = '' query = sql_query or """ SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) %s %s LIMIT 200""" % (whereClause, exceptClause) gen = pagegenerators.MySQLPageGenerator(query) gen = genFactory.getCombinedGenerator(gen, preload=True) if not gen: pywikibot.bot.suggest_help(missing_generator=True) return False bot = ReplaceRobot(gen, replacements, exceptions, allowoverlap, recursive, add_cat, sleep, edit_summary, always=acceptall, site=site) site.login() bot.run() # Explicitly call pywikibot.stopme(). It will make sure the callback is # triggered before replace.py is unloaded. pywikibot.stopme() pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ add_cat = None gen = None # summary message edit_summary = u"" # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { 'title': [], 'text-contains': [], 'inside': [], 'inside-tags': [], 'require-title': [], # using a seperate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fixes_set = [] # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None # Request manual replacements even if replacements are already defined manual_input = False # Replacements loaded from a file replacement_file = None replacement_file_arg_misplaced = False # Read commandline parameters. local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() for arg in local_args: if genFactory.handleArg(arg): continue if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = pywikibot.input( u'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = i18n.input('pywikibot-enter-xml-filename') else: xmlFilename = arg[5:] elif arg == '-sql': useSql = True elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) elif arg.startswith('-requiretitle:'): exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): exceptions['inside'].append(arg[14:]) elif arg.startswith('-exceptinsidetag:'): exceptions['inside-tags'].append(arg[17:]) elif arg.startswith('-fix:'): fixes_set += [arg[5:]] elif arg.startswith('-sleep:'): sleep = float(arg[7:]) elif arg == '-always': acceptall = True elif arg == '-recursive': recursive = True elif arg == '-nocase': caseInsensitive = True elif arg == '-dotall': dotall = True elif arg == '-multiline': multiline = True elif arg.startswith('-addcat:'): add_cat = arg[8:] elif arg.startswith('-summary:'): edit_summary = arg[9:] elif arg.startswith('-allowoverlap'): allowoverlap = True elif arg.startswith('-manualinput'): manual_input = True elif arg.startswith('-replacementfile'): if len(commandline_replacements) % 2: replacement_file_arg_misplaced = True if arg == '-replacementfile': replacement_file = pywikibot.input( u'Please enter the filename to read replacements from:') else: replacement_file = arg[len('-replacementfile:'):] else: commandline_replacements.append(arg) site = pywikibot.Site() if len(commandline_replacements) % 2: pywikibot.error('Incomplete command line pattern replacement pair.') return False if replacement_file_arg_misplaced: pywikibot.error( '-replacementfile used between a pattern replacement pair.') return False if replacement_file: try: with codecs.open(replacement_file, 'r', 'utf-8') as f: # strip newlines, but not other characters file_replacements = f.read().splitlines() except (IOError, OSError) as e: pywikibot.error(u'Error loading {0}: {1}'.format( replacement_file, e)) return False if len(file_replacements) % 2: pywikibot.error( '{0} contains an incomplete pattern replacement pair.'.format( replacement_file)) return False # Strip BOM from first line file_replacements[0].lstrip(u'\uFEFF') commandline_replacements.extend(file_replacements) if not(commandline_replacements or fixes_set) or manual_input: old = pywikibot.input(u'Please enter the text that should be replaced:') while old: new = pywikibot.input(u'Please enter the new text:') commandline_replacements += [old, new] old = pywikibot.input( 'Please enter another text that should be replaced,' '\nor press Enter to start:') single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1]) if not single_summary: single_summary = i18n.twtranslate( site, 'replace-replacing', {'description': ' (-%s +%s)' % (replacement.old, replacement.new)} ) replacements.append(replacement) if not edit_summary: if single_summary: pywikibot.output(u'The summary message for the command line ' 'replacements will be something like: %s' % single_summary) if fixes_set: pywikibot.output('If a summary is defined for the fix, this ' 'default summary won\'t be applied.') edit_summary = pywikibot.input( 'Press Enter to use this automatic message, or enter a ' 'description of the\nchanges your bot will make:') # Perform one of the predefined actions. for fix in fixes_set: try: fix = fixes.fixes[fix] except KeyError: pywikibot.output(u'Available predefined fixes are: %s' % ', '.join(fixes.fixes.keys())) if not fixes.user_fixes_loaded: pywikibot.output('The user fixes file could not be found: ' '{0}'.format(fixes.filename)) return if "msg" in fix: if isinstance(fix['msg'], basestring): set_summary = i18n.twtranslate(site, str(fix['msg'])) else: set_summary = i18n.translate(site, fix['msg'], fallback=True) else: set_summary = None replacement_set = ReplacementList(fix.get('regex'), fix.get('exceptions'), fix.get('nocase'), set_summary) for replacement in fix['replacements']: summary = None if len(replacement) < 3 else replacement[2] if chars.contains_invisible(replacement[0]): pywikibot.warning('The old string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[0]))) if chars.contains_invisible(replacement[1]): pywikibot.warning('The new string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[1]))) replacements.append(ReplacementListEntry( old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary, )) # Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Pre-compile all regular expressions here to save time later for replacement in replacements: replacement.compile(regex, flags) precompile_exceptions(exceptions, regex, flags) if xmlFilename: try: xmlStart except NameError: xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) elif useSql: whereClause = 'WHERE (%s)' % ' OR '.join( ["old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern) for (old_regexp, new_text) in replacements]) if exceptions: exceptClause = 'AND NOT (%s)' % ' OR '.join( ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions]) else: exceptClause = '' query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) %s %s LIMIT 200""" % (whereClause, exceptClause) gen = pagegenerators.MySQLPageGenerator(query) gen = genFactory.getCombinedGenerator(gen) if not gen: # syntax error, show help text from the top of this file pywikibot.showHelp('replace') return preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, edit_summary, site) site.login() bot.run() # Explicitly call pywikibot.stopme(). # It will make sure the callback is triggered before replace.py is unloaded. pywikibot.stopme() pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
def main(*args): """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. @param args: command line arguments @type args: list of unicode """ add_cat = None gen = None # summary message edit_summary = u"" # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { 'title': [], 'text-contains': [], 'inside': [], 'inside-tags': [], 'require-title': [], # using a seperate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fixes_set = [] # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None # Request manual replacements even if replacements are already defined manual_input = False # Replacements loaded from a file replacement_file = None replacement_file_arg_misplaced = False # Read commandline parameters. local_args = pywikibot.handle_args(args) genFactory = pagegenerators.GeneratorFactory() for arg in local_args: if genFactory.handleArg(arg): continue if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = pywikibot.input( u'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = i18n.input('pywikibot-enter-xml-filename') else: xmlFilename = arg[5:] elif arg == '-sql': useSql = True elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) elif arg.startswith('-requiretitle:'): exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): exceptions['inside'].append(arg[14:]) elif arg.startswith('-exceptinsidetag:'): exceptions['inside-tags'].append(arg[17:]) elif arg.startswith('-fix:'): fixes_set += [arg[5:]] elif arg.startswith('-sleep:'): sleep = float(arg[7:]) elif arg == '-always': acceptall = True elif arg == '-recursive': recursive = True elif arg == '-nocase': caseInsensitive = True elif arg == '-dotall': dotall = True elif arg == '-multiline': multiline = True elif arg.startswith('-addcat:'): add_cat = arg[8:] elif arg.startswith('-summary:'): edit_summary = arg[9:] elif arg.startswith('-allowoverlap'): allowoverlap = True elif arg.startswith('-manualinput'): manual_input = True elif arg.startswith('-replacementfile'): if len(commandline_replacements) % 2: replacement_file_arg_misplaced = True if arg == '-replacementfile': replacement_file = pywikibot.input( u'Please enter the filename to read replacements from:') else: replacement_file = arg[len('-replacementfile:'):] else: commandline_replacements.append(arg) site = pywikibot.Site() if len(commandline_replacements) % 2: pywikibot.error('Incomplete command line pattern replacement pair.') return False if replacement_file_arg_misplaced: pywikibot.error( '-replacementfile used between a pattern replacement pair.') return False if replacement_file: try: with codecs.open(replacement_file, 'r', 'utf-8') as f: # strip newlines, but not other characters file_replacements = f.read().splitlines() except (IOError, OSError) as e: pywikibot.error(u'Error loading {0}: {1}'.format( replacement_file, e)) return False if len(file_replacements) % 2: pywikibot.error( '{0} contains an incomplete pattern replacement pair.'.format( replacement_file)) return False # Strip BOM from first line file_replacements[0].lstrip(u'\uFEFF') commandline_replacements.extend(file_replacements) if not (commandline_replacements or fixes_set) or manual_input: old = pywikibot.input( u'Please enter the text that should be replaced:') while old: new = pywikibot.input(u'Please enter the new text:') commandline_replacements += [old, new] old = pywikibot.input( 'Please enter another text that should be replaced,' '\nor press Enter to start:') single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1]) if not single_summary: single_summary = i18n.twtranslate(site, 'replace-replacing', { 'description': ' (-%s +%s)' % (replacement.old, replacement.new) }) replacements.append(replacement) if not edit_summary: if single_summary: pywikibot.output(u'The summary message for the command line ' 'replacements will be something like: %s' % single_summary) if fixes_set: pywikibot.output('If a summary is defined for the fix, this ' 'default summary won\'t be applied.') edit_summary = pywikibot.input( 'Press Enter to use this automatic message, or enter a ' 'description of the\nchanges your bot will make:') # Perform one of the predefined actions. for fix in fixes_set: try: fix = fixes.fixes[fix] except KeyError: pywikibot.output(u'Available predefined fixes are: %s' % ', '.join(fixes.fixes.keys())) if not fixes.user_fixes_loaded: pywikibot.output('The user fixes file could not be found: ' '{0}'.format(fixes.filename)) return if "msg" in fix: if isinstance(fix['msg'], basestring): set_summary = i18n.twtranslate(site, str(fix['msg'])) else: set_summary = i18n.translate(site, fix['msg'], fallback=True) else: set_summary = None replacement_set = ReplacementList(fix.get('regex'), fix.get('exceptions'), fix.get('nocase'), set_summary) for replacement in fix['replacements']: summary = None if len(replacement) < 3 else replacement[2] if chars.contains_invisible(replacement[0]): pywikibot.warning('The old string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[0]))) if chars.contains_invisible(replacement[1]): pywikibot.warning('The new string "{0}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[1]))) replacements.append( ReplacementListEntry( old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary, )) # Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Pre-compile all regular expressions here to save time later for replacement in replacements: replacement.compile(regex, flags) precompile_exceptions(exceptions, regex, flags) if xmlFilename: try: xmlStart except NameError: xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) elif useSql: whereClause = 'WHERE (%s)' % ' OR '.join([ "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern) for (old_regexp, new_text) in replacements ]) if exceptions: exceptClause = 'AND NOT (%s)' % ' OR '.join([ "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions ]) else: exceptClause = '' query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) %s %s LIMIT 200""" % (whereClause, exceptClause) gen = pagegenerators.MySQLPageGenerator(query) gen = genFactory.getCombinedGenerator(gen) if not gen: # syntax error, show help text from the top of this file pywikibot.showHelp('replace') return preloadingGen = pagegenerators.PreloadingGenerator(gen) bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep, edit_summary, site) site.login() bot.run() # Explicitly call pywikibot.stopme(). # It will make sure the callback is triggered before replace.py is unloaded. pywikibot.stopme() pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
def main(*args: str) -> None: """ Process command line arguments and invoke bot. If args is an empty list, sys.argv is used. :param args: command line arguments """ options = {} gen = None # summary message edit_summary = '' # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] file_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fixes_set = [] # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None xmlStart = None sql_query = None # type: Optional[str] # Set the default regular expression flags flags = 0 # Request manual replacements even if replacements are already defined manual_input = False # Read commandline parameters. genFactory = pagegenerators.GeneratorFactory( disabled_options=['mysqlquery']) local_args = pywikibot.handle_args(args) local_args = genFactory.handle_args(local_args) local_args, exceptions = handle_exceptions(*local_args) for arg in local_args: opt, _, value = arg.partition(':') if opt == '-regex': regex = True elif opt == '-xmlstart': xmlStart = value or pywikibot.input( 'Please enter the dumped article to start with:') elif opt == '-xml': xmlFilename = value or i18n.input('pywikibot-enter-xml-filename') elif opt == '-mysqlquery': sql_query = value elif opt == '-fix': fixes_set.append(value) elif opt == '-sleep': options['sleep'] = float(value) elif opt in ('-allowoverlap', '-always', '-recursive'): options[opt[1:]] = True elif opt == '-nocase': flags |= re.IGNORECASE elif opt == '-dotall': flags |= re.DOTALL elif opt == '-multiline': flags |= re.MULTILINE elif opt == '-addcat': options['addcat'] = value elif opt == '-summary': edit_summary = value elif opt == '-automaticsummary': edit_summary = True elif opt == '-manualinput': manual_input = True elif opt == '-pairsfile': file_replacements = handle_pairsfile(value) else: commandline_replacements.append(arg) if file_replacements is None: return if len(commandline_replacements) % 2: pywikibot.error('Incomplete command line pattern replacement pair.') return commandline_replacements += file_replacements if not(commandline_replacements or fixes_set) or manual_input: commandline_replacements += handle_manual() # The summary stored here won't be actually used but is only an example site = pywikibot.Site() single_summary = None for i in range(0, len(commandline_replacements), 2): replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1]) if not single_summary: single_summary = i18n.twtranslate( site, 'replace-replacing', {'description': ' (-{} +{})'.format(replacement.old, replacement.new)} ) replacements.append(replacement) # Perform one of the predefined actions. missing_fixes_summaries = [] # which a fixes/replacements miss a summary generators_given = bool(genFactory.gens) for fix_name in fixes_set: try: fix = fixes.fixes[fix_name] except KeyError: pywikibot.output('Available predefined fixes are: {}' .format(', '.join(fixes.fixes.keys()))) if not fixes.user_fixes_loaded: pywikibot.output('The user fixes file could not be found: {}' .format(fixes.filename)) return if not fix['replacements']: pywikibot.warning('No replacements defined for fix "{}"' .format(fix_name)) continue if 'msg' in fix: if isinstance(fix['msg'], str): set_summary = i18n.twtranslate(site, str(fix['msg'])) else: set_summary = i18n.translate(site, fix['msg'], fallback=True) else: set_summary = None if not generators_given and 'generator' in fix: gen_args = fix['generator'] if isinstance(gen_args, str): gen_args = [gen_args] for gen_arg in gen_args: genFactory.handle_arg(gen_arg) replacement_set = ReplacementList(fix.get('regex'), fix.get('exceptions'), fix.get('nocase'), set_summary, name=fix_name) # Whether some replacements have a summary, if so only show which # have none, otherwise just mention the complete fix missing_fix_summaries = [] for index, replacement in enumerate(fix['replacements'], start=1): summary = None if len(replacement) < 3 else replacement[2] if not set_summary and not summary: missing_fix_summaries.append( '"{}" (replacement #{})'.format(fix_name, index)) if chars.contains_invisible(replacement[0]): pywikibot.warning('The old string "{}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[0]))) if (not callable(replacement[1]) and chars.contains_invisible(replacement[1])): pywikibot.warning('The new string "{}" contains formatting ' 'characters like U+200E'.format( chars.replace_invisible(replacement[1]))) replacement_set.append(ReplacementListEntry( old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary, )) # Exceptions specified via 'fix' shall be merged to those via CLI. if replacement_set: replacements.extend(replacement_set) if replacement_set._exceptions is not None: for k, v in replacement_set._exceptions.items(): if k in exceptions: exceptions[k] = list(set(exceptions[k]) | set(v)) else: exceptions[k] = v if len(fix['replacements']) == len(missing_fix_summaries): missing_fixes_summaries.append( '"{}" (all replacements)'.format(fix_name)) else: missing_fixes_summaries += missing_fix_summaries if ((not edit_summary or edit_summary is True) and (missing_fixes_summaries or single_summary)): if single_summary: pywikibot.output('The summary message for the command line ' 'replacements will be something like: ' + single_summary) if missing_fixes_summaries: pywikibot.output('The summary will not be used when the fix has ' 'one defined but the following fix(es) do(es) ' 'not have a summary defined: {}' .format(', '.join(missing_fixes_summaries))) if edit_summary is not True: edit_summary = pywikibot.input( 'Press Enter to use this automatic message, or enter a ' 'description of the\nchanges your bot will make:') else: edit_summary = '' # Pre-compile all regular expressions here to save time later for replacement in replacements: replacement.compile(regex, flags) precompile_exceptions(exceptions, regex, flags) if xmlFilename: gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site) elif sql_query is not None: # Only -excepttext option is considered by the query. Other # exceptions are taken into account by the ReplaceRobot gen = handle_sql(sql_query, replacements, exceptions['text-contains']) gen = genFactory.getCombinedGenerator(gen, preload=True) if pywikibot.bot.suggest_help(missing_generator=not gen): return bot = ReplaceRobot(gen, replacements, exceptions, site=site, summary=edit_summary, **options) site.login() bot.run()