def test_contains(self):
     """Test contains_invisible."""
     self.assertFalse(chars.contains_invisible('Hello world!'))
     self.assertTrue(chars.contains_invisible('\u200eRTL\u200f'))
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        "title": [],
        "text-contains": [],
        "inside": [],
        "inside-tags": [],
        "require-title": [],  # using a seperate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == "-regex":
            regex = True
        elif arg.startswith("-xmlstart"):
            if len(arg) == 9:
                xmlStart = pywikibot.input("Please enter the dumped article to start with:")
            else:
                xmlStart = arg[10:]
        elif arg.startswith("-xml"):
            if len(arg) == 4:
                xmlFilename = i18n.input("pywikibot-enter-xml-filename")
            else:
                xmlFilename = arg[5:]
        elif arg == "-sql":
            useSql = True
        elif arg.startswith("-excepttitle:"):
            exceptions["title"].append(arg[13:])
        elif arg.startswith("-requiretitle:"):
            exceptions["require-title"].append(arg[14:])
        elif arg.startswith("-excepttext:"):
            exceptions["text-contains"].append(arg[12:])
        elif arg.startswith("-exceptinside:"):
            exceptions["inside"].append(arg[14:])
        elif arg.startswith("-exceptinsidetag:"):
            exceptions["inside-tags"].append(arg[17:])
        elif arg.startswith("-fix:"):
            fixes_set += [arg[5:]]
        elif arg.startswith("-sleep:"):
            sleep = float(arg[7:])
        elif arg == "-always":
            acceptall = True
        elif arg == "-recursive":
            recursive = True
        elif arg == "-nocase":
            caseInsensitive = True
        elif arg == "-dotall":
            dotall = True
        elif arg == "-multiline":
            multiline = True
        elif arg.startswith("-addcat:"):
            add_cat = arg[8:]
        elif arg.startswith("-summary:"):
            edit_summary = arg[9:]
        elif arg.startswith("-automaticsummary"):
            edit_summary = True
        elif arg.startswith("-allowoverlap"):
            allowoverlap = True
        elif arg.startswith("-manualinput"):
            manual_input = True
        elif arg.startswith("-replacementfile"):
            issue_deprecation_warning("-replacementfile", "-pairsfile", 2, ArgumentDeprecationWarning)
        elif arg.startswith("-pairsfile"):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == "-pairsfile":
                replacement_file = pywikibot.input("Please enter the filename to read replacements from:")
            else:
                replacement_file = arg[len("-pairsfile:") :]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error("Incomplete command line pattern replacement pair.")
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error("-pairsfile used between a pattern replacement pair.")
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, "r", "utf-8") as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error("Error loading {0}: {1}".format(replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error("{0} contains an incomplete pattern replacement pair.".format(replacement_file))
            return False

        # Strip BOM from first line
        file_replacements[0].lstrip("\uFEFF")
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input("Please enter the text that should be replaced:")
        while old:
            new = pywikibot.input("Please enter the new text:")
            commandline_replacements += [old, new]
            old = pywikibot.input("Please enter another text that should be replaced," "\nor press Enter to start:")

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i], commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, "replace-replacing", {"description": " (-%s +%s)" % (replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output("Available predefined fixes are: %s" % ", ".join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output("The user fixes file could not be found: " "{0}".format(fixes.filename))
            return
        if not fix["replacements"]:
            pywikibot.warning("No replacements defined for fix " '"{0}"'.format(fix_name))
            continue
        if "msg" in fix:
            if isinstance(fix["msg"], basestring):
                set_summary = i18n.twtranslate(site, str(fix["msg"]))
            else:
                set_summary = i18n.translate(site, fix["msg"], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(
            fix.get("regex"), fix.get("exceptions"), fix.get("nocase"), set_summary, name=fix_name
        )
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix["replacements"], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning(
                    'The old string "{0}" contains formatting '
                    "characters like U+200E".format(chars.replace_invisible(replacement[0]))
                )
            if not callable(replacement[1]) and chars.contains_invisible(replacement[1]):
                pywikibot.warning(
                    'The new string "{0}" contains formatting '
                    "characters like U+200E".format(chars.replace_invisible(replacement[1]))
                )
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0], new=replacement[1], fix_set=replacement_set, edit_summary=summary
                )
            )

        if replacement_set:
            replacements.extend(replacement_set)

        if len(fix["replacements"]) == len(missing_fix_summaries):
            missing_fixes_summaries.append('"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if (not edit_summary or edit_summary is True) and (missing_fixes_summaries or single_summary):
        if single_summary:
            pywikibot.output(
                "The summary message for the command line " "replacements will be something like: %s" % single_summary
            )
        if missing_fixes_summaries:
            pywikibot.output(
                "The summary will not be used when the fix has "
                "one defined but the following fix(es) do(es) not "
                "have a summary defined: "
                "{0}".format(", ".join(missing_fixes_summaries))
            )
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                "Press Enter to use this automatic message, or enter a "
                "description of the\nchanges your bot will make:"
            )
        else:
            edit_summary = ""

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions, site)
    elif useSql:
        whereClause = "WHERE (%s)" % " OR ".join(
            [
                "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements
            ]
        )
        if exceptions:
            exceptClause = "AND NOT (%s)" % " OR ".join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions]
            )
        else:
            exceptClause = ""
        query = """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (
            whereClause,
            exceptClause,
        )
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(
        preloadingGen,
        replacements,
        exceptions,
        allowoverlap,
        recursive,
        add_cat,
        sleep,
        edit_summary,
        always=acceptall,
        site=site,
    )
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output("\n%s pages changed." % bot.changed_pages)
Exemple #3
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a separate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    sql_query = None
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg.startswith(('-sql', '-mysqlquery')):
            if arg.startswith('-sql'):
                issue_deprecation_warning('The usage of "-sql"',
                                          '-mysqlquery',
                                          1,
                                          ArgumentDeprecationWarning,
                                          since='20180617')
            useSql = True
            sql_query = arg.partition(':')[2]
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-automaticsummary'):
            edit_summary = True
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            issue_deprecation_warning('-replacementfile',
                                      '-pairsfile',
                                      2,
                                      ArgumentDeprecationWarning,
                                      since='20160304')
        elif arg.startswith('-pairsfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-pairsfile':
                replacement_file = pywikibot.input(
                    'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-pairsfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error('-pairsfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error('Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line
        file_replacements[0].lstrip('\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input('Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input('Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    # The summary stored here won't be actually used but is only an example
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing', {
                    'description':
                    ' (-{0} +{1})'.format(replacement.old, replacement.new)
                })
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {0}'.format(
                ', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix '
                              '"{0}"'.format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, basestring):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handleArg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append('"{0}" (replacement #{1})'.format(
                    fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{0}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: ' +
                             single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: '
                             '{0}'.format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        if not sql_query:
            whereClause = 'WHERE (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" %
                prepareRegexForMySQL(old_regexp.pattern)
                for (old_regexp, new_text) in replacements
            ])
            if exceptions:
                exceptClause = 'AND NOT (%s)' % ' OR '.join([
                    "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                    for exc in exceptions
                ])
            else:
                exceptClause = ''
        query = sql_query or """
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen, preload=True)

    if not gen:
        pywikibot.bot.suggest_help(missing_generator=True)
        return False

    bot = ReplaceRobot(gen,
                       replacements,
                       exceptions,
                       allowoverlap,
                       recursive,
                       add_cat,
                       sleep,
                       edit_summary,
                       always=acceptall,
                       site=site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme(). It will make sure the callback is
    # triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output('\n{0} pages changed.'.format(bot.changed_pages))
Exemple #4
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title':         [],
        'text-contains': [],
        'inside':        [],
        'inside-tags':   [],
        'require-title': [],  # using a seperate requirements dict needs some
    }                        # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line
        file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not(commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description':
                 ' (-%s +%s)' % (replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s'
                             % single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s'
                             % ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary)
        for replacement in fix['replacements']:
            summary = None if len(replacement) < 3 else replacement[2]
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                    chars.replace_invisible(replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                    chars.replace_invisible(replacement[1])))
            replacements.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join(
            ["old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
             for (old_regexp, new_text) in replacements])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join(
                ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                 for exc in exceptions])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
Exemple #5
0
def main(*args):
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    @param args: command line arguments
    @type args: list of unicode
    """
    add_cat = None
    gen = None
    # summary message
    edit_summary = u""
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []
    # Don't edit pages which contain certain texts.
    exceptions = {
        'title': [],
        'text-contains': [],
        'inside': [],
        'inside-tags': [],
        'require-title': [],  # using a seperate requirements dict needs some
    }  # major refactoring of code.

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    useSql = False
    # will become True when the user presses a ('yes to all') or uses the
    # -always flag.
    acceptall = False
    # Will become True if the user inputs the commandline parameter -nocase
    caseInsensitive = False
    # Will become True if the user inputs the commandline parameter -dotall
    dotall = False
    # Will become True if the user inputs the commandline parameter -multiline
    multiline = False
    # Do all hits when they overlap
    allowoverlap = False
    # Do not recurse replacement
    recursive = False
    # Between a regex and another (using -fix) sleep some time (not to waste
    # too much CPU
    sleep = None
    # Request manual replacements even if replacements are already defined
    manual_input = False
    # Replacements loaded from a file
    replacement_file = None
    replacement_file_arg_misplaced = False

    # Read commandline parameters.

    local_args = pywikibot.handle_args(args)
    genFactory = pagegenerators.GeneratorFactory()

    for arg in local_args:
        if genFactory.handleArg(arg):
            continue
        if arg == '-regex':
            regex = True
        elif arg.startswith('-xmlstart'):
            if len(arg) == 9:
                xmlStart = pywikibot.input(
                    u'Please enter the dumped article to start with:')
            else:
                xmlStart = arg[10:]
        elif arg.startswith('-xml'):
            if len(arg) == 4:
                xmlFilename = i18n.input('pywikibot-enter-xml-filename')
            else:
                xmlFilename = arg[5:]
        elif arg == '-sql':
            useSql = True
        elif arg.startswith('-excepttitle:'):
            exceptions['title'].append(arg[13:])
        elif arg.startswith('-requiretitle:'):
            exceptions['require-title'].append(arg[14:])
        elif arg.startswith('-excepttext:'):
            exceptions['text-contains'].append(arg[12:])
        elif arg.startswith('-exceptinside:'):
            exceptions['inside'].append(arg[14:])
        elif arg.startswith('-exceptinsidetag:'):
            exceptions['inside-tags'].append(arg[17:])
        elif arg.startswith('-fix:'):
            fixes_set += [arg[5:]]
        elif arg.startswith('-sleep:'):
            sleep = float(arg[7:])
        elif arg == '-always':
            acceptall = True
        elif arg == '-recursive':
            recursive = True
        elif arg == '-nocase':
            caseInsensitive = True
        elif arg == '-dotall':
            dotall = True
        elif arg == '-multiline':
            multiline = True
        elif arg.startswith('-addcat:'):
            add_cat = arg[8:]
        elif arg.startswith('-summary:'):
            edit_summary = arg[9:]
        elif arg.startswith('-allowoverlap'):
            allowoverlap = True
        elif arg.startswith('-manualinput'):
            manual_input = True
        elif arg.startswith('-replacementfile'):
            if len(commandline_replacements) % 2:
                replacement_file_arg_misplaced = True

            if arg == '-replacementfile':
                replacement_file = pywikibot.input(
                    u'Please enter the filename to read replacements from:')
            else:
                replacement_file = arg[len('-replacementfile:'):]
        else:
            commandline_replacements.append(arg)

    site = pywikibot.Site()

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return False

    if replacement_file_arg_misplaced:
        pywikibot.error(
            '-replacementfile used between a pattern replacement pair.')
        return False

    if replacement_file:
        try:
            with codecs.open(replacement_file, 'r', 'utf-8') as f:
                # strip newlines, but not other characters
                file_replacements = f.read().splitlines()
        except (IOError, OSError) as e:
            pywikibot.error(u'Error loading {0}: {1}'.format(
                replacement_file, e))
            return False

        if len(file_replacements) % 2:
            pywikibot.error(
                '{0} contains an incomplete pattern replacement pair.'.format(
                    replacement_file))
            return False

        # Strip BOM from first line
        file_replacements[0].lstrip(u'\uFEFF')
        commandline_replacements.extend(file_replacements)

    if not (commandline_replacements or fixes_set) or manual_input:
        old = pywikibot.input(
            u'Please enter the text that should be replaced:')
        while old:
            new = pywikibot.input(u'Please enter the new text:')
            commandline_replacements += [old, new]
            old = pywikibot.input(
                'Please enter another text that should be replaced,'
                '\nor press Enter to start:')

    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(site, 'replace-replacing', {
                'description':
                ' (-%s +%s)' % (replacement.old, replacement.new)
            })
        replacements.append(replacement)

    if not edit_summary:
        if single_summary:
            pywikibot.output(u'The summary message for the command line '
                             'replacements will be something like: %s' %
                             single_summary)
        if fixes_set:
            pywikibot.output('If a summary is defined for the fix, this '
                             'default summary won\'t be applied.')
        edit_summary = pywikibot.input(
            'Press Enter to use this automatic message, or enter a '
            'description of the\nchanges your bot will make:')

    # Perform one of the predefined actions.
    for fix in fixes_set:
        try:
            fix = fixes.fixes[fix]
        except KeyError:
            pywikibot.output(u'Available predefined fixes are: %s' %
                             ', '.join(fixes.fixes.keys()))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: '
                                 '{0}'.format(fixes.filename))
            return
        if "msg" in fix:
            if isinstance(fix['msg'], basestring):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'), set_summary)
        for replacement in fix['replacements']:
            summary = None if len(replacement) < 3 else replacement[2]
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if chars.contains_invisible(replacement[1]):
                pywikibot.warning('The new string "{0}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacements.append(
                ReplacementListEntry(
                    old=replacement[0],
                    new=replacement[1],
                    fix_set=replacement_set,
                    edit_summary=summary,
                ))

    # Set the regular expression flags
    flags = re.UNICODE
    if caseInsensitive:
        flags = flags | re.IGNORECASE
    if dotall:
        flags = flags | re.DOTALL
    if multiline:
        flags = flags | re.MULTILINE

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        try:
            xmlStart
        except NameError:
            xmlStart = None
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements,
                                          exceptions, site)
    elif useSql:
        whereClause = 'WHERE (%s)' % ' OR '.join([
            "old_text RLIKE '%s'" % prepareRegexForMySQL(old_regexp.pattern)
            for (old_regexp, new_text) in replacements
        ])
        if exceptions:
            exceptClause = 'AND NOT (%s)' % ' OR '.join([
                "old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
                for exc in exceptions
            ])
        else:
            exceptClause = ''
        query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
        gen = pagegenerators.MySQLPageGenerator(query)

    gen = genFactory.getCombinedGenerator(gen)

    if not gen:
        # syntax error, show help text from the top of this file
        pywikibot.showHelp('replace')
        return

    preloadingGen = pagegenerators.PreloadingGenerator(gen)
    bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall,
                       allowoverlap, recursive, add_cat, sleep, edit_summary,
                       site)
    site.login()
    bot.run()

    # Explicitly call pywikibot.stopme().
    # It will make sure the callback is triggered before replace.py is unloaded.
    pywikibot.stopme()
    pywikibot.output(u'\n%s pages changed.' % bot.changed_pages)
 def test_contains(self):
     """Test contains_invisible."""
     self.assertFalse(chars.contains_invisible('Hello world!'))
     self.assertTrue(chars.contains_invisible('\u200eRTL\u200f'))
Exemple #7
0
def main(*args: str) -> None:
    """
    Process command line arguments and invoke bot.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    options = {}
    gen = None
    # summary message
    edit_summary = ''
    # Array which will collect commandline parameters.
    # First element is original text, second element is replacement text.
    commandline_replacements = []
    file_replacements = []
    # A list of 2-tuples of original text and replacement text.
    replacements = []

    # Should the elements of 'replacements' and 'exceptions' be interpreted
    # as regular expressions?
    regex = False
    # Predefined fixes from dictionary 'fixes' (see above).
    fixes_set = []
    # the dump's path, either absolute or relative, which will be used
    # if -xml flag is present
    xmlFilename = None
    xmlStart = None
    sql_query = None  # type: Optional[str]
    # Set the default regular expression flags
    flags = 0
    # Request manual replacements even if replacements are already defined
    manual_input = False

    # Read commandline parameters.
    genFactory = pagegenerators.GeneratorFactory(
        disabled_options=['mysqlquery'])
    local_args = pywikibot.handle_args(args)
    local_args = genFactory.handle_args(local_args)
    local_args, exceptions = handle_exceptions(*local_args)

    for arg in local_args:
        opt, _, value = arg.partition(':')
        if opt == '-regex':
            regex = True
        elif opt == '-xmlstart':
            xmlStart = value or pywikibot.input(
                'Please enter the dumped article to start with:')
        elif opt == '-xml':
            xmlFilename = value or i18n.input('pywikibot-enter-xml-filename')
        elif opt == '-mysqlquery':
            sql_query = value
        elif opt == '-fix':
            fixes_set.append(value)
        elif opt == '-sleep':
            options['sleep'] = float(value)
        elif opt in ('-allowoverlap', '-always', '-recursive'):
            options[opt[1:]] = True
        elif opt == '-nocase':
            flags |= re.IGNORECASE
        elif opt == '-dotall':
            flags |= re.DOTALL
        elif opt == '-multiline':
            flags |= re.MULTILINE
        elif opt == '-addcat':
            options['addcat'] = value
        elif opt == '-summary':
            edit_summary = value
        elif opt == '-automaticsummary':
            edit_summary = True
        elif opt == '-manualinput':
            manual_input = True
        elif opt == '-pairsfile':
            file_replacements = handle_pairsfile(value)
        else:
            commandline_replacements.append(arg)

    if file_replacements is None:
        return

    if len(commandline_replacements) % 2:
        pywikibot.error('Incomplete command line pattern replacement pair.')
        return

    commandline_replacements += file_replacements
    if not(commandline_replacements or fixes_set) or manual_input:
        commandline_replacements += handle_manual()

    # The summary stored here won't be actually used but is only an example
    site = pywikibot.Site()
    single_summary = None
    for i in range(0, len(commandline_replacements), 2):
        replacement = Replacement(commandline_replacements[i],
                                  commandline_replacements[i + 1])
        if not single_summary:
            single_summary = i18n.twtranslate(
                site, 'replace-replacing',
                {'description':
                 ' (-{} +{})'.format(replacement.old, replacement.new)}
            )
        replacements.append(replacement)

    # Perform one of the predefined actions.
    missing_fixes_summaries = []  # which a fixes/replacements miss a summary
    generators_given = bool(genFactory.gens)
    for fix_name in fixes_set:
        try:
            fix = fixes.fixes[fix_name]
        except KeyError:
            pywikibot.output('Available predefined fixes are: {}'
                             .format(', '.join(fixes.fixes.keys())))
            if not fixes.user_fixes_loaded:
                pywikibot.output('The user fixes file could not be found: {}'
                                 .format(fixes.filename))
            return
        if not fix['replacements']:
            pywikibot.warning('No replacements defined for fix "{}"'
                              .format(fix_name))
            continue
        if 'msg' in fix:
            if isinstance(fix['msg'], str):
                set_summary = i18n.twtranslate(site, str(fix['msg']))
            else:
                set_summary = i18n.translate(site, fix['msg'], fallback=True)
        else:
            set_summary = None
        if not generators_given and 'generator' in fix:
            gen_args = fix['generator']
            if isinstance(gen_args, str):
                gen_args = [gen_args]
            for gen_arg in gen_args:
                genFactory.handle_arg(gen_arg)
        replacement_set = ReplacementList(fix.get('regex'),
                                          fix.get('exceptions'),
                                          fix.get('nocase'),
                                          set_summary,
                                          name=fix_name)
        # Whether some replacements have a summary, if so only show which
        # have none, otherwise just mention the complete fix
        missing_fix_summaries = []
        for index, replacement in enumerate(fix['replacements'], start=1):
            summary = None if len(replacement) < 3 else replacement[2]
            if not set_summary and not summary:
                missing_fix_summaries.append(
                    '"{}" (replacement #{})'.format(fix_name, index))
            if chars.contains_invisible(replacement[0]):
                pywikibot.warning('The old string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[0])))
            if (not callable(replacement[1])
                    and chars.contains_invisible(replacement[1])):
                pywikibot.warning('The new string "{}" contains formatting '
                                  'characters like U+200E'.format(
                                      chars.replace_invisible(replacement[1])))
            replacement_set.append(ReplacementListEntry(
                old=replacement[0],
                new=replacement[1],
                fix_set=replacement_set,
                edit_summary=summary,
            ))

        # Exceptions specified via 'fix' shall be merged to those via CLI.
        if replacement_set:
            replacements.extend(replacement_set)
            if replacement_set._exceptions is not None:
                for k, v in replacement_set._exceptions.items():
                    if k in exceptions:
                        exceptions[k] = list(set(exceptions[k]) | set(v))
                    else:
                        exceptions[k] = v

        if len(fix['replacements']) == len(missing_fix_summaries):
            missing_fixes_summaries.append(
                '"{}" (all replacements)'.format(fix_name))
        else:
            missing_fixes_summaries += missing_fix_summaries

    if ((not edit_summary or edit_summary is True)
            and (missing_fixes_summaries or single_summary)):
        if single_summary:
            pywikibot.output('The summary message for the command line '
                             'replacements will be something like: '
                             + single_summary)
        if missing_fixes_summaries:
            pywikibot.output('The summary will not be used when the fix has '
                             'one defined but the following fix(es) do(es) '
                             'not have a summary defined: {}'
                             .format(', '.join(missing_fixes_summaries)))
        if edit_summary is not True:
            edit_summary = pywikibot.input(
                'Press Enter to use this automatic message, or enter a '
                'description of the\nchanges your bot will make:')
        else:
            edit_summary = ''

    # Pre-compile all regular expressions here to save time later
    for replacement in replacements:
        replacement.compile(regex, flags)

    precompile_exceptions(exceptions, regex, flags)

    if xmlFilename:
        gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
                                          replacements, exceptions, site)
    elif sql_query is not None:
        # Only -excepttext option is considered by the query. Other
        # exceptions are taken into account by the ReplaceRobot
        gen = handle_sql(sql_query, replacements, exceptions['text-contains'])

    gen = genFactory.getCombinedGenerator(gen, preload=True)
    if pywikibot.bot.suggest_help(missing_generator=not gen):
        return

    bot = ReplaceRobot(gen, replacements, exceptions, site=site,
                       summary=edit_summary, **options)
    site.login()
    bot.run()