def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun, verbose):
    '''
    produce the abstracts xml dump for a wiki, one output file per variant

    assembles a dumpBackup.php command line that loads the AbstractFilter
    plugin and directs each variant's output to its own temporary file,
    then hands that command to do_xml_stream three times in a row: once
    with header=True, once with neither flag, once with footer=True

    args:
        wikidb:       wiki database name, passed to the script via --wiki
        output_files: final output paths, paired with variants by position
        variants:     variant names; each one is appended to
                      "--filter=abstract" for its output
        wikiconf:     config object; php, wiki_dir and temp_dir are read here
        start, end:   passed through unchanged to do_xml_stream
                      (presumably page_id bounds -- confirm against callee)
        dryrun:       when true, None is recorded in place of the
                      gzippit_append compressor for every output
        verbose:      passed through to do_xml_stream
    '''
    # variant name -> info about that variant's output file
    outfiles = {}
    for position, variant in enumerate(variants):
        outfiles[variant] = {'name': output_files[position]}

    for fileinfo in outfiles.values():
        tempdir = FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir)
        fileinfo['temp'] = os.path.join(
            tempdir, os.path.basename(fileinfo['name']) + "_tmp")
        # [compressor-or-None, final output path]
        fileinfo['compr'] = [None if dryrun else gzippit_append,
                             fileinfo['name']]

    script_args = MultiVersion.mw_script_as_array(wikiconf,
                                                  "dumpBackup.php")
    command = [wikiconf.php] + script_args

    # the extension lives under the version-specific directory, if there is one
    mw_version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if mw_version:
        abstract_cmd_dir += "/" + mw_version

    # the filter may be in the extension's top directory or in includes/
    filter_path = os.path.join(
        abstract_cmd_dir, "extensions/ActiveAbstract/AbstractFilter.php")
    if not os.path.exists(filter_path):
        filter_path = os.path.join(
            abstract_cmd_dir,
            "extensions/ActiveAbstract/includes/AbstractFilter.php")
    abstract_filter = "--plugin=AbstractFilter:" + filter_path

    command.extend([
        "--wiki=%s" % wikidb,
        abstract_cmd_dir,
        abstract_filter,
        "--current",
        "--report=1000",
        "--namespaces=0",
    ])

    # one output sink plus its filter chain per variant
    for variant, fileinfo in outfiles.items():
        command.extend([
            "--output=file:%s" % fileinfo['temp'],
            "--filter=namespace:NS_MAIN",
            "--filter=noredirect",
            "--filter=abstract%s" % variant,
        ])

    # three passes: header, content, footer
    for phase_args in ({'header': True}, {}, {'footer': True}):
        do_xml_stream(wikidb, outfiles, command, wikiconf,
                      start, end, dryrun, 'page_id', 'page',
                      5000, 10000, '</doc>\n', verbose=verbose, **phase_args)
Example #2
0
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun):
    '''
    produce the abstracts xml dump for a wiki, one output file per variant

    assembles a dumpBackup.php command line that loads the AbstractFilter
    plugin and directs each variant's output to its own temporary file
    under wikiconf.temp_dir, then hands that command to do_xml_stream

    args:
        wikidb:       wiki database name, passed to the script via --wiki
        output_files: final output paths, paired with variants by position
        variants:     variant names; each one is appended to
                      "--filter=abstract" for its output
        wikiconf:     config object; php, wiki_dir and temp_dir are read here
        start, end:   passed through unchanged to do_xml_stream
                      (presumably page_id bounds -- confirm against callee)
        dryrun:       when true, None is recorded as the compressor instead
                      of the result of catit() on the output path
    '''
    # variant name -> info about that variant's output file
    outfiles = {}
    for position, variant in enumerate(variants):
        outfiles[variant] = {'name': output_files[position]}

    for fileinfo in outfiles.values():
        fileinfo['temp'] = os.path.join(
            wikiconf.temp_dir,
            os.path.basename(fileinfo['name']) + "_tmp")
        fileinfo['compr'] = None if dryrun else catit(fileinfo['name'])

    script_args = MultiVersion.mw_script_as_array(wikiconf,
                                                  "dumpBackup.php")
    command = [wikiconf.php] + script_args

    # the extension lives under the version-specific directory, if there is one
    mw_version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if mw_version:
        abstract_cmd_dir += "/" + mw_version

    plugin_template = ("--plugin=AbstractFilter:"
                       "%s/extensions/ActiveAbstract/AbstractFilter.php")
    abstract_filter = plugin_template % abstract_cmd_dir

    command.extend([
        "--wiki=%s" % wikidb,
        abstract_cmd_dir,
        abstract_filter,
        "--current",
        "--report=1000",
    ])

    # one output sink plus its filter chain per variant
    for variant, fileinfo in outfiles.items():
        command.extend([
            "--output=file:%s" % fileinfo['temp'],
            "--filter=namespace:NS_MAIN",
            "--filter=noredirect",
            "--filter=abstract%s" % variant,
        ])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  20000, 30000, '</doc>\n')