Example #1
def dologsbackup(wikidb, outfile,
                 wikiconf, start, end, dryrun):
    '''
    do a logs xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output
    '''
    outfiles = {'logs': {'name': outfile}}
    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            wikiconf.temp_dir, os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = None
        else:
            outfiles[filetype]['compr'] = gzippit(outfiles[filetype]['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command

    command.extend(["--wiki=%s" % wikidb,
                    "--logs", "--report=1000",
                    "--output=file:%s" % outfiles['logs']['temp']
                    ])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'log_id', 'logging',
                  50000, 100000, '</logitem>\n')
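
All of these examples are excerpted from one module, so names such as os, MultiVersion, do_xml_stream and the compression helpers are defined or imported elsewhere in that file. For orientation, here is a minimal sketch of what a gzippit-style helper could look like; the real implementation is not shown in these excerpts, so its exact shape here is an assumption:

import subprocess

def gzippit(outfile):
    # Hypothetical sketch: start a gzip process whose stdin will receive
    # the uncompressed dump pieces and whose stdout is the final
    # compressed output file.
    out = open(outfile, "wb")
    return subprocess.Popen(["gzip"], stdin=subprocess.PIPE,
                            stdout=out, bufsize=-1)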
Example #2
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun):
    '''
    do an abstracts xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output
    '''
    outfiles = {variant: {'name': name}
                for variant, name in zip(variants, output_files)}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            wikiconf.temp_dir,
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = None
        else:
            outfiles[filetype]['compr'] = catit(outfiles[filetype]['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf,
                                                     "dumpBackup.php")
    command = [wikiconf.php] + script_command
    version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    abstract_filter = ("--plugin=AbstractFilter:"
                       "%s/extensions/ActiveAbstract/AbstractFilter.php"
                       % abstract_cmd_dir)
    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000"])

    for filetype in outfiles:
            command.extend(["--output=file:%s" % outfiles[filetype]['temp'],
                            "--filter=namespace:NS_MAIN",
                            "--filter=noredirect",
                            "--filter=abstract%s" % filetype])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  20000, 30000, '</doc>\n')
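
Example #2 differs from Example #1 chiefly in using catit rather than gzippit, which suggests this recombination step leaves the abstracts output uncompressed. Under that assumption, a catit-style helper would keep the same process-handle interface but pipe the data through cat:

import subprocess

def catit(outfile):
    # Hypothetical sketch: same interface as gzippit above, but the data
    # passes through cat unchanged, so the output stays uncompressed.
    out = open(outfile, "wb")
    return subprocess.Popen(["cat"], stdin=subprocess.PIPE,
                            stdout=out, bufsize=-1)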
Example #3
def dostubsbackup(wikidb, history_file, current_file, articles_file,
                  wikiconf, start, end, dryrun):
    '''
    do a stubs xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output
    '''
    outfiles = {'history': {'name': history_file},
                'current': {'name': current_file},
                'articles': {'name': articles_file}}
    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            wikiconf.temp_dir, os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = None
        else:
            outfiles[filetype]['compr'] = gzippit(outfiles[filetype]['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command

    command.extend(["--wiki=%s" % wikidb,
                    "--full", "--stub", "--report=1000",
                    "--output=file:%s" % outfiles['history']['temp'],
                    "--output=file:%s" % outfiles['current']['temp'],
                    "--filter=latest",
                    "--output=file:%s" % outfiles['articles']['temp'],
                    "--filter=latest", "--filter=notalk",
                    "--filter=namespace:!NS_USER"])

    if wikiconf.stubs_orderrevs:
        command.append("--orderrevs")
        callback = get_page_interval
    else:
        callback = None

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 100000, '</page>\n', callback)
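
With dumpBackup.php, each --filter option applies to the most recently declared --output sink, so the history stub above is unfiltered while the current and articles stubs each carry their own filter chain. Rendered as an argv list for a hypothetical wiki (the php path, MWScript wrapper and temp paths are invented for illustration), the command built above comes out roughly as:

['/usr/bin/php', 'MWScript.php', 'dumpBackup.php',
 '--wiki=enwiki', '--full', '--stub', '--report=1000',
 '--output=file:/tmp/enwiki-stub-meta-history.xml_tmp',
 '--output=file:/tmp/enwiki-stub-meta-current.xml_tmp',
 '--filter=latest',
 '--output=file:/tmp/enwiki-stub-articles.xml_tmp',
 '--filter=latest', '--filter=notalk', '--filter=namespace:!NS_USER']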
Example #4
def dostubsbackup(wikidb, history_file, current_file, articles_file,
                  wikiconf, start, end, dryrun, verbose):
    '''
    do a stubs xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output
    '''
    outfiles = {}
    if history_file is not None:
        outfiles['history'] = {'name': history_file}
    if current_file is not None:
        outfiles['current'] = {'name': current_file}
    if articles_file is not None:
        outfiles['articles'] = {'name': articles_file}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir),
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = [None, outfiles[filetype]['name']]
        else:
            outfiles[filetype]['compr'] = [gzippit_append, outfiles[filetype]['name']]

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command

    command.extend(["--wiki=%s" % wikidb,
                    "--full", "--stub", "--report=1000"])
    if history_file is not None:
        command.append("--output=file:%s" % outfiles['history']['temp'])
    if current_file is not None:
        command.extend(["--output=file:%s" % outfiles['current']['temp'],
                        "--filter=latest"])
    if articles_file is not None:
        command.extend(["--output=file:%s" % outfiles['articles']['temp'],
                        "--filter=latest", "--filter=notalk",
                        "--filter=namespace:!NS_USER"])

    if wikiconf.stubs_orderrevs:
        command.append("--orderrevs")
        callback = get_page_interval
    else:
        callback = None

    # the xml header, the body, and the xml footer should be separate gzipped
    # streams all concatted together
    # note that do_xml_stream exits on failure after cleaning up all output files
    # so the parent process must simply retry later
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose, callback=callback, header=True)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose, callback=callback)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose, callback=callback, footer=True)
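
The header/body/footer scheme in the comment above works because the gzip format permits multiple compressed members in one file, and decompressors read them back to back as a single stream. A quick self-contained check of that property (the path is illustrative):

import gzip

path = "/tmp/demo.xml.gz"
for piece in ("<mediawiki>\n", "  <page>...</page>\n", "</mediawiki>\n"):
    # each append-mode open adds a new gzip member to the same file
    with gzip.open(path, "ab") as gzfd:
        gzfd.write(piece.encode("utf-8"))

with gzip.open(path, "rb") as gzfd:
    print(gzfd.read().decode("utf-8"))  # prints all three pieces, in order
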
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun, verbose):
    '''
    do an abstracts xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output
    '''
    outfiles = {variant: {'name': name}
                for variant, name in zip(variants, output_files)}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir),
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = [None, outfiles[filetype]['name']]
        else:
            outfiles[filetype]['compr'] = [gzippit_append, outfiles[filetype]['name']]

    script_command = MultiVersion.mw_script_as_array(wikiconf,
                                                     "dumpBackup.php")
    command = [wikiconf.php] + script_command
    version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    filter_path = os.path.join(abstract_cmd_dir, "extensions/ActiveAbstract/AbstractFilter.php")
    if not os.path.exists(filter_path):
        filter_path = os.path.join(abstract_cmd_dir,
                                   "extensions/ActiveAbstract/includes/AbstractFilter.php")
    abstract_filter = ("--plugin=AbstractFilter:" + filter_path)

    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000", "--namespaces=0"])

    for filetype in outfiles:
        command.extend(["--output=file:%s" % outfiles[filetype]['temp'],
                        "--filter=namespace:NS_MAIN",
                        "--filter=noredirect",
                        "--filter=abstract%s" % filetype])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose, header=True)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose, footer=True)
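
In these later versions the 'compr' entry is no longer a running process but a [compressor_function, final_name] pair, presumably so that do_xml_stream can start a fresh compressor for each of the header, body and footer streams. A gzippit_append-style helper consistent with that scheme (again an assumption, since the helper body is not shown here) could be:

import subprocess

def gzippit_append(outfile):
    # Hypothetical sketch: opening the output in append mode means each
    # call adds one more gzip member to the file, matching the
    # header/body/footer streams being concatenated together.
    out = open(outfile, "ab")
    return subprocess.Popen(["gzip"], stdin=subprocess.PIPE,
                            stdout=out, bufsize=-1)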