def dologsbackup(wikidb, outfile,
                 wikiconf, start, end, dryrun):
    '''
    Dump the logging table as XML, one piece at a time.

    Pieces are written into uncompressed temporary files and shovelled
    into gzip's stdin, yielding one concatenated compressed output file.
    '''
    # exactly one output stream; do_xml_stream expects the 'logs' key
    log_entry = {'name': outfile}
    log_entry['temp'] = os.path.join(
        wikiconf.temp_dir,
        os.path.basename(log_entry['name']) + "_tmp")
    # a dry run starts no compressor process at all
    log_entry['compr'] = None if dryrun else gzippit(log_entry['name'])
    outfiles = {'logs': log_entry}

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command
    command.extend(["--wiki=%s" % wikidb,
                    "--logs", "--report=1000",
                    "--output=file:%s" % outfiles['logs']['temp']])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'log_id', 'logging',
                  50000, 100000, '</logitem>\n')
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun):
    '''
    do an abstracts xml dump one piece at a time, writing into
    uncompressed temporary files and shovelling those into gzip's
    stdin for the concatenated compressed output

    wikidb       -- name of the wiki database to dump
    output_files -- final output paths, one per variant, in variant order
    variants     -- abstract variant names; positionally parallel to output_files
    wikiconf     -- config object (php, temp_dir, wiki_dir, ...)
    start, end   -- page id range bounds handed to do_xml_stream
    dryrun       -- if True, no output processes are started
    '''
    outfiles = {}
    # pair each variant with its output file by position; enumerate
    # replaces the old hand-maintained index counter (same IndexError
    # behavior as before if output_files is shorter than variants)
    for index, variant in enumerate(variants):
        outfiles[variant] = {'name': output_files[index]}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            wikiconf.temp_dir,
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = None
        else:
            outfiles[filetype]['compr'] = catit(outfiles[filetype]['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command
    version = MultiVersion.mw_version(wikiconf, wikidb)
    # per-version wiki install dir, if multiversion is in play
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    abstract_filter = ("--plugin=AbstractFilter:"
                       "%s/extensions/ActiveAbstract/AbstractFilter.php" % abstract_cmd_dir)
    # NOTE(review): abstract_cmd_dir is passed as a bare positional
    # argument here -- confirm dumpBackup.php actually expects it
    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000"])
    # one output stream plus the filters that apply to it, per variant
    for filetype in outfiles:
        command.extend(["--output=file:%s" % outfiles[filetype]['temp'],
                        "--filter=namespace:NS_MAIN",
                        "--filter=noredirect",
                        "--filter=abstract%s" % filetype])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  20000, 30000, '</doc>\n')
# NOTE(review): a later definition of dostubsbackup in this file shadows
# this one at import time; this appears to be an older revision kept around
def dostubsbackup(wikidb, history_file, current_file, articles_file,
                  wikiconf, start, end, dryrun):
    '''
    do a stubs xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output

    Produces three stub files in a single dumpBackup.php pass:
    full history, current revisions only, and articles (current,
    no talk pages, no user-namespace pages).
    '''
    # one entry per stub flavor; do_xml_stream relies on these keys
    outfiles = {'history': {'name': history_file},
                'current': {'name': current_file},
                'articles': {'name': articles_file}}

    for filetype in outfiles:
        # temp file in the configured temp dir, named after the final output
        outfiles[filetype]['temp'] = os.path.join(
            wikiconf.temp_dir,
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        if dryrun:
            outfiles[filetype]['compr'] = None
        else:
            outfiles[filetype]['compr'] = gzippit(outfiles[filetype]['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command
    # dumpBackup.php applies each --filter to the most recent --output,
    # so the ordering of these arguments is significant
    command.extend(["--wiki=%s" % wikidb, "--full", "--stub", "--report=1000",
                    "--output=file:%s" % outfiles['history']['temp'],
                    "--output=file:%s" % outfiles['current']['temp'],
                    "--filter=latest",
                    "--output=file:%s" % outfiles['articles']['temp'],
                    "--filter=latest", "--filter=notalk",
                    "--filter=namespace:!NS_USER"])

    if wikiconf.stubs_orderrevs:
        command.append("--orderrevs")
        callback = get_page_interval
    else:
        callback = None

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 100000, '</page>\n', callback)
def dostubsbackup(wikidb, history_file, current_file, articles_file,
                  wikiconf, start, end, dryrun, verbose):
    '''
    do a stubs xml dump one piece at a time, writing into uncompressed
    temporary files and shovelling those into gzip's stdin for the
    concatenated compressed output

    Any of history_file / current_file / articles_file may be None, in
    which case that stub flavor is skipped entirely.
    '''
    # build one entry per requested stub flavor only
    outfiles = {}
    if history_file is not None:
        outfiles['history'] = {'name': history_file}
    if current_file is not None:
        outfiles['current'] = {'name': current_file}
    if articles_file is not None:
        outfiles['articles'] = {'name': articles_file}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir),
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        # 'compr' is a [compressor-or-None, final-output-path] pair
        if dryrun:
            outfiles[filetype]['compr'] = [None, outfiles[filetype]['name']]
        else:
            outfiles[filetype]['compr'] = [gzippit_append, outfiles[filetype]['name']]

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command
    command.extend(["--wiki=%s" % wikidb, "--full", "--stub", "--report=1000"])
    # each --output is immediately followed by the --filter args that
    # apply to it; argument order is significant to dumpBackup.php
    if history_file is not None:
        command.append("--output=file:%s" % outfiles['history']['temp'])
    if current_file is not None:
        command.extend(["--output=file:%s" % outfiles['current']['temp'],
                        "--filter=latest"])
    if articles_file is not None:
        command.extend(["--output=file:%s" % outfiles['articles']['temp'],
                        "--filter=latest", "--filter=notalk",
                        "--filter=namespace:!NS_USER"])

    if wikiconf.stubs_orderrevs:
        command.append("--orderrevs")
        callback = get_page_interval
    else:
        callback = None

    # the xml header, the body, and the xml footer should be separate gzipped
    # streams all concatted together
    # note that do_xml_stream exits on failure after cleaning up all output files
    # so the parent process must simply retry later
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose,
                  callback=callback, header=True)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose,
                  callback=callback)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 20000, '</page>\n', verbose=verbose,
                  callback=callback, footer=True)
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun, verbose):
    '''
    do an abstracts xml dump one piece at a time, writing into
    uncompressed temporary files and shovelling those into gzip's
    stdin for the concatenated compressed output

    wikidb       -- name of the wiki database to dump
    output_files -- final output paths, one per variant, in variant order
    variants     -- abstract variant names; positionally parallel to output_files
    wikiconf     -- config object (php, temp_dir, wiki_dir, ...)
    start, end   -- page id range bounds handed to do_xml_stream
    dryrun       -- if True, no compressor processes are started
    verbose      -- passed through to do_xml_stream
    '''
    outfiles = {}
    # pair each variant with its output file by position; enumerate
    # replaces the old hand-maintained index counter (same IndexError
    # behavior as before if output_files is shorter than variants)
    for index, variant in enumerate(variants):
        outfiles[variant] = {'name': output_files[index]}

    for filetype in outfiles:
        outfiles[filetype]['temp'] = os.path.join(
            FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir),
            os.path.basename(outfiles[filetype]['name']) + "_tmp")
        # 'compr' is a [compressor-or-None, final-output-path] pair
        if dryrun:
            outfiles[filetype]['compr'] = [None, outfiles[filetype]['name']]
        else:
            outfiles[filetype]['compr'] = [gzippit_append, outfiles[filetype]['name']]

    script_command = MultiVersion.mw_script_as_array(wikiconf, "dumpBackup.php")
    command = [wikiconf.php] + script_command
    version = MultiVersion.mw_version(wikiconf, wikidb)
    # per-version wiki install dir, if multiversion is in play
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    # the AbstractFilter plugin moved into includes/ in later versions
    # of the ActiveAbstract extension; fall back to the new location
    filter_path = os.path.join(abstract_cmd_dir,
                               "extensions/ActiveAbstract/AbstractFilter.php")
    if not os.path.exists(filter_path):
        filter_path = os.path.join(abstract_cmd_dir,
                                   "extensions/ActiveAbstract/includes/AbstractFilter.php")
    abstract_filter = ("--plugin=AbstractFilter:" + filter_path)
    # NOTE(review): abstract_cmd_dir is passed as a bare positional
    # argument here -- confirm dumpBackup.php actually expects it
    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000",
                    "--namespaces=0"])
    # one output stream plus the filters that apply to it, per variant
    for filetype in outfiles:
        command.extend(["--output=file:%s" % outfiles[filetype]['temp'],
                        "--filter=namespace:NS_MAIN",
                        "--filter=noredirect",
                        "--filter=abstract%s" % filetype])

    # header, body, and footer are produced as separate gzipped streams
    # that concatenate into one valid gzip file
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose,
                  header=True)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose,
                  footer=True)