def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun, verbose):
    '''
    Do an abstracts xml dump one piece at a time, writing into
    uncompressed temporary files and shovelling those into gzip's
    stdin for the concatenated compressed output.

    Args:
        wikidb: name of the wiki database to dump
        output_files: final output paths, one per entry in variants
        variants: abstract-filter variant names; paired positionally
            with output_files
        wikiconf: config object (php, wiki_dir, temp_dir, ...)
        start, end: page-id range bounds, passed to do_xml_stream
        dryrun: if True, no compressor command is attached
        verbose: passed through to do_xml_stream
    '''
    # Pair each variant with its output file. zip replaces the manual
    # index-counter loop; NOTE(review): zip truncates silently if
    # output_files is shorter than variants, where the old counter loop
    # raised IndexError — callers are expected to pass equal lengths.
    outfiles = {variant: {'name': filename}
                for variant, filename in zip(variants, output_files)}

    for info in outfiles.values():
        info['temp'] = os.path.join(
            FileUtils.wiki_tempdir(wikidb, wikiconf.temp_dir),
            os.path.basename(info['name']) + "_tmp")
        # 'compr' is a [compressor-callable, destination] pair; None
        # compressor on dry runs so nothing is actually spawned.
        if dryrun:
            info['compr'] = [None, info['name']]
        else:
            info['compr'] = [gzippit_append, info['name']]

    script_command = MultiVersion.mw_script_as_array(wikiconf,
                                                     "dumpBackup.php")
    command = [wikiconf.php] + script_command

    version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    # The AbstractFilter plugin moved under includes/ in newer MediaWiki
    # releases; fall back to that location when the legacy path is absent.
    filter_path = os.path.join(
        abstract_cmd_dir, "extensions/ActiveAbstract/AbstractFilter.php")
    if not os.path.exists(filter_path):
        filter_path = os.path.join(
            abstract_cmd_dir,
            "extensions/ActiveAbstract/includes/AbstractFilter.php")
    abstract_filter = ("--plugin=AbstractFilter:" + filter_path)

    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000",
                    "--namespaces=0"])

    # One output/filter group per variant; order matches outfiles.
    for variant, info in outfiles.items():
        command.extend(["--output=file:%s" % info['temp'],
                        "--filter=namespace:NS_MAIN",
                        "--filter=noredirect",
                        "--filter=abstract%s" % variant])

    # Stream in three passes — header only, page bodies, footer only —
    # which do_xml_stream concatenates into the compressed output.
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose,
                  header=True)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose)
    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  5000, 10000, '</doc>\n', verbose=verbose,
                  footer=True)
def do_abstractsbackup(wikidb, output_files, variants,
                       wikiconf, start, end, dryrun):
    '''
    Produce an abstracts xml dump one piece at a time: each variant is
    written to an uncompressed temporary file whose contents are then
    fed through the concatenating compressor into the final output.
    '''
    # Build one record per variant, paired positionally with its
    # output file (raises IndexError if output_files is too short).
    outfiles = {}
    for position, variant in enumerate(variants):
        outfiles[variant] = {'name': output_files[position]}

    for info in outfiles.values():
        info['temp'] = os.path.join(
            wikiconf.temp_dir,
            os.path.basename(info['name']) + "_tmp")
        # No compressor is attached on a dry run.
        info['compr'] = None if dryrun else catit(info['name'])

    script_command = MultiVersion.mw_script_as_array(wikiconf,
                                                     "dumpBackup.php")
    command = [wikiconf.php] + script_command

    # Point the AbstractFilter plugin at the (possibly versioned)
    # MediaWiki install directory for this wiki.
    version = MultiVersion.mw_version(wikiconf, wikidb)
    abstract_cmd_dir = wikiconf.wiki_dir
    if version:
        abstract_cmd_dir = abstract_cmd_dir + "/" + version
    abstract_filter = ("--plugin=AbstractFilter:"
                       "%s/extensions/ActiveAbstract/AbstractFilter.php"
                       % abstract_cmd_dir)

    command.extend(["--wiki=%s" % wikidb, abstract_cmd_dir,
                    abstract_filter,
                    "--current", "--report=1000"])

    # One output/filter group per variant, in outfiles order.
    for variant, info in outfiles.items():
        command.extend(["--output=file:%s" % info['temp'],
                        "--filter=namespace:NS_MAIN",
                        "--filter=noredirect",
                        "--filter=abstract%s" % variant])

    do_xml_stream(wikidb, outfiles, command, wikiconf,
                  start, end, dryrun, 'page_id', 'page',
                  20000, 30000, '</doc>\n')