def dump_html(self):
    '''
    dump HTML-formatted revision content from RESTBase
    for the given wiki and date

    builds a nodejs command line from the wiki configuration and either
    prints it (dry run) or runs it via RunSimpleCommand with the lock
    timeout interval and the periodic callback

    returns True on success or on a dry run; returns False if the
    domain of the wiki cannot be determined or if the command fails
    '''
    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    htmlfile = HTMLFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = htmlfile.get_filename(self.args['ns'])
    outputpath = os.path.join(outputdir, outputfile)

    # /usr/bin/nodejs ./bin/dump_wiki --domain en.wikipedia.org --ns 0 \
    # --apiURL http://en.wikipedia.org/w/api.php \
    # --dataBase /srv/www/htmldumps/dumps/20160826/en.wikipedia.org.articles.ns0.sqlite3
    domain = self.get_domain_from_wikidbname()
    if not domain:
        # without a domain the --domain and --apiURL arguments would be
        # built from None ("http://None/w/api.php"); don't run that
        self.log.warning("no domain found for wiki %s, not producing html files",
                         self.wiki.db_name)
        return False

    # FIXME: the nodejs wrapper which will do the compress etc stuff for one wiki is
    # not yet written
    command = [self.wiki.config.nodejs, self.wiki.config.scriptpath]
    command.extend(["--domain", domain,
                    "--ns", self.args['ns'],
                    "--apiURL", "http://%s/w/api.php" % domain,
                    "--dataBase", outputpath,
                    "--wiki=%s" % self.wiki.db_name,
                    "--output=gzip:%s" % outputpath])

    if self.dryrun:
        print("would run command for html dump:", command)
        return True

    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        self.log.warning("error producing html files for wiki %s", self.wiki.db_name)
        return False
    return True
def dump_stub(self, start_revid, end_revid):
    '''
    dump stubs (metadata) for revs from start_revid
    up to but not including end_revid

    args:
        start_revid: passed to dumpBackup.php as --revstart
        end_revid:   passed to dumpBackup.php as --revend

    returns True if the stubs step is not enabled, on a dry run,
    or if the command succeeds; returns False if the command fails
    '''
    if not self.steps['stubs']['run']:
        return True

    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = stubfile.get_filename()

    script_command = MultiVersion.mw_script_as_array(self.wiki.config, "dumpBackup.php")
    command = [self.wiki.config.php]
    command.extend(script_command)
    command.extend(["--wiki=%s" % self.wiki.db_name,
                    "--stub", "--quiet",
                    "--output=gzip:%s" % os.path.join(outputdir, outputfile),
                    "--revrange",
                    "--revstart=%s" % start_revid,
                    "--revend=%s" % end_revid])

    if self.dryrun:
        # print() call rather than the print statement, so that this
        # parses under python 3 like the rest of the dry-run messages
        print("would run command for stubs dump:", command)
        return True

    log.info("running with no output: %s", " ".join(command))
    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        log.warning("error producing stub files for wiki %s", self.wiki.db_name)
        return False
    return True
def dump_revs(self):
    '''
    dump revision content corresponding to previously-dumped
    stubs (revision metadata)

    the stub file name is derived from the same wiki, date and output
    directory as the revision content file and is handed to
    dumpTextPass.php via --stub

    returns True if the revs step is not enabled, on a dry run,
    or if the command succeeds; returns False if the command fails
    '''
    if not self.steps['revs']['run']:
        return True

    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    revsfile = RevsFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = revsfile.get_filename()

    script_command = MultiVersion.mw_script_as_array(self.wiki.config, "dumpTextPass.php")
    command = [self.wiki.config.php]
    command.extend(script_command)

    stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    stuboutputfile = stubfile.get_filename()
    command.extend(["--wiki=%s" % self.wiki.db_name,
                    "--stub=gzip:%s" % os.path.join(outputdir, stuboutputfile),
                    "--quiet",
                    "--spawn=%s" % self.wiki.config.php,
                    "--output=bzip2:%s" % os.path.join(outputdir, outputfile)])

    if self.dryrun:
        # print() call rather than the print statement, so that this
        # parses under python 3 like the rest of the dry-run messages
        print("would run command for revs dump:", command)
        return True

    log.info("running with no output: %s", " ".join(command))
    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        log.warning("error producing revision text files"
                    " for wiki %s", self.wiki.db_name)
        return False
    return True
def run(self, wiki, filenameformat, output_dir, overwrite, base=None):
    '''
    run a (maintenance) script on one wiki, expecting relevant output to
    go to a file

    the command from get_command() may be a ready-made string, or a list
    of pipeline stages where each stage is either a string or a list of
    arguments; stages are joined with '|' and run through the shell

    returns True when skip_if_done() supplies no output file base name
    or on a dry run; otherwise returns the result of
    RunSimpleCommand.run_with_no_output()
    '''
    (outfile_base, outfile_path) = self.skip_if_done(
        wiki, filenameformat, output_dir, overwrite)
    if outfile_base is None:
        return True

    command = self.get_command(wiki, outfile_path, outfile_base, base)
    if not isinstance(command, str):
        # see if the list elts are lists that need to be turned into strings
        command = '|'.join(
            stage if isinstance(stage, str) else ' '.join(stage)
            for stage in command)

    if self.dryrun:
        print("Would run:", command)
        return True
    return RunSimpleCommand.run_with_no_output(
        command, maxtries=1, shell=True, verbose=self.verbose)
def get_known_tables(self):
    '''
    run 'show tables' for the database of this wiki via a shell
    pipeline and return the utf-8 decoded output as a list of lines
    '''
    server_info = DbServerInfo(self, self.db_name)
    pipeline = server_info.build_sql_command("'show tables'")
    echo_args, mysql_args = pipeline[0], pipeline[1]
    # first element is the echo half of the pipeline, second the mysql half
    shell_text = " | ".join([" ".join(echo_args), " ".join(mysql_args)]) + " --silent"
    raw_output = RunSimpleCommand.run_with_output(shell_text, shell=True)
    decoded = raw_output.decode('utf-8')
    return decoded.splitlines()
def run_simple_query(query, wiki):
    '''
    run a mysql query which is expected to return only one field
    from one row, by piping it to mysql through the shell

    returns whatever RunSimpleCommand.run_with_output() gives back
    for that pipeline
    NOTE(review): presumably this is the raw, undecoded and unstripped
    command output -- confirm before treating it as a clean string
    '''
    server_info = DbServerInfo(wiki, wiki.db_name)
    pipeline = server_info.build_sql_command(query)
    echo_args, mysql_args = pipeline[0], pipeline[1]
    # first element is the echo half of the pipeline, second the mysql half
    shell_text = " | ".join([" ".join(echo_args), " ".join(mysql_args)]) + " --silent"
    log.info("running with no output: " + shell_text)
    return RunSimpleCommand.run_with_output(shell_text, shell=True)
def run(self, wiki, filenameformat, output_dir, overwrite, base=None):
    '''
    run a (maintenance) script on one wiki, expecting relevant output to
    go to a file

    only the {s} placeholder of filenameformat is filled in here (with
    the script name); {d} and {w} are passed through untouched

    returns True when skip_if_done() supplies no output file base name
    or on a dry run; otherwise returns the result of
    RunSimpleCommand.run_with_output()
    '''
    # double the braces of the placeholders that must survive format()
    for placeholder in ('{d}', '{w}'):
        filenameformat = filenameformat.replace(placeholder, '{' + placeholder + '}')
    filenameformat = filenameformat.format(s=self.scriptname)

    outfile_base, outfile_path = self.skip_if_done(
        wiki, filenameformat, output_dir, overwrite)
    if outfile_base is None:
        return True

    to_run = self.get_command(wiki, outfile_path, outfile_base, base)
    if not self.dryrun:
        return RunSimpleCommand.run_with_output(
            to_run, maxtries=1, shell=False)
    print("Would run:", to_run)
    return True
def get_domain_from_wikidbname(self):
    '''
    given the name of the wiki db, turn this into the
    fqdn of the wiki project (e.g. enwiki -> en.wikipedia.org)

    this is done by having eval.php echo $wgCanonicalServer for the
    wiki and taking the part of the output after the first '//'

    returns the domain as a string, or None (after logging a warning)
    if the command produced no output or the output does not
    contain '//' followed by a host name
    '''
    script_command = MultiVersion.mw_script_as_array(self.wiki.config, "eval.php")
    # echo $wgCanonicalServer | php "$multiversionscript" eval.php $wiki
    command = ["echo", "'echo $wgCanonicalServer;'", "|", self.wiki.config.php]
    command.extend(script_command)
    command.append(self.wiki.db_name)
    command_text = " ".join(command)
    self.log.info("running with no output: %s", command_text)
    output = RunSimpleCommand.run_with_output(command_text, shell=True)
    if not output:
        self.log.warning("error retrieving domain for wiki %s", self.wiki.db_name)
        return None

    pieces = output.decode('utf-8').split('//')
    # rstrip gets rid of any trailing newlines from eval.php
    domain = pieces[1].rstrip() if len(pieces) > 1 else ''
    if not domain:
        # output without a "//host" part (e.g. a warning message) used to
        # blow up with an IndexError; treat it like missing output instead
        self.log.warning("unexpected output retrieving domain for wiki %s",
                         self.wiki.db_name)
        return None
    return domain
def get_db_user_and_password(self):
    '''
    get the db user and password by running the MediaWiki maintenance
    script getConfiguration.php and parsing its json output;
    yes, this means you need a full installation of MediaWiki
    (but not web service) in order to use these methods

    returns a (db user, db password) tuple
    '''
    wanted_settings = ["wgDBuser", "wgDBpassword"]
    script_args = MultiVersion.mw_script_as_array(self.config, "getConfiguration.php")

    template = "{php} {command} --wiki={dbname} --format=json --regex='{vars}'"
    shell_text = template.format(
        php=MiscUtils.shell_escape(self.config.php),
        command=" ".join(script_args),
        dbname=MiscUtils.shell_escape(self.db_name),
        vars="|".join(wanted_settings))

    raw_output = RunSimpleCommand.run_with_output(shell_text, shell=True).strip()
    config_values = json.loads(raw_output.decode('utf-8'))
    return config_values['wgDBuser'], config_values['wgDBpassword']