def dump_html(self):
    '''
    dump HTML-formatted revision content from RESTBase
    for the given wiki and date

    returns True if the dump command succeeded (or on a dry run,
    where the command is only printed), False if the domain of
    the wiki could not be determined or the dump command failed
    '''
    # resolve the domain before anything else: both the --domain argument
    # and the api url are built from it, and the lookup can come back empty
    domain = self.get_domain_from_wikidbname()
    if not domain:
        self.log.warning("no domain found for wiki %s, cannot produce html files",
                         self.wiki.db_name)
        return False

    dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
    outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
    htmlfile = HTMLFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    outputfile = htmlfile.get_filename(self.args['ns'])
    outputpath = os.path.join(outputdir, outputfile)

    # /usr/bin/nodejs ./bin/dump_wiki --domain en.wikipedia.org --ns 0 \
    # --apiURL http://en.wikipedia.org/w/api.php \
    # --dataBase /srv/www/htmldumps/dumps/20160826/en.wikipedia.org.articles.ns0.sqlite3
    # FIXME: the nodejs wrapper which will do the compress etc stuff for one wiki is
    # not yet written
    command = [self.wiki.config.nodejs,
               self.wiki.config.scriptpath,
               "--domain", domain,
               "--ns", self.args['ns'],
               "--apiURL", "http://%s/w/api.php" % domain,
               "--dataBase", outputpath,
               "--wiki=%s" % self.wiki.db_name,
               "--output=gzip:%s" % outputpath]

    if self.dryrun:
        print("would run command for html dump:", command)
        return True

    success = RunSimpleCommand.run_with_no_output(
        command, shell=False,
        timeout=self.get_lock_timeout_interval(),
        timeout_callback=self.periodic_callback)
    if not success:
        self.log.warning("error producing html files for wiki %s", self.wiki.db_name)
        return False
    return True
Exemple #2
0
    def dump_stub(self, start_revid, end_revid):
        '''
        dump stubs (metadata) for revs from start_revid
        up to but not including end_revid

        returns True if the stubs step is not set to run, on a dry run
        (the command is only printed), or if the dump command succeeded;
        returns False if the dump command failed
        '''
        if not self.steps['stubs']['run']:
            return True

        dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
        outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
        stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
        outputfile = stubfile.get_filename()
        script_command = MultiVersion.mw_script_as_array(self.wiki.config,
                                                         "dumpBackup.php")
        command = [self.wiki.config.php]
        command.extend(script_command)
        command.extend(["--wiki=%s" % self.wiki.db_name, "--stub", "--quiet",
                        "--output=gzip:%s" % os.path.join(outputdir, outputfile),
                        "--revrange", "--revstart=%s" % start_revid,
                        "--revend=%s" % end_revid])
        if self.dryrun:
            # single pre-formatted argument: same text whether print is the
            # python 2 statement or the python 3 function
            print("would run command for stubs dump: %s" % (command,))
            return True

        log.info("running with no output: %s", " ".join(command))
        success = RunSimpleCommand.run_with_no_output(
            command, shell=False, timeout=self.get_lock_timeout_interval(),
            timeout_callback=self.periodic_callback)
        if not success:
            log.warning("error producing stub files for wiki %s", self.wiki.db_name)
            return False
        return True
Exemple #3
0
 def dump_revs(self):
     '''
     dump revision content corresponding to previously-dumped
     stubs (revision metadata)

     returns True if the revs step is not set to run, on a dry run
     (the command is only printed), or if the dump command succeeded;
     returns False if the dump command failed
     '''
     if not self.steps['revs']['run']:
         return True
     dumpdir = MiscDumpDir(self.wiki.config, self.wiki.date)
     outputdir = dumpdir.get_dumpdir(self.wiki.db_name, self.wiki.date)
     revsfile = RevsFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
     outputfile = revsfile.get_filename()
     script_command = MultiVersion.mw_script_as_array(self.wiki.config,
                                                      "dumpTextPass.php")
     command = [self.wiki.config.php]
     command.extend(script_command)
     # the stub file from the same output directory is the input to this pass
     stubfile = StubFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
     stuboutputfile = stubfile.get_filename()
     command.extend(["--wiki=%s" % self.wiki.db_name,
                     "--stub=gzip:%s" % os.path.join(outputdir, stuboutputfile),
                     "--quiet",
                     "--spawn=%s" % self.wiki.config.php,
                     "--output=bzip2:%s" % os.path.join(outputdir, outputfile)])
     if self.dryrun:
         # single pre-formatted argument: same text whether print is the
         # python 2 statement or the python 3 function
         print("would run command for revs dump: %s" % (command,))
         return True

     log.info("running with no output: %s", " ".join(command))
     success = RunSimpleCommand.run_with_no_output(
         command, shell=False, timeout=self.get_lock_timeout_interval(),
         timeout_callback=self.periodic_callback)
     if not success:
         log.warning("error producing revision text files"
                     " for wiki %s", self.wiki.db_name)
         return False
     return True
Exemple #4
0
    def run(self, wiki, filenameformat, output_dir, overwrite, base=None):
        '''
        run a (maintenance) script on one wiki, expecting relevant output to
        go to a file

        the command from get_command() may be a string, or a list whose
        elements are strings or lists of strings; list elements are joined
        with spaces and the resulting pieces with '|' into one shell pipeline

        returns True without running anything if skip_if_done() gives no
        output file base name, or on a dry run (the command is only
        printed); otherwise returns the result of running the command
        '''
        (outfile_base, outfile_path) = self.skip_if_done(
            wiki, filenameformat, output_dir, overwrite)
        if outfile_base is None:
            return True

        command = self.get_command(wiki, outfile_path,
                                   outfile_base, base)

        # basestring only exists on python 2, where it covers str and unicode
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str

        if not isinstance(command, string_types):
            # see if the list elts are lists that need to be turned into strings
            command = '|'.join(
                element if isinstance(element, string_types) else ' '.join(element)
                for element in command)

        if self.dryrun:
            # single pre-formatted argument: same text whether print is the
            # python 2 statement or the python 3 function
            print("Would run: %s" % (command,))
            return True

        return RunSimpleCommand.run_with_no_output(
            command, maxtries=1, shell=True, verbose=self.verbose)
 def get_known_tables(self):
     '''
     run a 'show tables' query against the db of this wiki and
     return the utf-8 decoded output as a list of lines
     (presumably one table name per line)
     '''
     pipeline = DbServerInfo(self, self.db_name).build_sql_command("'show tables'")
     # first piece is the echo of the query, second is the mysql client invocation
     echo_args, mysql_args = pipeline[0], pipeline[1]
     shell_command = "{echo} | {mysql} --silent".format(
         echo=" ".join(echo_args), mysql=" ".join(mysql_args))
     raw_output = RunSimpleCommand.run_with_output(shell_command, shell=True)
     return raw_output.decode('utf-8').splitlines()
Exemple #6
0
def run_simple_query(query, wiki):
    '''
    run a mysql query which is expected to return only one field
    from one row, by piping the query into the mysql client
    with --silent.
    return whatever RunSimpleCommand.run_with_output gives back
    for that pipeline (the value of that one field)
    '''
    sql_pipeline = DbServerInfo(wiki, wiki.db_name).build_sql_command(query)
    # first piece is the echo of the query, second is the mysql client invocation
    echo_args, mysql_args = sql_pipeline[0], sql_pipeline[1]
    shell_command = "%s | %s --silent" % (" ".join(echo_args), " ".join(mysql_args))
    message = "running with no output: " + shell_command
    log.info(message)
    return RunSimpleCommand.run_with_output(shell_command, shell=True)
 def run(self, wiki, filenameformat, output_dir, overwrite, base=None):
     '''
     run a (maintenance) script on one wiki, expecting relevant output to
     go to a file

     only the {s} placeholder of filenameformat is filled in here (with
     the script name); {d} and {w} are passed through untouched to
     skip_if_done()

     returns True if skip_if_done() gives no output file base name or
     on a dry run (the command is only printed); otherwise returns the
     result of RunSimpleCommand.run_with_output for the command
     '''
     # double the braces of the placeholders that must survive str.format
     for placeholder in ('{d}', '{w}'):
         filenameformat = filenameformat.replace(placeholder, '{' + placeholder + '}')
     filenameformat = filenameformat.format(s=self.scriptname)

     outfile_base, outfile_path = self.skip_if_done(
         wiki, filenameformat, output_dir, overwrite)
     if outfile_base is not None:
         command = self.get_command(wiki, outfile_path, outfile_base, base)
         if not self.dryrun:
             return RunSimpleCommand.run_with_output(
                 command, maxtries=1, shell=False)
         print("Would run:", command)
     return True
 def get_domain_from_wikidbname(self):
     '''
     given the name of the wiki db, turn this into the
     fqdn of the wiki project (i.e. enwiki -> en.wikipedia.org)

     the value of $wgCanonicalServer is retrieved by piping a php
     snippet to eval.php, and the part after the first '//' is returned

     returns None, after logging a warning, if the command produced
     no output or output without a '//' followed by a host name
     '''
     script_command = MultiVersion.mw_script_as_array(self.wiki.config,
                                                      "eval.php")
     # echo $wgCanonicalServer | php "$multiversionscript" eval.php $wiki
     command = ["echo", "'echo $wgCanonicalServer;'", "|", self.wiki.config.php]
     command.extend(script_command)
     command.append(self.wiki.db_name)
     command_text = " ".join(command)
     self.log.info("running with no output: %s", command_text)
     output = RunSimpleCommand.run_with_output(command_text, shell=True)
     if output:
         # the output may be something other than a url (e.g. an error message
         # from the script), so make sure there is a piece after '//'
         pieces = output.decode('utf-8').split('//')
         if len(pieces) > 1:
             # rstrip gets rid of any trailing newlines from eval.php
             domain = pieces[1].rstrip()
             if domain:
                 return domain
     self.log.warning("error retrieving domain for wiki %s", self.wiki.db_name)
     return None
    def get_db_user_and_password(self):
        '''
        return the db user name and db password for this wiki as a
        tuple (user, password), as reported by getConfiguration.php
        '''
        # the values come from running a MediaWiki maintenance script;
        # yes, this means you need a full installation of MediaWiki
        # (but not web service) in order to use these methods
        script_args = MultiVersion.mw_script_as_array(self.config, "getConfiguration.php")
        wanted = ("wgDBuser", "wgDBpassword")
        template = "{php} {command} --wiki={dbname} --format=json --regex='{vars}'"
        shell_command = template.format(
            php=MiscUtils.shell_escape(self.config.php),
            command=" ".join(script_args),
            dbname=MiscUtils.shell_escape(self.db_name),
            vars="|".join(wanted))
        raw_output = RunSimpleCommand.run_with_output(shell_command, shell=True).strip()
        # the script output is a json object keyed by the setting names
        settings = json.loads(raw_output.decode('utf-8'))
        return settings[wanted[0]], settings[wanted[1]]