def record_max_revid(self):
    """
    Retrieve the max rev id for the wiki from the db and record it to a file.

    On a dry run the value is still fetched but nothing is written.
    """
    self.get_max_revid()
    if self.dryrun:
        return
    # presumably get_max_revid() populates self.max_id — the write below
    # persists that value; confirm against the class definition
    revid_path = MaxRevIDFile(self.wiki.config, self.wiki.date,
                              self.wiki.db_name).get_path()
    FileUtils.write_file_in_place(revid_path, self.max_id,
                                  self.wiki.config.fileperms)
def do_all_wikis(self):
    """
    generate index.html file for all wikis for the given date.
    FIXME maybe this should be for the latest run date? Hrm.
    """
    config = self.args["config"]
    items = []
    for wikiname in config.all_wikis_list:
        result = self.do_one_wiki(wikiname)
        if not result:
            continue
        log.info("result for wiki %s is %s", wikiname, result)
        items.append("<li>" + result + "</li>\n")
    # fill the index template with one <li> entry per wiki that produced output
    index_text = config.read_template(config.indextmpl) % {"items": "".join(items)}
    FileUtils.write_file_in_place(self.indexfile.get_path(), index_text,
                                  config.fileperms)
def dump_aliases(self):
    '''
    Write the aliases output file for the wiki.

    Returns True on success (including when the 'aliases' step is
    configured not to run); on error, logs and re-raises the exception.
    '''
    if not self.steps['aliases']['run']:
        return True
    try:
        contents = "for wiki %s: alias meow=more\n" % self.wiki.db_name
        aliasesfile = AliasesFile(self.wiki.config, self.wiki.date,
                                  self.wiki.db_name)
        FileUtils.write_file_in_place(aliasesfile.get_path(), contents,
                                      self.wiki.config.fileperms)
        return True
    except Exception as ex:
        # BUGFIX: message previously said "dumping namespaces" — a
        # copy-paste error; this method dumps aliases
        log.info("Error encountered dumping aliases for %s ",
                 self.wiki.db_name, exc_info=ex)
        raise
def md5sums(wiki, fileperms, files, mandatory):
    '''
    generate md5sums for specified files for dump of given wiki
    and specific date, and save them to output file

    :param wiki: wiki object carrying config, date and db_name
    :param fileperms: permissions to set on the output file
    :param files: paths of files to checksum
    :param mandatory: files whose checksum failure counts as an error
    :return: False if any mandatory file failed to checksum, True otherwise
    '''
    md5file = MD5File(wiki.config, wiki.date, wiki.db_name)
    text = ""
    errors = False
    for fname in files:
        try:
            text = text + "%s\n" % md5sum_one_file(fname)
        except Exception as ex:
            log.warning("Error encountered in md5sum for %s", fname,
                        exc_info=ex)
            if fname in mandatory:
                errors = True
    # BUGFIX: write once after all sums are computed instead of rewriting
    # the entire output file on every loop iteration (accidental O(n^2)
    # I/O). Guarded on text so that, as before, no file is written when
    # no checksum succeeded.
    if text:
        FileUtils.write_file_in_place(md5file.get_path(), text, fileperms)
    return not errors
def dump_max_revid(self):
    '''
    dump maximum rev id from wiki that's older than the
    configured number of seconds (cutoff)

    we have this cutoff so that content really new is not dumped;
    we want to give curators the chance to remove problematic
    entries first.  a cutoff of some hours is reasonable.

    :return: str max revid + 1 (exclusive upper bound for the dump),
             or None if it could not be determined
    '''
    max_id = None
    revidfile = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    if not exists(revidfile.get_path()):
        log.info("Wiki %s retrieving max revid from db.", self.wiki.db_name)
        query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                 "order by rev_timestamp desc limit 1" % self.cutoff)
        db_info = DbServerInfo(self.wiki, self.wiki.db_name)
        results = db_info.run_sql_and_get_output(query)
        if results:
            lines = results.splitlines()
            # lines[0] is the column header; lines[1] holds the value
            if lines and lines[1] and lines[1].isdigit():
                max_id = lines[1]
                FileUtils.write_file_in_place(revidfile.get_path(), max_id,
                                              self.wiki.config.fileperms)
    try:
        file_obj = MaxRevIDFile(self.wiki.config, self.wiki.date,
                                self.wiki.db_name)
        # BUGFIX: strip trailing whitespace/newline from the file CONTENTS;
        # the original applied .rstrip() to the path string instead, which
        # left a trailing newline in max_revid and could break int() below
        max_revid = FileUtils.read_file(file_obj.get_path()).rstrip()
    except Exception as ex:
        log.info("Error encountered reading maxrevid from %s ",
                 file_obj.get_path(), exc_info=ex)
        max_revid = None

    # end rev id is not included in dump
    if max_revid is not None:
        max_revid = str(int(max_revid) + 1)

    log.info("max_revid is %s", safe(max_revid))
    return max_revid
def set_status(self, status):
    """Persist the supplied status information for the dump run."""
    status_path = self.status_file.get_path()
    perms = self._config.fileperms
    FileUtils.write_file_in_place(status_path, status, perms)