def get_prev_incrdate(self, date, dumpok=False, revidok=False):
    '''
    find the most recent incr dump before the specified date

    if "dumpok" is True, find most recent dump that completed successfully
    (its status is "done")
    if "revidok" is True, find most recent dump that has a populated
    maxrevid.txt file, i.e. one whose contents are a positive integer
    if both are True, only the "dumpok" check is applied
    if neither is True, any earlier dump qualifies

    the dump dirs are walked in the order get_misc_dumpdirs() provides
    them, stopping at the entry equal to "date"; if no entry equals
    "date", the last qualifying entry of the whole list is returned.

    returns the qualifying dump dir entry, or None if there is none
    '''
    previous = None
    old = self.dirs.get_misc_dumpdirs()
    if not old:
        return previous

    for dump in old:
        if dump == date:
            # everything from here on is not "before" the requested date
            return previous

        if dumpok:
            status_info = StatusInfo(self.wiki.config, dump, self.wiki.db_name)
            if status_info.get_status(dump) == "done":
                previous = dump
        elif revidok:
            max_revid_file = MaxRevIDFile(self.wiki.config, dump, self.wiki.db_name)
            revid_path = max_revid_file.get_path()
            if exists(revid_path):
                # strip the contents that were read, not the path
                revid = FileUtils.read_file(revid_path).rstrip()
                try:
                    populated = int(revid) > 0
                except ValueError:
                    # empty or non-numeric file: not a populated maxrevid
                    # file, so this dump does not qualify
                    populated = False
                if populated:
                    previous = dump
        else:
            previous = dump
    return previous
def get_status(self, date=None):
    '''
    look up the status of the dump run for the given date

    returns the contents of the status file with trailing whitespace
    removed, or the empty string when the status file does not exist
    '''
    status_path = self.status_file.get_path(date)
    if not exists(status_path):
        return ""
    contents = FileUtils.read_file(status_path)
    return contents.rstrip()
def read_max_revid_from_file(self, date=None):
    '''
    read and return max rev id for wiki from file

    "date" selects which run's maxrevid file is read; when it is None,
    self.wiki.date is used.

    returns the file contents with trailing whitespace removed, or None
    if anything goes wrong while locating or reading the file (the error
    is logged at info level)
    '''
    if date is None:
        date = self.wiki.date

    # bound before the try block so the error handler can always report
    # it, even when the failure happens before the path is known
    path = None
    try:
        path = MaxRevIDFile(self.wiki.config, date, self.wiki.db_name).get_path()
        # strip the contents that were read, not the path
        return FileUtils.read_file(path).rstrip()
    except Exception as ex:
        log.info("Error encountered reading maxrevid from %s ", path, exc_info=ex)
        return None
def get_stat_text(self, dump_date, wikiname):
    """
    build the text describing the status of the dump of the wiki
    for the given date

    reads the status file for the wiki and date, logs what was read,
    and returns the contents wrapped in parentheses, or None when the
    contents are empty
    """
    status_file = StatusFile(self.args["config"], dump_date, wikiname)
    contents = FileUtils.read_file(status_file.get_path())
    log.info("status for %s %s", wikiname, safe(contents))
    if not contents:
        return None
    return "(%s)" % (contents)
def dump_max_revid(self):
    '''
    dump maximum rev id from wiki that's older than
    the configured number of seconds (cutoff)

    we have this cutoff so that content really new
    is not dumped; we want to give curators the chance to
    remove problematic entries first.

    a cutoff of some hours is reasonable.

    if the maxrevid file for this wiki and date does not exist yet, the
    rev id is retrieved from the db and written to that file; the file
    is only written when the query produced a numeric value. the file is
    then read back in either case.

    returns the rev id from the file plus one, as a string (the end rev
    id is not included in the dump), or None if the file could not be
    read or does not contain an integer
    '''
    revidfile = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
    revid_path = revidfile.get_path()

    if not exists(revid_path):
        log.info("Wiki %s retrieving max revid from db.", self.wiki.db_name)
        query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                 "order by rev_timestamp desc limit 1" % self.cutoff)
        db_info = DbServerInfo(self.wiki, self.wiki.db_name)
        results = db_info.run_sql_and_get_output(query)
        if results:
            lines = results.splitlines()
            # the value is taken from the second line of the output
            # (presumably the first is the column header -- confirm);
            # check the length so that one-line output cannot raise
            # IndexError
            if len(lines) > 1 and lines[1].isdigit():
                FileUtils.write_file_in_place(revid_path, lines[1],
                                              self.wiki.config.fileperms)

    try:
        # strip the contents that were read, not the path
        max_revid = FileUtils.read_file(revid_path).rstrip()
    except Exception as ex:
        log.info("Error encountered reading maxrevid from %s ", revid_path, exc_info=ex)
        max_revid = None

    # end rev id is not included in dump
    if max_revid is not None:
        try:
            max_revid = str(int(max_revid) + 1)
        except ValueError:
            # empty or corrupt file: treat it the same as an unreadable one
            log.info("Bad maxrevid contents in %s ", revid_path)
            max_revid = None

    log.info("max_revid is %s", safe(max_revid))
    return max_revid
def read_template(self, name):
    '''
    return the contents of the named file from the configured
    template dir
    '''
    return FileUtils.read_file(os.path.join(self.template_dir, name))
def _get_lockfile_contents(self): try: contents = FileUtils.read_file(self.lockfile.get_path(self.date)) return contents.split() except Exception: return None, None