def db_latest_status(self):
    """
    report the state of the latest dump of every wiki in self.db_list

    returns a list with one tuple per wiki, in db_list order:
        (wiki name, status, value returned by wiki.latest_dump())
    status is None if the wiki has no latest dump; otherwise it is the
    first of 'missing', 'not yet', 'failed', 'aborted', 'progress',
    'partial', 'complete' that occurs in the text of the status file,
    or the whole text of the status file if none of them occurs.
    a status file that cannot be read is treated as 'failed'
    """
    known_states = ('missing', 'not yet', 'failed', 'aborted',
                    'progress', 'partial', 'complete')
    results = []
    for dbname in self.db_list:
        wiki = Wiki(self, dbname)
        last = wiki.latest_dump()
        if not last:
            # never dumped, nothing to look up
            results.append((dbname, None, last))
            continue
        status_path = StatusHtml.get_statusfile_path(wiki, last)
        try:
            text = FileUtils.read_file(status_path)
        except Exception:
            text = 'failed'
        # the first known state mentioned in the text wins;
        # the raw text is kept when no known state is mentioned
        status = next((state for state in known_states if state in text), text)
        results.append((dbname, status, last))
    return results
def refresh_notice(self):
    """
    reload self.notice from the notice file; self.notice becomes
    the empty string when there is no such file
    """
    notice_path = self._get_notice_filename()
    # if the notice file has changed or gone away, we comply.
    self.notice = FileUtils.read_file(notice_path) if exists(notice_path) else ""
def cp_chksum_tmpfiles_to_permfile(self):
    """
    for each hash type in Checksummer.HASHTYPES, read the temporary
    checksum file and write its text to the regular checksum file

    does nothing unless Checksummer.NAME is listed in self._enabled
    """
    if Checksummer.NAME not in self._enabled:
        return
    for hashtype in Checksummer.HASHTYPES:
        source = self._get_checksum_filename_tmp(hashtype)
        destination = self._get_checksum_filename(hashtype)
        contents = FileUtils.read_file(source)
        FileUtils.write_file(self.wiki.config.temp_dir, destination,
                             contents, self.wiki.config.fileperms)
def refresh_notice(self):
    """
    reload self.notice from the notice file

    if the notice file has changed or gone away, we comply:
    with no file present, self.notice becomes the empty string
    """
    path = self._get_notice_filename()
    contents = ""
    if os.path.exists(path):
        contents = FileUtils.read_file(path)
    self.notice = contents
def db_info_by_age(self, use_status_time=False):
    """
    Return one tuple per wiki in self.db_list, sorted in ascending
    order of the tuple itself:
      (dump_failed, date, age, wiki name)

    * dump_failed is True when no status text could be read (the wiki
      has no latest dump, or reading its status went wrong) or when
      the status text contains 'dump aborted'
    * date is today's date minus the latest dump date, both taken as
      integers, or simply today's date if use_status_time is True
    * age is the age of the status file as reported by
      FileUtils.file_age

    Because False < True, the result lists first the wikis whose
    latest dump was successful, then the wikis whose latest dump
    failed, each group ordered by date and then age; the last item
    of the list is therefore, when applicable, the oldest failed
    dump attempt.

    date and age both start out as sys.maxsize, so wikis that have
    never been dumped sort last. If something goes wrong while the
    status of a dump is checked, a message is printed and whichever
    of date and age had not been determined yet stays at sys.maxsize.

    Note that we sort by the date of the dump directory, not the
    last date that a dump file in that directory may have been
    touched. This allows us to rerun jobs to completion from older
    runs, for example a history run that failed in the middle,
    without borking the index page links.
    """
    today = int(TimeUtils.today())
    entries = []
    for dbname in self.db_list:
        wiki = Wiki(self, dbname)

        age = sys.maxsize
        date = sys.maxsize
        last = wiki.latest_dump()
        status = ''
        if last:
            status_path = StatusHtml.get_statusfile_path(wiki, last)
            try:
                # with use_status_time, only the status file time
                # counts, not the dir date
                date = today if use_status_time else today - int(last)
                # tack on the file mtime so that if we have multiple wikis
                # dumped on the same day, they get ordered properly
                age = FileUtils.file_age(status_path)
                status = FileUtils.read_file(status_path)
            except Exception:
                print("dump dir missing status file %s?" % status_path)
        dump_failed = (status == '') or ('dump aborted' in status)
        entries.append((dump_failed, date, age, dbname))
    entries.sort()
    return entries
def get_checksum_from_file(path):
    """
    return the checksum recorded in the file at 'path'

    the file is expected to hold one line: the checksum, then
    whitespace, then the name of the file that was checksummed.
    the text in front of the first space is returned

    returns None on any error, including content without a space
    """
    try:
        pieces = FileUtils.read_file(path).split(' ', 1)
        if len(pieces) != 2:
            # no separator, so this is not a checksum line
            return None
        return pieces[0]
    except Exception:
        return None
def status_line(wiki, aborted=False):
    """
    return the status line for the latest dump of the wiki

    normally this is the text of the status file of the latest dump.
    a generated status line is returned instead when the wiki has no
    latest dump, when 'aborted' is True (the status file is not read
    then), or when the status file cannot be read
    """
    date = wiki.latest_dump()
    if not date:
        return StatusHtml.report_statusline(wiki, "has not yet been dumped")
    if aborted:
        return StatusHtml.report_statusline(
            wiki, "<span class=\"failed\">dump aborted</span>")
    status_path = StatusHtml.get_statusfile_path(wiki, date)
    try:
        return FileUtils.read_file(status_path)
    except Exception:
        return StatusHtml.report_statusline(wiki, "missing status record")
def do_main():
    """
    main entry point, do all the work

    gets and validates the command line arguments, sets up the config
    and a script or query runner, then runs it on the one wiki named
    in the arguments, or else on all wikis with retries
    """
    (configfile, date, dryrun, filenameformat,
     output_dir, overwrite, wikiname, script,
     basename, query, retries, verbose, remainder) = get_args()

    validate_args(date, output_dir, retries, script, query)

    # three retries unless told otherwise
    retries = int("3" if retries is None else retries)

    config = Config(configfile) if configfile else Config()

    if date is None:
        date = TimeUtils.today()

    if script is not None:
        runner = ScriptRunner(script, remainder, dryrun, verbose)
    elif query is not None:
        runner = QueryRunner(query, dryrun, verbose)
    else:
        # no query given, use the one from the query file named in the config
        runner = QueryRunner(FileUtils.read_file(config.queryfile), dryrun, verbose)

    base = None
    if basename is not None:
        base = Wiki(config, basename)
        base.set_date(date)
        base.config.parse_conffile_per_project(base.db_name)

    if wikiname is None:
        loop_runner = WikiRunnerLoop(config, runner, filenameformat, output_dir, base)
        loop_runner.do_all_wikis_til_done(retries, overwrite, date)
        return

    wiki = Wiki(config, wikiname)
    wiki.set_date(date)
    one_wiki_runner = WikiRunner(runner, wiki, filenameformat, output_dir, base)
    one_wiki_runner.do_one_wiki(overwrite)
def cp_chksum_tmpfiles_to_permfile(self):
    """
    copy the content of the temporary checksum files to the
    permanent checksum files, for every combination of hash type
    and format

    during a dump run, checksum files are written to a temporary
    location and updated there; the content is copied into the
    permanent location after each dump job completes

    does nothing unless Checksummer.NAME is listed in self._enabled
    """
    if Checksummer.NAME not in self._enabled:
        return
    for hashtype in Checksummer.HASHTYPES:
        for fileformat in Checksummer.FORMATS:
            source = self._get_checksum_filename_tmp(hashtype, fileformat)
            destination = self._get_checksum_path(hashtype, fileformat)
            text = FileUtils.read_file(source)
            workdir = FileUtils.wiki_tempdir(self.wiki.db_name,
                                             self.wiki.config.temp_dir)
            FileUtils.write_file(workdir, destination, text,
                                 self.wiki.config.fileperms)
def generate_index(config, other_indexhtml=None, sorted_by_db=False):
    """
    return the content of the index page: the "download-index.html"
    template filled in with the overall status, one status line per
    wiki and an optional link to the other index page

    wikis are listed sorted by name if sorted_by_db is True, otherwise
    in the order given by config.db_list_by_age(). stale locks found
    along the way are cleaned up. a wiki that causes an exception is
    left out of the list (with a traceback on stdout if VERBOSE)

    other_indexhtml, if given, is the path of the index page with the
    other sort order; the link uses just its base name
    """
    dbs = sorted(config.db_list) if sorted_by_db else config.db_list_by_age()

    running = False
    lines = []
    for db_name in dbs:
        try:
            wiki = Wiki(config, db_name)
            locker = Locker(wiki)
            stale = locker.is_stale(all_locks=True)
            if stale:
                locker.cleanup_stale_locks(stale)
            if not running:
                running = locker.is_locked(all_locks=True)
            lines.append(StatusHtml.status_line(wiki))
        except Exception:
            # if there's a problem with one wiki at least
            # let's show the rest
            if VERBOSE:
                traceback.print_exc(file=sys.stdout)

    if running:
        status = "Dumps are in progress..."
    elif exists("maintenance.txt"):
        status = FileUtils.read_file("maintenance.txt")
    else:
        status = "Dump process is idle."

    other_index_link = ""
    if other_indexhtml is not None:
        # the other page is sorted the way this one is not
        other_sortedby = "dump date" if sorted_by_db else "wiki name"
        other_index_link = 'Also view sorted by <a href="%s">%s</a>' % (
            os.path.basename(other_indexhtml), other_sortedby)

    template = config.read_template("download-index.html")
    return template % {
        "otherIndexLink": other_index_link,
        "status": status,
        "items": "\n".join(lines),
    }
def write_notice_file(self):
    """
    bring the notice file and self.notice in line with each other:
    remove the file if self.notice is False, write self.notice to
    the file if it is non-empty, or else load self.notice from an
    existing file

    does nothing unless NoticeFile.NAME is listed in self._enabled
    """
    if NoticeFile.NAME not in self._enabled:
        return
    notice_path = self._get_notice_filename()
    if self.notice is False:
        # delnotice. toss any existing file
        if exists(notice_path):
            os.remove(notice_path)
        self.notice = ""
    elif self.notice != "":
        # addnotice, stuff notice in a file for other jobs etc
        FileUtils.write_file(self.wiki.config.temp_dir, notice_path,
                             self.notice, self.wiki.config.fileperms)
    elif exists(notice_path):
        # default case. if there is a file get the contents, otherwise
        # we have empty contents, all good
        self.notice = FileUtils.read_file(notice_path)
def status_line(wiki, aborted=False):
    """
    return the status information of the latest dump of the wiki,
    as read from its status html file

    if the file cannot be read, or the wiki has no latest dump, a
    line of html with a reasonable guess about the dump status is
    returned instead

    if 'aborted' is True and there is a latest dump, nothing is read
    in; a line of html saying that the dump was aborted is returned
    """
    latest = wiki.latest_dump()
    if not latest:
        return StatusHtml.report_statusline(wiki, "has not yet been dumped")
    if aborted:
        return StatusHtml.report_statusline(
            wiki, "<span class=\"failed\">dump aborted</span>")
    statusfile = StatusHtml.get_statusfile_path(wiki, latest)
    try:
        contents = FileUtils.read_file(statusfile)
    except Exception:
        return StatusHtml.report_statusline(wiki, "missing status record")
    return contents
def write_notice(self):
    """
    bring the notice file and self.notice in line with each other:
    - self.notice is False: remove the notice file if there is one
      and set self.notice to the empty string
    - self.notice has contents: write them to the notice file
    - self.notice is the empty string: read the existing notice
      file, if any, and stash its contents in self.notice

    does nothing unless Notice.NAME is listed in self._enabled
    """
    if Notice.NAME not in self._enabled:
        return
    path = self._get_notice_filename()
    if self.notice is False:
        # delnotice. toss any existing file
        if os.path.exists(path):
            os.remove(path)
        self.notice = ""
    elif self.notice != "":
        # addnotice, stuff notice in a file for other jobs etc
        workdir = FileUtils.wiki_tempdir(self.wiki.db_name,
                                         self.wiki.config.temp_dir)
        FileUtils.write_file(workdir, path, self.notice,
                             self.wiki.config.fileperms)
    elif os.path.exists(path):
        # default case. if there is a file get the contents, otherwise
        # we have empty contents, all good
        self.notice = FileUtils.read_file(path)
def read_template(self, name):
    """
    return the content of the file 'name' in self.template_dir
    """
    return FileUtils.read_file(os.path.join(self.template_dir, name))