def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
    """
    Return the cleanup candidates reported by Dump.list_outfiles_for_cleanup,
    narrowed to this job's page id range when one is set.

    self.page_id_range is either "first,last" or just "first" (no upper
    bound). When it is empty/unset, the base-class list is returned as is.
    Only checkpoint files are filtered by the range; every other file is
    always kept.

    args:
        dump_dir, dump_names: passed through unchanged to
        Dump.list_outfiles_for_cleanup
    returns:
        list of the file objects to clean up
    """
    candidates = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)

    page_id_range = self.page_id_range
    if not page_id_range:
        return candidates

    # this job covers one page range only
    if ',' in page_id_range:
        # a value with more than one comma fails here on unpacking
        range_start, range_end = page_id_range.split(',', 2)
        range_start = int(range_start)
        range_end = int(range_end)
    else:
        range_start = int(page_id_range)
        range_end = None

    # checkpoint files cover specific page ranges; keep only those that
    # lie within the requested range. a bound that is 0/None is not checked.
    kept = []
    for candidate in candidates:
        if candidate.is_checkpoint_file:
            if range_start and not (
                    candidate.first_page_id and
                    int(candidate.first_page_id) >= range_start):
                # starts before the requested range, or has no start id
                continue
            if range_end and not (
                    candidate.last_page_id and
                    int(candidate.last_page_id) <= range_end):
                # ends after the requested range, or has no end id
                continue
        kept.append(candidate)
    return kept
def list_outfiles_for_cleanup(self, dump_dir):
    """
    Gather the cleanup candidates for the dump names reported by
    self.list_dumpnames().

    The result is a new list, not the object handed back by
    Dump.list_outfiles_for_cleanup.

    returns:
        list of DumpFilename
    """
    names = self.list_dumpnames()
    return list(Dump.list_outfiles_for_cleanup(self, dump_dir, names))
def get_tmp_files(self, dump_dir, dump_names=None):
    """
    list temporary output files currently existing

    Takes everything Dump.list_outfiles_for_cleanup reports and keeps
    only the entries whose is_temp_file flag is set.

    returns:
        list of DumpFilename
    """
    return [
        candidate
        for candidate in Dump.list_outfiles_for_cleanup(
            self, dump_dir, dump_names)
        if candidate.is_temp_file
    ]
def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
    """
    list output files including checkpoint files currently existing
    (from the dump run for the current wiki and date), in case
    we have been requested to clean up before a retry

    If self.jobinfo['pageid_range'] is set ("first,last" or just "first"),
    checkpoint files outside that page id range are left out; all other
    files are always listed.

    args:
        DumpDir, list of dump names ("stub-meta-history", ...)
    returns:
        list of DumpFilename
    """
    existing = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)

    pageid_range = self.jobinfo['pageid_range']
    if not pageid_range:
        return existing

    # this job covers one page range only
    if ',' in pageid_range:
        # a value with more than one comma fails here on unpacking
        lower_text, upper_text = pageid_range.split(',', 2)
        lower = int(lower_text)
        upper = int(upper_text)
    else:
        lower = int(pageid_range)
        upper = None

    def wanted(dfname):
        """tell whether dfname belongs in the cleanup list"""
        # only checkpoint files cover specific page ranges
        if not dfname.is_checkpoint_file:
            return True
        # a bound that is 0/None is not checked at all
        if lower and not (dfname.first_page_id and
                          int(dfname.first_page_id) >= lower):
            return False
        return not upper or (dfname.last_page_id and
                             int(dfname.last_page_id) <= upper)

    return [dfname for dfname in existing if wanted(dfname)]
def list_outfiles_for_cleanup(self, dump_dir):
    """
    Return a fresh list of what Dump.list_outfiles_for_cleanup reports
    for the dump names given by self.list_dumpnames().
    """
    names = self.list_dumpnames()
    found = Dump.list_outfiles_for_cleanup(self, dump_dir, names)
    # copy, so the caller never shares the base class's list object
    return list(found)