def __init__(self, subset, name, desc, detail, item_for_stubs, prefetch, prefetchdate,
             spawn, wiki, partnum_todo, parts=False, checkpoints=False,
             checkpoint_file=None, page_id_range=None, verbose=False):
    self._subset = subset
    self._detail = detail
    self._desc = desc
    self._prefetch = prefetch
    self._prefetchdate = prefetchdate
    self._spawn = spawn
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
        self.onlyparts = True
    self._page_id = {}
    self._partnum_todo = partnum_todo
    self.wiki = wiki
    self.item_for_stubs = item_for_stubs
    if checkpoints:
        self._checkpoints_enabled = True
    self.checkpoint_file = checkpoint_file
    self.page_id_range = page_id_range
    self.verbose = verbose
    self._prerequisite_items = [self.item_for_stubs]
    self._check_truncation = True
    Dump.__init__(self, name, desc, self.verbose)
def __init__(self, name, desc, item_for_recombine):
    # no partnum_todo, no parts generally (False, False), even though input may have it
    self.item_for_recombine = item_for_recombine
    self._prerequisite_items = [self.item_for_recombine]
    Dump.__init__(self, name, desc)
    # the input may have checkpoints but the output will not.
    self._checkpoints_enabled = False
def __init__(self, subset, name, desc, detail, item_for_stubs, prefetch, prefetchdate,
             spawn, wiki, partnum_todo, parts=False, checkpoints=False,
             checkpoint_file=None, page_id_range=None, verbose=False):
    self.jobinfo = {'subset': subset, 'detail': detail, 'desc': desc,
                    'prefetch': prefetch, 'prefetchdate': prefetchdate,
                    'spawn': spawn, 'partnum_todo': partnum_todo,
                    'pageid_range': page_id_range, 'item_for_stubs': item_for_stubs}
    if checkpoints:
        self._checkpoints_enabled = True
    self.checkpoint_file = checkpoint_file
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
        self.onlyparts = True
    self.wiki = wiki
    self.verbose = verbose
    self._prerequisite_items = [self.jobinfo['item_for_stubs']]
    self.stubber = StubProvider(
        self.wiki,
        {'dumpname': self.get_dumpname(), 'parts': self._parts,
         'dumpnamebase': self.get_dumpname_base(),
         'item_for_stubs': item_for_stubs,
         'partnum_todo': self.jobinfo['partnum_todo']},
        self.verbose)
    Dump.__init__(self, name, desc, self.verbose)
def __init__(self, name, desc, partnum_todo, db_name, parts=False):
    self._partnum_todo = partnum_todo
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
        self.onlyparts = True
    self.db_name = db_name
    Dump.__init__(self, name, desc)
def __init__(self, desc, partnum_todo, jobsperbatch=None, parts=False):
    self._partnum_todo = partnum_todo
    self.jobsperbatch = jobsperbatch
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
        self.onlyparts = True
    Dump.__init__(self, "xmlpagelogsdump", desc)
def __init__(self, name, desc, detail, item_for_xml_dumps):
    # no prefetch, no spawn
    self.item_for_xml_dumps = item_for_xml_dumps
    self._detail = detail
    self._prerequisite_items = [self.item_for_xml_dumps]
    Dump.__init__(self, name, desc)
    # the input may have checkpoints but the output will not.
    self._checkpoints_enabled = False
def __init__(self, name, desc, detail, item_for_recombine, wiki):
    self._detail = detail
    self._desc = desc
    self.wiki = wiki
    self.item_for_recombine = item_for_recombine
    self._prerequisite_items = [self.item_for_recombine]
    Dump.__init__(self, name, desc)
    # the input may have checkpoints but the output will not.
    self._checkpoints_enabled = False
    self._parts_enabled = False
def __init__(self, name, desc, partnum_todo, jobsperbatch=None,
             parts=False, checkpoints=False):
    self._partnum_todo = partnum_todo
    self.jobsperbatch = jobsperbatch
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
        self.onlyparts = True
    self.history_dump_name = "stub-meta-history"
    self.current_dump_name = "stub-meta-current"
    self.articles_dump_name = "stub-articles"
    if checkpoints:
        self._checkpoints_enabled = True
    Dump.__init__(self, name, desc)
def __init__(self, subset, name, desc, detail, item_for_recompression, wiki,
             partnum_todo, parts=False, checkpoints=False, checkpoint_file=None):
    self._subset = subset
    self._detail = detail
    self._parts = parts
    if self._parts:
        self._parts_enabled = True
    self._partnum_todo = partnum_todo
    self.wiki = wiki
    self.item_for_recompression = item_for_recompression
    if checkpoints:
        self._checkpoints_enabled = True
    self.checkpoint_file = checkpoint_file
    self._prerequisite_items = [self.item_for_recompression]
    Dump.__init__(self, name, desc)
def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
    """
    returns: list of DumpFilename
    """
    files = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)
    files_to_return = []
    if self.page_id_range:
        # this run covers one page range only
        if ',' in self.page_id_range:
            # maxsplit of 1: the range is "first,last", and a larger maxsplit
            # would break the two-value unpack on malformed input
            (first_page_id, last_page_id) = self.page_id_range.split(',', 1)
            first_page_id = int(first_page_id)
            last_page_id = int(last_page_id)
        else:
            first_page_id = int(self.page_id_range)
            last_page_id = None
        # checkpoint files cover specific page ranges. for those,
        # list only files within the given page range for cleanup
        for fname in files:
            if fname.is_checkpoint_file:
                if (not first_page_id or
                        (fname.first_page_id and
                         (int(fname.first_page_id) >= first_page_id))):
                    if (not last_page_id or
                            (fname.last_page_id and
                             (int(fname.last_page_id) <= last_page_id))):
                        files_to_return.append(fname)
            else:
                files_to_return.append(fname)
    else:
        files_to_return = files
    return files_to_return
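# The "first,last" parsing above is duplicated in the jobinfo-based variant of
# list_outfiles_for_cleanup further down. A shared helper could look like this
# minimal sketch; parse_page_id_range is a hypothetical name, not part of the
# existing Dump classes.
def parse_page_id_range(page_id_range):
    """
    parse a page id range of the form "first,last" or "first"
    returns: (first_page_id, last_page_id) as ints; last_page_id is
    None when no upper bound was given
    """
    if ',' in page_id_range:
        first_page_id, last_page_id = page_id_range.split(',', 1)
        return int(first_page_id), int(last_page_id)
    return int(page_id_range), None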
def list_outfiles_for_cleanup(self, dump_dir):
    """
    returns: list of DumpFilename
    """
    dump_names = self.list_dumpnames()
    dfnames = []
    dfnames.extend(Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names))
    return dfnames
def list_outfiles_to_check_for_truncation(self, dump_dir):
    """
    returns: list of DumpFilename
    """
    dump_names = self.list_dumpnames()
    dfnames = []
    dfnames.extend(Dump.list_outfiles_to_check_for_truncation(self, dump_dir, dump_names))
    return dfnames
def get_tmp_files(self, dump_dir, dump_names=None):
    """
    list temporary output files currently existing
    returns: list of DumpFilename
    """
    dfnames = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)
    return [dfname for dfname in dfnames if dfname.is_temp_file]
def list_truncated_empty_outfiles_for_input(self, dump_dir):
    """
    returns: list of DumpFilename
    """
    dump_names = self.list_dumpnames()
    dfnames = []
    dfnames.extend(Dump.list_truncated_empty_outfiles_for_input(self, dump_dir, dump_names))
    return dfnames
def list_outfiles_for_input(self, dump_dir, dump_names=None):
    """
    returns: list of DumpFilename
    """
    if dump_names is None:
        dump_names = self.list_dumpnames()
    dfnames = []
    dfnames.extend(Dump.list_outfiles_for_input(self, dump_dir, dump_names))
    return dfnames
def list_outfiles_for_cleanup(self, dump_dir, dump_names=None): """ list output files including checkpoint files currently existing (from the dump run for the current wiki and date), in case we have been requested to clean up before a retry args: DumpDir, list of dump names ("stub-meta-history", ...) returns: list of DumpFilename """ dfnames = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names) dfnames_to_return = [] if self.jobinfo['pageid_range']: # this file is for one page range only if ',' in self.jobinfo['pageid_range']: (first_page_id, last_page_id) = self.jobinfo['pageid_range'].split(',', 2) first_page_id = int(first_page_id) last_page_id = int(last_page_id) else: first_page_id = int(self.jobinfo['pageid_range']) last_page_id = None # checkpoint files cover specific page ranges. for those, # list only files within the given page range for cleanup for dfname in dfnames: if dfname.is_checkpoint_file: if (not first_page_id or (dfname.first_page_id and (int(dfname.first_page_id) >= first_page_id))): if (not last_page_id or (dfname.last_page_id and (int(dfname.last_page_id) <= last_page_id))): dfnames_to_return.append(dfname) else: dfnames_to_return.append(dfname) else: dfnames_to_return = dfnames return dfnames_to_return
def __init__(self, name, desc, item_for_xml_stubs):
    self.item_for_xml_stubs = item_for_xml_stubs
    self._prerequisite_items = [self.item_for_xml_stubs]
    Dump.__init__(self, name, desc)
    # the input may have checkpoints but the output will not.
    self._checkpoints_enabled = False
def __init__(self, desc, parts=False):
    Dump.__init__(self, "xmlpagelogsdump", desc)
def list_outfiles_to_publish(self, dump_dir):
    dump_names = self.list_dumpnames()
    files = []
    files.extend(Dump.list_outfiles_to_publish(self, dump_dir, dump_names))
    return files
def __init__(self, properties, name, desc):
    self._properties = properties
    self._parts_enabled = False
    Dump.__init__(self, name, desc)
def list_outfiles_to_check_for_truncation(self, dump_dir):
    dump_names = self.list_dumpnames()
    files = []
    files.extend(Dump.list_outfiles_to_check_for_truncation(self, dump_dir, dump_names))
    return files
def __init__(self, table, name, desc):
    self._table = table
    self._parts_enabled = False
    self.private = False
    Dump.__init__(self, name, desc)
def __init__(self, name, desc, history=False):
    self.history = history
    Dump.__init__(self, name, desc)
def list_outfiles_for_build_command(self, dump_dir):
    dump_names = self.list_dumpnames()
    files = []
    files.extend(Dump.list_outfiles_for_build_command(self, dump_dir, dump_names))
    return files
def list_outfiles_for_cleanup(self, dump_dir):
    dump_names = self.list_dumpnames()
    files = []
    files.extend(Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names))
    return files
def list_outfiles_for_input(self, dump_dir, dump_names=None):
    if dump_names is None:
        dump_names = self.list_dumpnames()
    files = []
    files.extend(Dump.list_outfiles_for_input(self, dump_dir, dump_names))
    return files