Exemple #1
0
    def __init__(self, subset, name, desc, detail, item_for_stubs, prefetch,
                 prefetchdate, spawn,
                 wiki, partnum_todo, parts=False, checkpoints=False, checkpoint_file=None,
                 page_id_range=None, verbose=False):
        """
        record the job settings on the instance, then hand off to
        the Dump constructor

        args:
            subset, detail, desc, prefetch, prefetchdate, spawn,
            partnum_todo, wiki, item_for_stubs, checkpoint_file,
            page_id_range, verbose: stored as instance attributes
            name: passed on to Dump.__init__ together with desc and verbose
            parts: stored as _parts; when true, _parts_enabled and
                   onlyparts are both set to True
            checkpoints: when true, _checkpoints_enabled is set to True
        """
        # where the job runs and what it depends on
        self.wiki = wiki
        self.verbose = verbose
        self.item_for_stubs = item_for_stubs
        self._prerequisite_items = [item_for_stubs]

        # what gets dumped
        self._subset = subset
        self._detail = detail
        self._desc = desc
        self._page_id = {}
        self.page_id_range = page_id_range

        # how it gets dumped
        self._prefetch = prefetch
        self._prefetchdate = prefetchdate
        self._spawn = spawn
        self._check_truncation = True

        # the enabled flags are only assigned when requested, and always
        # before Dump.__init__ is called
        # NOTE(review): presumably Dump.__init__ supplies defaults for
        # flags left unset here -- confirm against the base class
        self._parts = parts
        self._partnum_todo = partnum_todo
        if parts:
            self._parts_enabled = True
            self.onlyparts = True
        self.checkpoint_file = checkpoint_file
        if checkpoints:
            self._checkpoints_enabled = True

        Dump.__init__(self, name, desc, verbose)
Exemple #2
0
 def __init__(self, name, desc, item_for_recombine):
     """
     set up a job whose only prerequisite is the item to be recombined

     args:
         name, desc: passed on to Dump.__init__
         item_for_recombine: stored on the instance and used as the
                             single entry of _prerequisite_items
     """
     # no partnum_todo, no parts generally (False, False), even though input may have it
     self._prerequisite_items = [item_for_recombine]
     self.item_for_recombine = item_for_recombine
     Dump.__init__(self, name, desc)
     # the input may have checkpoints but the output will not, so the
     # flag is forced off after the base constructor has run
     self._checkpoints_enabled = False
    def __init__(self, subset, name, desc, detail, item_for_stubs, prefetch,
                 prefetchdate, spawn,
                 wiki, partnum_todo, parts=False, checkpoints=False, checkpoint_file=None,
                 page_id_range=None, verbose=False):
        """
        gather the job settings into self.jobinfo, create the stub
        provider, then hand off to the Dump constructor

        args:
            subset, detail, desc, prefetch, prefetchdate, spawn,
            partnum_todo, page_id_range, item_for_stubs: stored in the
                jobinfo dict (page_id_range under the key 'pageid_range')
            wiki, verbose, checkpoint_file: stored as instance attributes
            name: passed on to Dump.__init__ together with desc and verbose
            parts: stored as _parts; when true, _parts_enabled and
                   onlyparts are both set to True
            checkpoints: when true, _checkpoints_enabled is set to True
        """
        self.wiki = wiki
        self.verbose = verbose

        self.checkpoint_file = checkpoint_file
        if checkpoints:
            self._checkpoints_enabled = True

        self._parts = parts
        if parts:
            self._parts_enabled = True
            self.onlyparts = True

        self.jobinfo = {
            'subset': subset,
            'detail': detail,
            'desc': desc,
            'prefetch': prefetch,
            'prefetchdate': prefetchdate,
            'spawn': spawn,
            'partnum_todo': partnum_todo,
            'pageid_range': page_id_range,
            'item_for_stubs': item_for_stubs,
        }
        self._prerequisite_items = [item_for_stubs]

        # the stub provider is created before Dump.__init__ is called,
        # once every attribute above is in place
        stub_settings = {
            'dumpname': self.get_dumpname(),
            'parts': parts,
            'dumpnamebase': self.get_dumpname_base(),
            'item_for_stubs': item_for_stubs,
            'partnum_todo': partnum_todo,
        }
        self.stubber = StubProvider(wiki, stub_settings, verbose)

        Dump.__init__(self, name, desc, verbose)
Exemple #4
0
 def __init__(self, name, desc, partnum_todo, db_name, parts=False):
     """
     store the database name and parts settings, then hand off to
     the Dump constructor

     args:
         name, desc: passed on to Dump.__init__
         partnum_todo: stored as _partnum_todo
         db_name: stored as db_name
         parts: stored as _parts; when true, _parts_enabled and
                onlyparts are both set to True
     """
     self.db_name = db_name
     self._partnum_todo = partnum_todo
     self._parts = parts
     if parts:
         # both flags go on together, before the base constructor runs
         self._parts_enabled = self.onlyparts = True
     Dump.__init__(self, name, desc)
 def __init__(self, desc, partnum_todo, jobsperbatch=None, parts=False):
     """
     store the batching and parts settings, then initialize the
     Dump base under the fixed name "xmlpagelogsdump"

     args:
         desc: passed on to Dump.__init__
         partnum_todo: stored as _partnum_todo
         jobsperbatch: stored as jobsperbatch
         parts: stored as _parts; when true, _parts_enabled and
                onlyparts are both set to True
     """
     self.jobsperbatch = jobsperbatch
     self._partnum_todo = partnum_todo
     self._parts = parts
     if parts:
         self._parts_enabled = self.onlyparts = True
     # callers do not get to choose the name of this job
     Dump.__init__(self, "xmlpagelogsdump", desc)
Exemple #6
0
 def __init__(self, name, desc, detail, item_for_xml_dumps):
     """
     set up a job whose only prerequisite is the given xml dumps item

     args:
         name, desc: passed on to Dump.__init__
         detail: stored as _detail
         item_for_xml_dumps: stored on the instance and used as the
                             single entry of _prerequisite_items
     """
     # no prefetch, no spawn
     self._detail = detail
     self._prerequisite_items = [item_for_xml_dumps]
     self.item_for_xml_dumps = item_for_xml_dumps
     Dump.__init__(self, name, desc)
     # the input may have checkpoints but the output will not, so the
     # flag is forced off after the base constructor has run
     self._checkpoints_enabled = False
Exemple #7
0
 def __init__(self, name, desc, detail, item_for_recombine, wiki):
     """
     set up a job whose only prerequisite is the item to be recombined

     args:
         name: passed on to Dump.__init__ together with desc
         desc: also stored as _desc
         detail: stored as _detail
         item_for_recombine: stored on the instance and used as the
                             single entry of _prerequisite_items
         wiki: stored as wiki
     """
     self.wiki = wiki
     self._desc = desc
     self._detail = detail
     self._prerequisite_items = [item_for_recombine]
     self.item_for_recombine = item_for_recombine
     Dump.__init__(self, name, desc)
     # both flags are forced off after the base constructor has run;
     # the input may have checkpoints but the output will not.
     self._parts_enabled = False
     self._checkpoints_enabled = False
 def __init__(self, name, desc, partnum_todo, jobsperbatch=None, parts=False, checkpoints=False):
     """
     store the stub dump names plus the batching, parts and checkpoint
     settings, then hand off to the Dump constructor

     args:
         name, desc: passed on to Dump.__init__
         partnum_todo: stored as _partnum_todo
         jobsperbatch: stored as jobsperbatch
         parts: stored as _parts; when true, _parts_enabled and
                onlyparts are both set to True
         checkpoints: when true, _checkpoints_enabled is set to True
     """
     # fixed names of the three stub dumps
     self.articles_dump_name = "stub-articles"
     self.current_dump_name = "stub-meta-current"
     self.history_dump_name = "stub-meta-history"

     self.jobsperbatch = jobsperbatch
     self._partnum_todo = partnum_todo
     self._parts = parts
     if parts:
         self._parts_enabled = self.onlyparts = True
     if checkpoints:
         self._checkpoints_enabled = True
     Dump.__init__(self, name, desc)
Exemple #9
0
 def __init__(self, subset, name, desc, detail, item_for_recompression, wiki,
              partnum_todo, parts=False, checkpoints=False, checkpoint_file=None):
     """
     set up a job whose only prerequisite is the item to be recompressed

     args:
         subset, detail, wiki, partnum_todo, checkpoint_file: stored
             as instance attributes
         name, desc: passed on to Dump.__init__
         item_for_recompression: stored on the instance and used as the
                                 single entry of _prerequisite_items
         parts: stored as _parts; when true, _parts_enabled is set to
                True (onlyparts is not touched here)
         checkpoints: when true, _checkpoints_enabled is set to True
     """
     self.wiki = wiki
     self._subset = subset
     self._detail = detail

     self.item_for_recompression = item_for_recompression
     self._prerequisite_items = [item_for_recompression]

     self._partnum_todo = partnum_todo
     self._parts = parts
     if parts:
         self._parts_enabled = True

     self.checkpoint_file = checkpoint_file
     if checkpoints:
         self._checkpoints_enabled = True

     Dump.__init__(self, name, desc)
Exemple #10
0
    def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
        """
        list the files to clean up, leaving out checkpoint files that
        fall outside self.page_id_range when such a range is set

        args:
            dump_dir, dump_names: passed through unchanged to
                                  Dump.list_outfiles_for_cleanup
        returns:
            the list from Dump.list_outfiles_for_cleanup itself when no
            page range is set, otherwise a new filtered list
        """
        candidates = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)

        page_range = self.page_id_range
        if not page_range:
            return candidates

        # this job covers one page range only: "first,last" or just "first"
        if ',' in page_range:
            low_text, high_text = page_range.split(',', 2)
            range_start, range_end = int(low_text), int(high_text)
        else:
            range_start, range_end = int(page_range), None

        # checkpoint files cover specific page ranges; keep only the ones
        # inside the requested range. every other file is always kept
        kept = []
        for candidate in candidates:
            if not candidate.is_checkpoint_file:
                kept.append(candidate)
                continue
            # a missing (or zero) bound on our side is not checked; a file
            # without the matching page id fails a bound that is set
            if range_start and not (candidate.first_page_id and
                                    int(candidate.first_page_id) >= range_start):
                continue
            if range_end and not (candidate.last_page_id and
                                  int(candidate.last_page_id) <= range_end):
                continue
            kept.append(candidate)
        return kept
Exemple #11
0
 def list_outfiles_for_cleanup(self, dump_dir):
     """
     ask Dump.list_outfiles_for_cleanup for the files matching the
     names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns: list of DumpFilename
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_for_cleanup(self, dump_dir, names))
Exemple #12
0
 def list_outfiles_to_check_for_truncation(self, dump_dir):
     """
     ask Dump.list_outfiles_to_check_for_truncation for the files
     matching the names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns: list of DumpFilename
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_to_check_for_truncation(self, dump_dir, names))
 def get_tmp_files(self, dump_dir, dump_names=None):
     """
     pick out the temporary files from the cleanup listing

     args:
         dump_dir, dump_names: passed through unchanged to
                               Dump.list_outfiles_for_cleanup
     returns:
         list of DumpFilename, only the entries whose is_temp_file
         attribute is true
     """
     temp_files = []
     for candidate in Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names):
         if candidate.is_temp_file:
             temp_files.append(candidate)
     return temp_files
Exemple #14
0
 def list_truncated_empty_outfiles_for_input(self, dump_dir):
     """
     ask Dump.list_truncated_empty_outfiles_for_input for the files
     matching the names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns: list of DumpFilename
     """
     names = self.list_dumpnames()
     return list(Dump.list_truncated_empty_outfiles_for_input(self, dump_dir, names))
Exemple #15
0
 def list_outfiles_for_input(self, dump_dir, dump_names=None):
     """
     ask Dump.list_outfiles_for_input for the files matching the
     given dump names

     args:
         dump_dir: passed through unchanged
         dump_names: names to look for; when None, the names from
                     self.list_dumpnames() are used instead
     returns: list of DumpFilename
     """
     names = self.list_dumpnames() if dump_names is None else dump_names
     return list(Dump.list_outfiles_for_input(self, dump_dir, names))
    def list_outfiles_for_cleanup(self, dump_dir, dump_names=None):
        """
        list existing output files, checkpoint files included (from
        the dump run for the current wiki and date), for the case
        where we have been asked to clean up before a retry

        when self.jobinfo['pageid_range'] is set, checkpoint files
        outside that page range are left out of the result

        args:
            DumpDir, list of dump names ("stub-meta-history", ...)
        returns:
            list of DumpFilename; this is the list from
            Dump.list_outfiles_for_cleanup itself when no page range
            is set, otherwise a new filtered list
        """
        candidates = Dump.list_outfiles_for_cleanup(self, dump_dir, dump_names)

        page_range = self.jobinfo['pageid_range']
        if not page_range:
            return candidates

        # this job covers one page range only: "first,last" or just "first"
        if ',' in page_range:
            low_text, high_text = page_range.split(',', 2)
            range_start, range_end = int(low_text), int(high_text)
        else:
            range_start, range_end = int(page_range), None

        # checkpoint files cover specific page ranges; keep only the ones
        # inside the requested range. every other file is always kept
        kept = []
        for candidate in candidates:
            if not candidate.is_checkpoint_file:
                kept.append(candidate)
                continue
            # a missing (or zero) bound on our side is not checked; a file
            # without the matching page id fails a bound that is set
            if range_start and not (candidate.first_page_id and
                                    int(candidate.first_page_id) >= range_start):
                continue
            if range_end and not (candidate.last_page_id and
                                  int(candidate.last_page_id) <= range_end):
                continue
            kept.append(candidate)
        return kept
Exemple #17
0
 def __init__(self, name, desc, item_for_xml_stubs):
     """
     set up a job whose only prerequisite is the given xml stubs item

     args:
         name, desc: passed on to Dump.__init__
         item_for_xml_stubs: stored on the instance and used as the
                             single entry of _prerequisite_items
     """
     self._prerequisite_items = [item_for_xml_stubs]
     self.item_for_xml_stubs = item_for_xml_stubs
     Dump.__init__(self, name, desc)
     # the input may have checkpoints but the output will not, so the
     # flag is forced off after the base constructor has run
     self._checkpoints_enabled = False
Exemple #18
0
 def __init__(self, desc, parts=False):
     """
     initialize the Dump base under the fixed name "xmlpagelogsdump"

     args:
         desc: passed on to Dump.__init__
         parts: accepted but not used by this constructor
     """
     job_name = "xmlpagelogsdump"
     Dump.__init__(self, job_name, desc)
Exemple #19
0
 def list_outfiles_to_publish(self, dump_dir):
     """
     ask Dump.list_outfiles_to_publish for the files matching the
     names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns:
         new list holding whatever Dump.list_outfiles_to_publish produced
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_to_publish(self, dump_dir, names))
Exemple #20
0
 def __init__(self, properties, name, desc):
     """
     store the properties and switch parts off, then hand off to
     the Dump constructor

     args:
         properties: stored as _properties
         name, desc: passed on to Dump.__init__
     """
     # parts are explicitly switched off before the base constructor runs
     self._parts_enabled = False
     self._properties = properties
     Dump.__init__(self, name, desc)
Exemple #21
0
 def list_outfiles_to_check_for_truncation(self, dump_dir):
     """
     ask Dump.list_outfiles_to_check_for_truncation for the files
     matching the names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns:
         new list holding whatever the Dump method produced
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_to_check_for_truncation(self, dump_dir, names))
 def __init__(self, table, name, desc):
     """
     store the table, mark the job as not private and switch parts
     off, then hand off to the Dump constructor

     args:
         table: stored as _table
         name, desc: passed on to Dump.__init__
     """
     # both flags are explicitly off before the base constructor runs
     self.private = False
     self._parts_enabled = False
     self._table = table
     Dump.__init__(self, name, desc)
Exemple #23
0
 def __init__(self, name, desc, history=False):
     """
     store the history flag, then hand off to the Dump constructor

     args:
         name, desc: passed on to Dump.__init__
         history: stored as history
     """
     # set before the base constructor is called
     self.history = history
     Dump.__init__(self, name=name, desc=desc) if False else Dump.__init__(self, name, desc)
Exemple #24
0
 def list_outfiles_for_build_command(self, dump_dir):
     """
     ask Dump.list_outfiles_for_build_command for the files matching
     the names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns:
         new list holding whatever the Dump method produced
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_for_build_command(self, dump_dir, names))
Exemple #25
0
 def list_outfiles_for_cleanup(self, dump_dir):
     """
     ask Dump.list_outfiles_for_cleanup for the files matching the
     names from self.list_dumpnames()

     args:
         dump_dir: passed through unchanged
     returns:
         new list holding whatever the Dump method produced
     """
     names = self.list_dumpnames()
     return list(Dump.list_outfiles_for_cleanup(self, dump_dir, names))
Exemple #26
0
 def list_outfiles_for_input(self, dump_dir, dump_names=None):
     """
     ask Dump.list_outfiles_for_input for the files matching the
     given dump names

     args:
         dump_dir: passed through unchanged
         dump_names: names to look for; when None, the names from
                     self.list_dumpnames() are used instead
     returns:
         new list holding whatever the Dump method produced
     """
     names = self.list_dumpnames() if dump_names is None else dump_names
     return list(Dump.list_outfiles_for_input(self, dump_dir, names))