Example #1
0
    def get_prev_revid(self, max_revid):
        '''
        work out the revision id this incremental should start from

        the date of the previous run is looked up via get_prev_incrdate();
        if there is one, the max rev id stored for that date is read from
        its file, and if the file yields nothing, the value is retrieved
        and recorded via MaxRevID.record_max_revid().
        if there is no previous run, max_revid - 10 is used instead,
        but never anything lower than 1.

        returns the value found plus one, as a string, or None if
        no value could be determined
        '''
        last_run_date = self.get_prev_incrdate(self.wiki.date)
        log.info("prev_date is %s", safe(last_run_date))

        if not last_run_date:
            log.info("Wiki %s no previous runs, using %s - 10 ",
                     self.wiki.db_name, max_revid)
            # fall back to a small window before max_revid, floor of 1
            last_revid = str(max(int(max_revid) - 10, 1))
        else:
            run_cutoff = cutoff_from_date(last_run_date, self.wiki.config)
            reader = MaxRevID(self.wiki, run_cutoff, self.dryrun)
            last_revid = reader.read_max_revid_from_file(last_run_date)
            if last_revid is None:
                # nothing in the file for that run; have the reader
                # record the value and pick it up from there
                log.info("Wiki %s retrieving prevRevId from db.",
                         self.wiki.db_name)
                reader.record_max_revid()
                last_revid = reader.max_id

        # this incr will cover every revision from the last
        # incremental through the maxid we wrote out already,
        # so we start one past the last revision of the previous run.
        if last_revid is None:
            start_revid = None
        else:
            start_revid = str(int(last_revid) + 1)
        log.info("prev_revid is %s", safe(start_revid))
        return start_revid
 def get_outputfile_indextxt(self, filenames_tocheck, expected, wikiname, dump_date):
     '''
     build the index entries for the output files of one wiki
     and one dump date.

     each entry is an html snippet: a link to the file, followed by
     the file's date and size. entries are ordered by filename.
     a file for which no date is available (presumably because it
     does not exist) is silently left out of the list.

     expected is the list of filenames the dump is supposed to
     produce; an expected file with no date is skipped like any
     other file with no date, and no error is generated for it
     at present, though this may change in the future.
     '''
     dump_path = MiscDumpDir(self.args['config'], dump_date).get_dumpdir(wikiname)
     info_by_name = {
         name: FileUtils.file_info(os.path.join(dump_path, name))
         for name in filenames_tocheck
     }

     entries = []
     for name in sorted(info_by_name):
         file_date, file_size = info_by_name[name]
         self.log.info("output file %s for %s %s %s",
                       name, wikiname, safe(file_date), safe(file_size))
         # may do more with expected-but-missing files in the future;
         # for now they are dropped like any other file without a date
         expected_but_missing = name in expected and file_date is None
         if expected_but_missing or not file_date:
             continue
         link = make_link(os.path.join(wikiname, dump_date, name),
                          os.path.basename(name))
         entries.append("%s: %s (size %s)<br />" % (link, file_date, file_size))
     return entries
 def get_stat_text(self, dump_date, wikiname):
     '''
     read the status file of the wiki for the given dump date
     and return its contents wrapped in parentheses, or None
     if nothing could be read from it
     '''
     status_path = StatusFile(self.args['config'], dump_date, wikiname).get_path()
     contents = FileUtils.read_file(status_path)
     self.log.info("status for %s %s", wikiname, safe(contents))
     if not contents:
         return None
     return "(%s)" % (contents)
Example #4
0
    def dump_max_revid(self):
        '''
        dump maximum rev id from wiki that's older than
        the configured number of seconds (cutoff)

        we have this cutoff so that content really new
        is not dumped; we want to give curators the chance to
        remove problematic entries first.

        a cutoff of some hours is reasonable.

        if the max rev id file for this wiki and date already exists,
        the db is not queried and the value is read back from the file.
        otherwise the db is queried and, unless this is a dry run,
        the result is written to that file.

        returns the max rev id plus one, as a string (the end rev id
        is not included in the dump), or None if no rev id could be
        retrieved from the db or read from the file.
        '''
        max_revid = None
        revidfile = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
        revid_path = revidfile.get_path()

        if exists(revid_path):
            self.log.info("Wiki %s, max rev id file %s already exists",
                          self.wiki.db_name, revid_path)
        else:
            self.log.info("Wiki %s retrieving max revid from db.",
                          self.wiki.db_name)
            query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                     "order by rev_timestamp desc limit 1" % self.cutoff)
            db_info = DbServerInfo(self.wiki, self.wiki.db_name)
            results = db_info.run_sql_and_get_output(query)
            lines = results.splitlines() if results else []
            # the rev id is taken from the second line of the output
            # (presumably the first line is the column header; confirm).
            # output with fewer than two lines means no usable result.
            if len(lines) > 1 and lines[1].isdigit():
                max_revid = lines[1]
                # NOTE(review): the db output looks to be bytes; accept
                # text as well so that we work with either
                if isinstance(max_revid, bytes):
                    max_revid = max_revid.decode('utf-8')
                if self.dryrun:
                    print("would write file {path} with contents {revid}".format(
                        path=revid_path, revid=max_revid))
                else:
                    FileUtils.write_file_in_place(
                        revid_path, max_revid,
                        self.wiki.config.fileperms)

        if not max_revid:
            # either the file was already there or the db gave us nothing;
            # fall back to whatever the file has
            try:
                contents = FileUtils.read_file(revid_path)
                # strip trailing whitespace from the contents, not the path
                max_revid = contents.rstrip() if contents is not None else None
            except Exception as ex:
                self.log.info("Error encountered reading maxrevid from %s ", revid_path,
                              exc_info=ex)
                max_revid = None

        # end rev id is not included in dump
        if max_revid is not None:
            max_revid = str(int(max_revid) + 1)

        self.log.info("max_revid is %s", safe(max_revid))
        return max_revid