Example #1
0
 def record_max_revid(self):
     '''
     call get_max_revid() and then, unless this is a dry run,
     write self.max_id to the max rev id file for this wiki and date
     '''
     self.get_max_revid()
     if self.dryrun:
         # nothing is written on a dry run
         return
     revid_file = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
     FileUtils.write_file_in_place(
         revid_file.get_path(), self.max_id, self.wiki.config.fileperms)
 def do_all_wikis(self):
     """
     build the index.html file covering all wikis for the given date:
     each wiki that yields a non-empty result from do_one_wiki()
     contributes one <li> entry, and the entries are substituted into
     the configured index template as "items".
     FIXME maybe this should be for the latest run date? Hrm.
     """
     config = self.args["config"]
     items = []
     for wikiname in config.all_wikis_list:
         result = self.do_one_wiki(wikiname)
         if not result:
             # wikis with nothing to report get no entry
             continue
         log.info("result for wiki %s is %s", wikiname, result)
         items.append("<li>" + result + "</li>\n")
     template = config.read_template(config.indextmpl)
     index_text = template % {"items": "".join(items)}
     FileUtils.write_file_in_place(self.indexfile.get_path(), index_text, config.fileperms)
Example #3
0
 def get_prev_incrdate(self, date, dumpok=False, revidok=False):
     '''
     find the most recent incr dump before the
     specified date

     the dump dates come from self.dirs.get_misc_dumpdirs() and are
     walked in the order returned; the walk stops at the entry equal
     to "date", so the list is presumably sorted oldest first
     (TODO confirm)

     if "dumpok" is True, only dumps with status "done" qualify
     otherwise, if "revidok" is True, only dumps whose max rev id file
     exists and holds a positive integer qualify
     ("dumpok" takes precedence when both are set)
     if neither is set, any earlier dump qualifies

     returns the qualifying dump date, or None if there is none
     '''
     previous = None
     for dump in self.dirs.get_misc_dumpdirs() or []:
         if dump == date:
             return previous
         if dumpok:
             status_info = StatusInfo(self.wiki.config, dump, self.wiki.db_name)
             if status_info.get_status(dump) == "done":
                 previous = dump
         elif revidok:
             max_revid_file = MaxRevIDFile(self.wiki.config, dump, self.wiki.db_name)
             if exists(max_revid_file.get_path()):
                 # strip the file contents, not the path
                 revid = FileUtils.read_file(max_revid_file.get_path()).rstrip()
                 try:
                     populated = int(revid) > 0
                 except (TypeError, ValueError):
                     # an empty or non-numeric file is not a usable max rev id;
                     # skip this dump instead of aborting the whole search
                     populated = False
                 if populated:
                     previous = dump
         else:
             previous = dump
     return previous
 def get_outputfile_indextxt(self, filenames_tocheck, expected, wikiname, dump_date):
     """
     build and return a list of html snippets, one per output file,
     each holding a link to the file plus its date and size.
     the snippets are ordered by filename.
     files for which no date is available are silently left out.
     "expected" lists the filenames the dump is supposed to produce;
     a missing expected file is currently just skipped, though this
     may be treated as an error in the future.
     """
     dump_dir = MiscDumpDir(self.args["config"], dump_date).get_dumpdir(wikiname)
     fileinfo_by_name = {
         name: FileUtils.file_info(os.path.join(dump_dir, name))
         for name in filenames_tocheck
     }
     entries = []
     for name in sorted(fileinfo_by_name):
         file_date, file_size = fileinfo_by_name[name]
         log.info("output file %s for %s %s %s", name, wikiname, safe(file_date), safe(file_size))
         # may do more with a missing expected file in the future;
         # for now, just report on the files that have a date
         missing_expected = name in expected and file_date is None
         if missing_expected or not file_date:
             continue
         # FIXME check that this link is correct
         link = make_link(os.path.join(wikiname, dump_date, name), os.path.basename(name))
         entries.append("%s: %s (size %s)<br />" % (link, file_date, file_size))
     return entries
Example #5
0
 def dump_aliases(self):
     '''
     write the aliases file for this wiki and date

     returns True on success, and also when the 'aliases' step
     is not flagged to run (nothing is written in that case)
     any error is logged and then re-raised to the caller
     '''
     if not self.steps['aliases']['run']:
         return True
     try:
         contents = "for wiki %s: alias meow=more\n" % self.wiki.db_name
         aliasesfile = AliasesFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
         FileUtils.write_file_in_place(aliasesfile.get_path(),
                                       contents, self.wiki.config.fileperms)
         return True
     except Exception as ex:
         # the message previously said "namespaces", which pointed
         # anyone reading the logs at the wrong dump step
         log.info("Error encountered dumping aliases for %s ", self.wiki.db_name,
                  exc_info=ex)
         raise
Example #6
0
 def get_status(self, date=None):
     '''
     return the contents of the status file for the given date with
     trailing whitespace removed, or the empty string if the status
     file does not exist
     '''
     if not exists(self.status_file.get_path(date)):
         # no status file: no run or no information available
         return ""
     return FileUtils.read_file(self.status_file.get_path(date)).rstrip()
Example #7
0
def md5sums(wiki, fileperms, files, mandatory):
    '''
    compute an md5sum line for each of the specified files and
    save the accumulated lines to the md5 output file for the
    given wiki and date; the output file is rewritten after
    every file that is summed successfully

    failures are logged and skipped; returns False if any
    failed file is listed in "mandatory", True otherwise
    '''
    sums_file = MD5File(wiki.config, wiki.date, wiki.db_name)
    mandatory_failed = False
    accumulated = ""
    for path in files:
        try:
            accumulated += "%s\n" % md5sum_one_file(path)
            FileUtils.write_file_in_place(sums_file.get_path(),
                                          accumulated, fileperms)
        except Exception as ex:
            log.warning("Error encountered in md5sum for %s", path, exc_info=ex)
            if path in mandatory:
                # only failures on mandatory files affect the result
                mandatory_failed = True
    return not mandatory_failed
Example #8
0
    def dump_max_revid(self):
        '''
        dump maximum rev id from wiki that's older than
        the configured number of seconds (cutoff)

        we have this cutoff so that content really new
        is not dumped; we want to give curators the chance to
        remove problematic entries first.

        a cutoff of some hours is reasonable.

        if the max rev id file for this wiki and date does not
        exist yet, the id is retrieved from the db and written to
        that file; the file is then read back in either case.

        returns the stored rev id plus one, as a string (the end
        rev id is not included in the dump), or None if the file
        could not be read.
        raises ValueError if the file contents are not an integer.
        '''
        revidfile = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
        revid_path = revidfile.get_path()
        if not exists(revid_path):
            log.info("Wiki %s retrieving max revid from db.",
                     self.wiki.db_name)
            query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                     "order by rev_timestamp desc limit 1" % self.cutoff)
            db_info = DbServerInfo(self.wiki, self.wiki.db_name)
            results = db_info.run_sql_and_get_output(query)
            if results:
                lines = results.splitlines()
                # the id is taken from the second output line (presumably the
                # first is a column header); output with fewer than two lines
                # means no id was returned and must not raise IndexError
                if len(lines) > 1 and lines[1].isdigit():
                    FileUtils.write_file_in_place(revid_path,
                                                  lines[1], self.wiki.config.fileperms)
        try:
            # strip the file contents, not the path
            max_revid = FileUtils.read_file(revid_path).rstrip()
        except Exception as ex:
            log.info("Error encountered reading maxrevid from %s ", revid_path,
                     exc_info=ex)
            max_revid = None

        # end rev id is not included in dump
        if max_revid is not None:
            max_revid = str(int(max_revid) + 1)

        log.info("max_revid is %s", safe(max_revid))
        return max_revid
Example #9
0
 def read_max_revid_from_file(self, date=None):
     '''
     read and return max rev id for wiki from file

     "date" selects which run's file to read; it defaults to
     self.wiki.date when not given

     returns the file contents with trailing whitespace removed,
     or None (after logging the error) if the file cannot be read
     '''
     if date is None:
         date = self.wiki.date
     # kept outside the try so the error message below can always
     # name the path, even when building the file object fails
     path = None
     try:
         path = MaxRevIDFile(self.wiki.config, date, self.wiki.db_name).get_path()
         # strip the file contents, not the path
         return FileUtils.read_file(path).rstrip()
     except Exception as ex:
         log.info("Error encountered reading maxrevid from %s ", path,
                  exc_info=ex)
         return None
 def get_stat_text(self, dump_date, wikiname):
     """
     read the status file for the wiki and date and return its
     contents wrapped in parentheses, or None if the contents
     are empty
     """
     status_path = StatusFile(self.args["config"], dump_date, wikiname).get_path()
     stat_contents = FileUtils.read_file(status_path)
     log.info("status for %s %s", wikiname, safe(stat_contents))
     if not stat_contents:
         return None
     return "(%s)" % stat_contents
Example #11
0
 def get_lock(self):
     '''
     acquire lock for wiki and return True.
     the dump directory is created first if it does not exist;
     the lock file then gets "<fqdn> <pid>" of this process written to it
     return False if lock could not be acquired; the error is logged
     '''
     try:
         if not exists(self._config.dump_dir):
             os.makedirs(self._config.dump_dir)
         fhandle = FileUtils.atomic_create(self.lockfile.get_path(), "w")
         try:
             fcntl.lockf(fhandle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
             fhandle.write("%s %d" % (socket.getfqdn(), os.getpid()))
         finally:
             # always release the handle, including when locking or
             # writing fails; otherwise the descriptor would leak
             # NOTE(review): an fcntl lock is dropped once the descriptor
             # is closed, so exclusion presumably relies on atomic_create
             # refusing to create an existing file -- confirm
             fhandle.close()
         return True
     except Exception as ex:
         log.info("Error encountered getting lock", exc_info=ex)
         return False
Example #12
0
 def get_fileinfo(self):
     '''
     look up this file's path and return what FileUtils.file_info
     reports for it
     '''
     path = self.get_path()
     return FileUtils.file_info(path)
Example #13
0
 def read_template(self, name):
     '''
     return the contents of the named file from the
     configured template dir
     '''
     return FileUtils.read_file(os.path.join(self.template_dir, name))
Example #14
0
 def _get_lockfile_contents(self):
     try:
         contents = FileUtils.read_file(self.lockfile.get_path(self.date))
         return contents.split()
     except Exception:
         return None, None
Example #15
0
 def set_status(self, status):
     '''
     store the supplied status text in the status file for the dump run
     '''
     status_path = self.status_file.get_path()
     FileUtils.write_file_in_place(status_path, status, self._config.fileperms)