Exemple #1
 def record_max_revid(self):
     """
     Save the max rev id for the wiki to its max rev id file.

     The value written is self.max_id.  Nothing is written when
     self.dryrun is set.
     """
     if self.dryrun:
         return
     file_obj = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
     # NOTE(review): the permissions argument is assumed to be the wiki
     # config's fileperms, as in the other write_file_in_place calls - confirm
     FileUtils.write_file_in_place(file_obj.get_path(), self.max_id,
                                   self.wiki.config.fileperms)
 def do_all_wikis(self):
     """
     Generate the index.html file for all wikis for the given date.

     Each wiki with a non-empty result from do_one_wiki contributes one
     <li> entry; the entries are substituted into the index template as
     "items" and the result is written to the index file.

     FIXME maybe this should be for the latest run date? Hrm.
     """
     config = self.args["config"]
     items = []
     for wikiname in config.all_wikis_list:
         result = self.do_one_wiki(wikiname)
         if result:
             log.info("result for wiki %s is %s", wikiname, result)
             items.append("<li>" + result + "</li>\n")
     index_text = config.read_template(config.indextmpl) % {"items": "".join(items)}
     FileUtils.write_file_in_place(self.indexfile.get_path(), index_text, config.fileperms)
Exemple #3
 def get_prev_incrdate(self, date, dumpok=False, revidok=False):
     """
     Find the most recent incr dump before the specified date.

     if "dumpok" is True, find most recent dump that completed successfully
     if "revidok" is True, find most recent dump that has a populated maxrevid.txt file
     if neither is set, any dump qualifies

     Returns the dump date found, or None if there is no such dump.
     """
     # NOTE(review): this relies on get_misc_dumpdirs() returning dates
     # oldest first - confirm
     old = self.dirs.get_misc_dumpdirs()
     if not old:
         return None

     previous = None
     for dump in old:
         if dump == date:
             return previous
         if dumpok:
             status_info = StatusInfo(self.wiki.config, dump, self.wiki.db_name)
             if status_info.get_status(dump) == "done":
                 previous = dump
         elif revidok:
             max_revid_file = MaxRevIDFile(self.wiki.config, dump, self.wiki.db_name)
             revid_path = max_revid_file.get_path()
             if exists(revid_path):
                 # strip the file contents, not the path
                 revid = FileUtils.read_file(revid_path).rstrip()
                 if int(revid) > 0:
                     previous = dump
         else:
             previous = dump
     # the specified date was not among the dumps: the last qualifying one wins
     return previous
 def get_outputfile_indextxt(self, filenames_tocheck, expected, wikiname, dump_date):
     """
     generate and return a list of text strings that provide a
     link to the given files, along with filename, size and date.
     if the file does not exist, it will be silently excluded from
     the list.

     the expected list is a list of filenames that are expected to
     be produced by the dump; currently no errors are generated
     on this basis but this may change in the future.
     """
     dirinfo = MiscDumpDir(self.args["config"], dump_date)
     path = dirinfo.get_dumpdir(wikiname)
     output_fileinfo = {}
     for filename in filenames_tocheck:
         output_fileinfo[filename] = FileUtils.file_info(os.path.join(path, filename))

     files_text = []
     for filename in sorted(output_fileinfo):
         file_date, file_size = output_fileinfo[filename]
         log.info("output file %s for %s %s %s", filename, wikiname, safe(file_date), safe(file_size))
         if filename in expected and file_date is None:
             # may do more with this sort of error in the future
             # for now, just get stats on the other files
             continue
         if file_date:
             # NOTE(review): the last two format arguments are assumed to be
             # the file date and size, per the "(size %s)" format - confirm
             # FIXME check that this link is correct
             link = make_link(os.path.join(wikiname, dump_date, filename), os.path.basename(filename))
             files_text.append("%s: %s (size %s)<br />" % (link, file_date, file_size))
     return files_text
Exemple #5
 def dump_aliases(self):
     """
     Write the aliases file for the wiki, if the 'aliases' step is to be run.

     returns True on success (or when the step is not to be run)
     False or exception on error are fine
     """
     if not self.steps['aliases']['run']:
         return True
     try:
         contents = "for wiki %s: alias meow=more\n" % self.wiki.db_name
         aliasesfile = AliasesFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
         # NOTE(review): the target path is assumed to be
         # aliasesfile.get_path() - confirm
         FileUtils.write_file_in_place(aliasesfile.get_path(),
                                       contents, self.wiki.config.fileperms)
         return True
     except Exception as ex:
         # message names the step actually being run (aliases, not namespaces)
         log.info("Error encountered dumping aliases for %s ", self.wiki.db_name,
                  exc_info=ex)
         return False
Exemple #6
 def get_status(self, date=None):
     """
     return the status of the dump run for the given wiki and date,
     or the empty string if there is no run or no information available

     The status is the content of the status file with trailing
     whitespace removed.
     """
     status_path = self.status_file.get_path(date)
     if not exists(status_path):
         return ""
     return FileUtils.read_file(status_path).rstrip()
Exemple #7
def md5sums(wiki, fileperms, files, mandatory):
    """
    generate md5sums for specified files for dump of
    given wiki and specific date, and save them to
    output file

    A failure on any one file is logged and the remaining files are
    still processed.

    Returns False if a file listed in mandatory failed, True otherwise.
    """
    md5file = MD5File(wiki.config, wiki.date, wiki.db_name)
    text = ""
    errors = False
    for fname in files:
        try:
            text = text + "%s\n" % md5sum_one_file(fname)
            # NOTE(review): the target path is assumed to be
            # md5file.get_path() - confirm
            # the output file is rewritten after every file
            FileUtils.write_file_in_place(md5file.get_path(),
                                          text, fileperms)
        except Exception as ex:
            log.warning("Error encountered in md5sum for %s", fname, exc_info=ex)
            if fname in mandatory:
                errors = True
    return not errors
Exemple #8
    def dump_max_revid(self):
        """
        dump maximum rev id from wiki that's older than
        the configured number of seconds (cutoff)

        we have this cutoff so that content really new
        is not dumped; we want to give curators the chance to
        remove problematic entries first.

        a cutoff of some hours is reasonable.

        If the max rev id file does not exist yet, the value is retrieved
        from the database and written to the file.  The file is then read
        back.

        Returns the stored max rev id plus one, as a string (the end rev
        id is not included in the dump), or None if the file could not
        be read.
        """
        revidfile = MaxRevIDFile(self.wiki.config, self.wiki.date, self.wiki.db_name)
        revid_path = revidfile.get_path()
        if not exists(revid_path):
            log.info("Wiki %s retrieving max revid from db.",
                     self.wiki.db_name)
            query = ("select rev_id from revision where rev_timestamp < \"%s\" "
                     "order by rev_timestamp desc limit 1" % self.cutoff)
            db_info = DbServerInfo(self.wiki, self.wiki.db_name)
            results = db_info.run_sql_and_get_output(query)
            if results:
                lines = results.splitlines()
                # the value is taken from the second output line; make sure
                # that line exists before indexing it
                if len(lines) > 1 and lines[1].isdigit():
                    FileUtils.write_file_in_place(revid_path,
                                                  lines[1], self.wiki.config.fileperms)
        try:
            # strip the file contents, not the path
            max_revid = FileUtils.read_file(revid_path).rstrip()
        except Exception as ex:
            log.info("Error encountered reading maxrevid from %s ", revid_path,
                     exc_info=ex)
            max_revid = None

        # end rev id is not included in dump
        if max_revid is not None:
            max_revid = str(int(max_revid) + 1)

        log.info("max_revid is %s", safe(max_revid))
        return max_revid
Exemple #9
 def read_max_revid_from_file(self, date=None):
     """
     read and return max rev id for wiki from file

     date defaults to the date of the current wiki run.  The file
     contents are returned with trailing whitespace removed; None is
     returned if the file cannot be read.
     """
     if date is None:
         date = self.wiki.date
     # built outside the try block so the error handler can always refer to it
     file_obj = MaxRevIDFile(self.wiki.config, date, self.wiki.db_name)
     try:
         # strip the file contents, not the path
         return FileUtils.read_file(file_obj.get_path()).rstrip()
     except Exception as ex:
         log.info("Error encountered reading maxrevid from %s ", file_obj.get_path(),
                  exc_info=ex)
         return None
 def get_stat_text(self, dump_date, wikiname):
     """
     generate and return the text string describing
     the status of the dump of the wiki for the given date

     Returns the status file contents wrapped in parentheses, or None
     if the contents are empty.
     """
     stat = StatusFile(self.args["config"], dump_date, wikiname)
     stat_contents = FileUtils.read_file(stat.get_path())
     log.info("status for %s %s", wikiname, safe(stat_contents))
     return "(%s)" % (stat_contents) if stat_contents else None
Exemple #11
 def get_lock(self):
     """
     acquire lock for wiki and return True.
     if it does not exist, create it
     return False if lock could not be acquired

     The lock file is created atomically, locked exclusively without
     blocking, and filled with "<fqdn> <pid>" of the current process.
     Any error is logged and reported as False.
     """
     try:
         if not exists(self._config.dump_dir):
             # NOTE(review): assumed the dump directory is created here when
             # it is missing - confirm
             os.makedirs(self._config.dump_dir)
         fhandle = FileUtils.atomic_create(self.lockfile.get_path(), "w")
         fcntl.lockf(fhandle.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
         fhandle.write("%s %d" % (socket.getfqdn(), os.getpid()))
         # NOTE(review): fhandle is neither stored nor closed here; whether
         # the fcntl lock and the written text outlive this call depends on
         # that - confirm
         return True
     except Exception as ex:
         log.info("Error encountered getting lock", exc_info=ex)
         return False
Exemple #12
 def get_fileinfo(self):
     """
     return a FileInfo object corresponding to the file

     The file is the one at self.get_path().
     """
     return FileUtils.file_info(self.get_path())
Exemple #13
 def read_template(self, name):
     """
     read a file out of the configured template dir and return the contents

     name is the file name, relative to self.template_dir.
     """
     return FileUtils.read_file(os.path.join(self.template_dir, name))
Exemple #14
 def _get_lockfile_contents(self):
     """
     Read the lock file for self.date and return its contents split on whitespace.

     Returns (None, None) if the file cannot be read or split.
     """
     try:
         contents = FileUtils.read_file(self.lockfile.get_path(self.date))
         # NOTE(review): presumably two fields, host and pid, as written by
         # the lock holder - confirm
         return contents.split()
     except Exception:
         # an unreadable lock file is reported as "no information"
         return None, None
Exemple #15
 def set_status(self, status):
     """
     write out the status information supplied for the dump run

     The status is written to the status file with the configured
     file permissions.
     """
     FileUtils.write_file_in_place(self.status_file.get_path(), status, self._config.fileperms)