def main(cfg):
    """
    Migration Purge Record Ager (mpra) reads the database tables BFMIGRREC and
    BFPURGEREC and reports migration and purge records that are older than the
    age specified in the configuration.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    age = cfg.get_time('mpra', 'age')
    end = time.time() - age

    start = mpra_lib.mpra_fetch_recent("migr")
    # If the configured age has been moved back in time, so that end is before
    # start, we need to reset and start scanning from the beginning of time.
    if end < start:
        start = 0

    CrawlConfig.log("migr recs after %d (%s) before %d (%s)" %
                    (start, util.ymdhms(start), end, util.ymdhms(end)))
    result = mpra_lib.age("migr", start=start, end=end, mark=True)
    CrawlConfig.log("found %d migration records in the range" % result)
    rval = result

    # NOTE(review): the value fetched here is never used below -- confirm
    # whether a purge-record age scan was intended at this point.
    start = mpra_lib.mpra_fetch_recent("purge")

    CrawlConfig.log("Looking for expired purge locks")
    result = mpra_lib.xplocks(mark=True)
    CrawlConfig.log("found %d expired purge locks" % result)
    rval += result

    return rval
def test_ctor_reset_atime_cfg_true(self):
    """
    If reset_atime is specified in the config as True, it should be True
    """
    cfgpath = self.tmpdir(util.my_name() + ".cfg")

    # write self.cfg_d out unchanged (assumes it carries a truthy
    # cv.reset_atime -- the assertion below expects True)
    self.write_cfg_file(cfgpath, self.cfg_d)

    # install the freshly written file as the default configuration
    CrawlConfig.get_config(cfname=cfgpath, reset=True)

    # an HSI object built without an explicit argument should pick up the
    # value from the config
    hobj = hpss.HSI(connect=False)
    self.expected(True, hobj.reset_atime)

    # drop the default config so later tests start clean
    CrawlConfig.get_config(reset=True, soft=True)
def test_ctor_reset_atime_call_true(self):
    """
    If reset_atime is specified in the call as True, it should be True, even
    if it's specified as False in the config
    """
    cfgpath = self.tmpdir(util.my_name() + ".cfg")

    # build and write a config that explicitly turns reset_atime off
    cfdata = copy.deepcopy(self.cfg_d)
    cfdata['cv']['reset_atime'] = 'no'
    self.write_cfg_file(cfgpath, cfdata)

    # install it as the default configuration
    CrawlConfig.get_config(cfname=cfgpath, reset=True)

    # the constructor argument must win over the config setting
    hobj = hpss.HSI(connect=False, reset_atime=True)
    self.expected(True, hobj.reset_atime)

    # drop the default config so later tests start clean
    CrawlConfig.get_config(reset=True, soft=True)
def test_ctor_reset_atime_default(self):
    """
    If reset_atime is not specified in the config or argument list, it
    should default to False
    """
    cfgpath = self.tmpdir(util.my_name() + ".cfg")

    # build and write a config with the reset_atime setting removed
    cfdata = copy.deepcopy(self.cfg_d)
    del cfdata['cv']['reset_atime']
    self.write_cfg_file(cfgpath, cfdata)

    # install it as the default configuration
    CrawlConfig.get_config(cfname=cfgpath, reset=True)

    # with neither config nor argument, the attribute should be False
    hobj = hpss.HSI(connect=False)
    self.expected(False, hobj.reset_atime)

    # drop the default config so later tests start clean
    CrawlConfig.get_config(reset=True, soft=True)
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.

    :param globspec: glob pattern selecting priority files to check
    :param cosinfo: COS information (accepted for interface compatibility;
        not read in this function)
    :return: count of files checked and moved to the 'completed' directory
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    # destination directory for handled files ('completed' in the tcc section)
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # Bug fix: count each file so the return value matches the docstring;
        # previously rval was never incremented and we always returned 0.
        rval += 1
    return rval
def check_hash_algorithm(self, cf_stem, alg, checkfor=None):
    """
    With hash_algorithm = *alg* in config, verify that a created hash is of
    the expected type.

    :param cf_stem: stem for the generated config file name
    :param alg: value for cv.hash_algorithm; '(none)' omits the setting
    :param checkfor: hash type expected in the hashlist output (defaults
        to *alg*)
    """
    if checkfor is None:
        checkfor = alg

    # generate a config file and make it the default config
    cf_name = self.tmpdir(cf_stem + ".cfg")
    cd = copy.deepcopy(self.cfg_d)
    if alg == '(none)':
        del cd['cv']['hash_algorithm']
    else:
        cd['cv']['hash_algorithm'] = alg
    self.write_cfg_file(cf_name, cd)
    CrawlConfig.get_config(cfname=cf_name, reset=True)

    # Get an hsi object
    testfile = self.plist[1]
    try:
        h = hpss.HSI()
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
        # Bug fix: re-raise unexpected HSI errors. Previously they were
        # silently swallowed, leaving 'h' undefined and producing a
        # misleading NameError below instead of the real failure.
        raise

    # if necessary, delete any hash on the test file
    result = h.hashlist(testfile)
    if "(none)" not in result:
        h.hashdelete(testfile)

    # generate a hash on the test file
    h.hashcreate(testfile)

    # verify that the hash created is of the proper type
    result = h.hashlist(testfile)
    self.expected_in(checkfor, result)
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 on success, 1 if anything in report generation or mailing
    raised an exception (the failure is logged).
    """
    rval = 0
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()
        # NOTE(review): '%Y.%m%d' has no separator between month and day --
        # confirm this format is intended
        stamp = time.strftime("%Y.%m%d %H:%M:%S", time.localtime())
        subject = "%s %s" % (cfg.get('rpt', 'subject'), stamp)
        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))
    return rval
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 on success, 1 if report generation or mailing failed (the
    failure is logged).
    """
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()
        # NOTE(review): '%Y.%m%d' has no separator between month and day --
        # confirm this format is intended
        when = time.strftime("%Y.%m%d %H:%M:%S", time.localtime())
        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj="%s %s" % (cfg.get('rpt', 'subject'), when),
                       msg=rpt_lib.get_report())
        return 0
    except Exception as e:
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))
        return 1
last_obj_id, last_obj_id, correct, error) errcount += error CrawlConfig.log("last nsobject in range: %d" % last_obj_id) return errcount # ----------------------------------------------------------------------------- def tcc_priority(globspec, cosinfo): """ Handle any files matching globspec. Return the number of files processed. """ rval = 0 cfg = CrawlConfig.get_config() pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp') for filepath in glob.glob(globspec): tcc_lib.check_file(filepath, verbose=False, plugin=True) cpath = U.pathjoin(pri_compdir, U.basename(filepath)) os.rename(filepath, cpath) return rval # ----------------------------------------------------------------------------- if __name__ == '__main__': main(CrawlConfig.get_config())
"failures: %d" % t_failures)
# NOTE(review): the line above is the tail of a call from a function whose
# head is not visible in this chunk; original indentation was lost.

# Report the dimension data in the log
d = Dimension.Dimension(name='cos')
t = Dimension.Dimension(name='cart')
CrawlConfig.log(d.report())
CrawlConfig.log(t.report())
return failures


# -----------------------------------------------------------------------------
def get_stats():
    """
    Return the number of files checksummed, checksums matched, and checksums
    failed.

    Matches and failures are stored in the cvstats table but total checksum
    count is retrieved from the checkables table by counting records with
    checksum = 1. This avoids discrepancies where the checksum count in
    cvstats might get out of synch with the records in checkables.

    :return: tuple (checksums, matches, failures) of counts from cv_lib
    """
    checksums = cv_lib.get_checksum_count()
    (matches, failures) = cv_lib.get_match_fail_count()
    return(checksums, matches, failures)


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    cfg = CrawlConfig.get_config()
    main(cfg)
"matches: %d; " % t_matches +
"failures: %d" % t_failures)
# NOTE(review): the two lines above are the tail of a call from a function
# whose head is not visible in this chunk; original indentation was lost.

# Report the dimension data in the log
d = Dimension.Dimension(name='cos')
t = Dimension.Dimension(name='cart')
CrawlConfig.log(d.report())
CrawlConfig.log(t.report())
return failures


# -----------------------------------------------------------------------------
def get_stats():
    """
    Return the number of files checksummed, checksums matched, and checksums
    failed.

    Matches and failures are stored in the cvstats table but total checksum
    count is retrieved from the checkables table by counting records with
    checksum = 1. This avoids discrepancies where the checksum count in
    cvstats might get out of synch with the records in checkables.

    :return: tuple (checksums, matches, failures) of counts from cv_lib
    """
    checksums = cv_lib.get_checksum_count()
    (matches, failures) = cv_lib.get_match_fail_count()
    return (checksums, matches, failures)


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    cfg = CrawlConfig.get_config()
    main(cfg)
last_obj_id = int(bf['OBJECT_ID']) tcc_lib.record_checked_ids(cfg, last_obj_id, last_obj_id, correct, error) errcount += error CrawlConfig.log("last nsobject in range: %d" % last_obj_id) return errcount # ----------------------------------------------------------------------------- def tcc_priority(globspec, cosinfo): """ Handle any files matching globspec. Return the number of files processed. """ rval = 0 cfg = CrawlConfig.get_config() pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp') for filepath in glob.glob(globspec): tcc_lib.check_file(filepath, verbose=False, plugin=True) cpath = U.pathjoin(pri_compdir, U.basename(filepath)) os.rename(filepath, cpath) return rval # ----------------------------------------------------------------------------- if __name__ == '__main__': main(CrawlConfig.get_config())