def test_ctor_attrs(self):
    """
    Verify that a newly created Dimension object has the following
    attributes:
     - name (string)
     - sampsize (small float value, e.g., 0.005)
     - p_sum (empty dict)
     - s_sum (empty dict)
     - methods
        > sum_total
        > load
    """
    dimname = 'cos'
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    a = Dimension(name=dimname, sampsize=0.005)
    for attr in ['name', 'sampsize', 'p_sum', 's_sum', 'sum_total',
                 'load']:
        self.assertTrue(hasattr(a, attr),
                        "Object %s does not have expected attribute %s" %
                        (a, attr))
def test_sum_total(self):
    """
    sum_total() should return the sum of all the 'count' values in either
    the p_sum or s_sum dictionary.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    a = Dimension(name='sum_total')
    a.p_sum = {'6001': {'count': 2, 'pct': 50.0},
               '5081': {'count': 2, 'pct': 50.0}}
    a.s_sum = {'6001': {'count': 2, 'pct': 40.0},
               '5081': {'count': 3, 'pct': 60.0}}
    self.expected(4, a.sum_total())
    self.expected(4, a.sum_total(dict=a.p_sum))
    self.expected(5, a.sum_total(which='s'))
    self.expected(5, a.sum_total(dict=a.s_sum))
def test_alert_email_mtcaller(self):
    """
    Generate an e-mail alert and verify that it was sent (this is where we
    use 'monkey patching'). For this case, caller is ''.
    """
    self.dbgfunc()
    fakesmtp.inbox = []
    logfile = self.tmpdir('alert_email.log')
    targets = "[email protected], [email protected], [email protected]"
    payload = 'this is an e-mail alert'
    sender = 'hpssic@' + util.hostname(long=True)
    cfg = CrawlConfig.CrawlConfig()
    cfg.add_section('crawler')
    cfg.add_section('alerts')
    cfg.set('crawler', 'logpath', logfile)
    cfg.set('alerts', 'email', targets)
    CrawlConfig.log(logpath=logfile, close=True)
    x = Alert.Alert(caller='', msg=payload, cfg=cfg)
    m = fakesmtp.inbox[0]
    self.expected(targets, ', '.join(m.to_address))
    self.expected(m.from_address, sender)
    self.expected_in('sent mail to', util.contents(logfile))
    self.expected_in(payload, m.fullmessage)
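# A minimal sketch of the monkey patching the e-mail alert tests rely on.
# This is illustrative only, not hpssic's actual fakesmtp module: the idea
# is that smtplib.SMTP is replaced with a stub that captures outgoing mail
# in an in-memory inbox, so tests can inspect "sent" messages without a
# mail server.
import smtplib


class FakeSMTP(object):
    inbox = []

    def __init__(self, host='localhost', port=25):
        pass

    def sendmail(self, from_address, to_address, fullmessage):
        # record the message instead of delivering it
        FakeSMTP.inbox.append((from_address, to_address, fullmessage))

    def quit(self):
        pass


# installing the patch: anything that calls smtplib.SMTP() now gets the stub
smtplib.SMTP = FakeSMTP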
def test_alert_shell_nospec(self):
    """
    Generate a shell alert and verify that it ran. With no '%s' in the
    shell alert string, no message should be offered for formatting.
    """
    self.dbgfunc()
    logfile = self.tmpdir('alert_shell.log')
    outfile = self.tmpdir('alert_shell.out')
    runfile = self.tmpdir('runme')
    f = open(runfile, 'w')
    f.write("#!/bin/bash\n")
    f.write("echo \"ALERT: $*\" > %s\n" % outfile)
    f.close()
    os.chmod(runfile,
             stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
             stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP |
             stat.S_IROTH | stat.S_IXOTH)
    cfg = CrawlConfig.CrawlConfig()
    cfg.add_section('crawler')
    cfg.add_section('AlertTest')
    cfg.add_section('alert_section')
    cfg.set('crawler', 'logpath', logfile)
    cfg.set('AlertTest', 'alerts', 'alert_section')
    cfg.set('alert_section', 'shell', runfile)
    CrawlConfig.log(logpath=logfile, close=True)
    x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                    cfg=cfg)
    expected = "ran: '%s'" % runfile
    self.expected_in(expected, util.contents(logfile))
    self.assertPathPresent(outfile)
def test_load_new(self):
    """
    With the database and checkables table in place, create a new
    Dimension that is not in the table. Calling load() on it should be a
    no-op -- the object should not be stored to the database and its
    contents should not be changed.
    """
    self.dbgfunc()
    # reboot the database and create the checkables table without adding
    # any data
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    ignore = Dimension(name='foobar')

    # get a Dimension object that is not in the table
    test = Dimension(name='notindb')

    # make a copy of the object for reference (not just a handle to the
    # same object)
    ref = copy.deepcopy(test)

    # call load(), which should be a no-op
    test.load()

    # verify that the object didn't change
    self.expected(ref.name, test.name)
    self.expected(ref.sampsize, test.sampsize)
    self.expected(ref.p_sum, test.p_sum)
    self.expected(ref.s_sum, test.s_sum)
def test_repr(self):
    """
    Method __repr__ should return "Dimension(name='foo')", a string that
    eval() can turn back into an equivalent object.
    """
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    exp = "Dimension(name='foo')"
    a = eval(exp)
    self.expected(exp, a.__repr__())
def test_load_already(self):
    """
    With the database and a checkables table in place and records in the
    table, calling load() on a Dimension should load the information from
    the table into the object. However, it should only count records where
    last_check <> 0.
    """
    self.dbgfunc()
    U.conditional_rm(self.dbname())
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    Checkable.Checkable.ex_nihilo()
    chk = Checkable.Checkable
    testdata = [chk(rowid=1, path="/abc/001", type='f', cos='6001',
                    checksum=0, last_check=0),
                chk(rowid=2, path="/abc/002", type='f', cos='6002',
                    checksum=0, last_check=5),
                chk(rowid=3, path="/abc/003", type='f', cos='6003',
                    checksum=1, last_check=0),
                chk(rowid=4, path="/abc/004", type='f', cos='6001',
                    checksum=1, last_check=17),
                chk(rowid=5, path="/abc/005", type='f', cos='6002',
                    checksum=0, last_check=0),
                chk(rowid=6, path="/abc/006", type='f', cos='6003',
                    checksum=0, last_check=8),
                chk(rowid=7, path="/abc/007", type='f', cos='6001',
                    checksum=0, last_check=0),
                chk(rowid=8, path="/abc/008", type='f', cos='6002',
                    checksum=0, last_check=19),
                chk(rowid=9, path="/abc/009", type='f', cos='6003',
                    checksum=0, last_check=0),
                ]

    # insert some test data into the table
    for t in testdata:
        t.persist()

    # get a default Dimension with the same name as the data in the table
    q = Dimension(name='cos')

    # this should load the data from the table into the object
    q.load()

    # verify the loaded data in the object
    self.expected('cos', q.name)
    self.assertTrue('6001' in q.p_sum.keys(),
                    "Expected '6001' in p_sum.keys()")
    self.assertTrue('6002' in q.p_sum.keys(),
                    "Expected '6002' in p_sum.keys()")
    self.assertTrue('6003' in q.p_sum.keys(),
                    "Expected '6003' in p_sum.keys()")
    self.assertTrue('6001' in q.s_sum.keys(),
                    "Expected '6001' in s_sum.keys()")
    self.assertTrue('6002' in q.s_sum.keys(),
                    "Expected '6002' in s_sum.keys()")
    self.assertTrue('6003' in q.s_sum.keys(),
                    "Expected '6003' in s_sum.keys()")
def main(cfg):
    """
    Plugin example
    """
    try:
        msg = cfg.get('example', 'message')
    except ConfigParser.NoOptionError:
        msg = 'No message in configuration'
    CrawlConfig.log('EXAMPLE: This is plugin EXAMPLE saying "%s"' % msg)
def main(cfg):
    """
    Migration Purge Record Ager (mpra) reads the database tables BFMIGRREC
    and BFPURGEREC and reports migration and purge records that are older
    than the age specified in the configuration.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    age = cfg.get_time('mpra', 'age')
    end = time.time() - age

    start = mpra_lib.mpra_fetch_recent("migr")
    #
    # If the configured age has been moved back in time, so that end is
    # before start, we need to reset and start scanning from the beginning
    # of time.
    #
    if end < start:
        start = 0
    CrawlConfig.log("migr recs after %d (%s) before %d (%s)" %
                    (start, util.ymdhms(start), end, util.ymdhms(end)))
    result = mpra_lib.age("migr", start=start, end=end, mark=True)
    CrawlConfig.log("found %d migration records in the range" % result)
    rval = result

    start = mpra_lib.mpra_fetch_recent("purge")
    CrawlConfig.log("Looking for expired purge locks")
    result = mpra_lib.xplocks(mark=True)
    CrawlConfig.log("found %d expired purge locks" % result)
    rval += result

    return rval
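# Worked example of the window-reset logic above, with illustrative epoch
# values (not from any real run):
#
#   age   = 30 * 24 * 3600       # 30 days, from cfg.get_time('mpra', 'age')
#   end   = time.time() - age    # e.g. 1399000000
#   start = 1400000000           # last recorded scan point
#   if end < start:              # window inverted: the age was increased
#       start = 0                # so rescan from the beginning of time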
def test_ctor_defaults(self):
    """
    A new Dimension with only the name specified should have the right
    defaults.
    """
    dimname = 'cos'
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    a = Dimension(name=dimname)
    self.expected(dimname, a.name)
    self.expected(0.01, a.sampsize)
    self.expected({}, a.p_sum)
    self.expected({}, a.s_sum)
def test_ctor_bad_attr(self):
    """
    Attempting to create a Dimension with attrs that are not in the
    settable list should get an exception.
    """
    dimname = 'bad_attr'
    CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
    self.assertRaisesMsg(StandardError,
                         "Attribute 'catl' is not valid",
                         Dimension,
                         name=dimname,
                         catl=[1, 2, 3])
    self.assertRaisesMsg(StandardError,
                         "Attribute 'aardvark' is not valid",
                         Dimension,
                         name=dimname,
                         aardvark='Fanny Brice')
def test_html_report(self):
    """
    Try running 'html report > filename' and verify that 1) no traceback
    occurs and 2) something is actually written to the output file.
    """
    self.dbgfunc()
    cfpath = self.tmpdir("crawl.cfg")
    cfg = CrawlConfig.add_config()
    cfg.crawl_write(open(cfpath, 'w'))
    cmd = "html report --config %s" % cfpath
    CrawlConfig.log(cmd, close=True)
    result = pexpect.run(cmd)
    if "HPSS Unavailable" in result:
        pytest.skip("HPSS Unavailable")
    self.validate_report(result)
def test_env_set_pre(self):
    """
    TEST: set predefined environment variable from [env] entry
    unconditionally

    EXP: the old value gets overwritten
    """
    self.dbgfunc()
    sname = 'env'
    evname = 'UTIL_TEST'
    pre_val = "one:two:three"
    add = "four:five:six"
    exp = add

    # make sure the target env variable is set to a known value
    with util.tmpenv(evname, pre_val):
        # create a config object with an 'env' section and a non-'+' option
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section(sname)
        cfg.set(sname, evname, add)

        # pass the config object to util.env_update()
        util.env_update(cfg)

        # verify that the target env variable now contains the new value
        # and the old value is gone
        self.expected(exp, os.environ[evname])
        self.assertTrue(pre_val not in os.environ[evname],
                        "The old value should be gone but still seems " +
                        "to be hanging around")
def test_env_add_folded_pre(self):
    """
    TEST: add to a preset environment variable from a folded [env] entry

    EXP: the payload, with the whitespace squeezed out, gets appended to
    the old value
    """
    self.dbgfunc()
    sname = 'env'
    evname = 'UTIL_TEST'
    pre_val = "one:two:three"
    add = "four:\n five:\n six"
    exp = ":".join([pre_val, re.sub(r"\n\s*", "", add)])

    # make sure the target env variable has the expected value
    with util.tmpenv(evname, pre_val):
        # create a config object with an 'env' section and a folded '+'
        # option
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section(sname)
        cfg.set(sname, evname, '+' + add)

        # pass the config object to util.env_update()
        util.env_update(cfg)

        # verify that the variable was set to the expected value
        self.expected(exp, os.environ[evname])
def test_env_add_pre(self):
    """
    TEST: add to a predefined environment variable from [env] entry

    EXP: payload is appended to the old value
    """
    self.dbgfunc()
    sname = 'env'
    evname = 'UTIL_TEST'
    pre_val = "one:two:three"
    add = "four:five:six"
    exp = ":".join([pre_val, add])

    # make sure the target env variable is set to a known value
    with util.tmpenv(evname, pre_val):
        # create a config object with an 'env' section and a '+' option
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section(sname)
        cfg.set(sname, evname, "+" + add)

        # pass the config object to util.env_update()
        util.env_update(cfg)

        # verify that the target env variable now contains both old and
        # added values
        self.expected(exp, os.environ[evname])
def test_env_add_folded_none(self):
    """
    TEST: add to an undefined environment variable from a folded [env]
    entry

    EXP: the value gets set to the payload with the whitespace squeezed
    out
    """
    self.dbgfunc()
    sname = 'env'
    evname = 'UTIL_TEST'
    add = "four:\n five:\n six"
    exp = re.sub(r"\n\s*", "", add)

    # make sure the target env variable is not defined
    with util.tmpenv(evname, None):
        # create a config object with an 'env' section and a '+' option
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section(sname)
        cfg.set(sname, evname, '+' + add)

        # pass the config object to util.env_update()
        util.env_update(cfg)

        # verify that the variable was set to the expected value
        self.expected(exp, os.environ[evname])
def test_env_set_folded_none(self):
    """
    TEST: set undefined environment variable from a folded [env] entry
    unconditionally

    EXP: the value gets set
    """
    self.dbgfunc()
    sname = 'env'
    evname = 'UTIL_TEST'
    newval = "one:\n two:\n three"
    exp = re.sub(r"\n\s*", "", newval)

    # make sure the target env variable is not defined
    with util.tmpenv(evname, None):
        # create a config object with an 'env' section and a non-'+' option
        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section(sname)
        cfg.set(sname, evname, newval)

        # pass the config object to util.env_update()
        util.env_update(cfg)

        # verify that the variable was set to the expected value
        self.expected(exp, os.environ[evname])
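# The five env tests above all exercise util.env_update().  Below is a
# minimal sketch of the semantics they assume -- illustrative only, not
# hpssic's actual implementation.  It also assumes option names keep their
# case (stock ConfigParser lowercases them; CrawlConfig presumably does not).
import os
import re


def env_update_sketch(cfg, section='env'):
    for evname in cfg.options(section):
        raw = cfg.get(section, evname)
        append = raw.startswith('+')
        # folded (multi-line) values have newline-plus-indent squeezed out
        value = re.sub(r"\n\s*", "", raw.lstrip('+'))
        if append and evname in os.environ:
            # '+' prefix: append the payload to any existing value
            os.environ[evname] = os.environ[evname] + ':' + value
        else:
            # no '+' (or nothing to append to): set unconditionally
            os.environ[evname] = value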
def test_ctor_reset_atime_cfg_true(self):
    """
    If reset_atime is specified in the config as True, it should be True
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file with reset_atime specified as True
    self.write_cfg_file(cf_name, self.cfg_d)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)

    # get an hpss.HSI object and check its reset_atime attribute
    h = hpss.HSI(connect=False)
    self.expected(True, h.reset_atime)
    CrawlConfig.get_config(reset=True, soft=True)
def main(cfg):
    """
    Generate an html-formatted report and store it at the designated
    location
    """
    CrawlConfig.log("html_plugin starting")
    fpath = cfg.get('html', 'output_path')
    rpt = html_lib.get_html_report(cfg=cfg)

    # write the new report beside the old one, then rotate it into place
    # so readers never see a partially written file
    npath = fpath + '.new'
    opath = fpath + '.old'
    with open(npath, 'w') as out:
        out.write(rpt)
    if os.path.exists(fpath):
        os.rename(fpath, opath)
    os.rename(npath, fpath)
    CrawlConfig.log("html_plugin finished")
def main(cfg):
    """
    HSI demo
    """
    CrawlConfig.log("hsi-demo: sending output to hsi.out")
    hsi_prompt = "]:"
    S = pexpect.spawn("/opt/public/bin/hsi")
    S.logfile = f = open("hsi.out", 'a')
    S.expect(hsi_prompt)
    S.sendline("ls")
    S.expect(hsi_prompt)
    S.sendline("quit")
    S.expect(pexpect.EOF)
    S.logfile.close()
    S.close()
def test_alert_log(self):
    """
    Generate a log alert and verify that the message was written to the
    correct log file.
    """
    self.dbgfunc()
    logfile = self.tmpdir('alert_log.log')
    cfg = CrawlConfig.CrawlConfig()
    cfg.add_section('crawler')
    cfg.add_section('AlertTest')
    cfg.add_section('alert_section')
    cfg.set('crawler', 'logpath', logfile)
    cfg.set('AlertTest', 'alerts', 'alert_section')
    cfg.set('alert_section', 'log', "%s")
    CrawlConfig.log(logpath=logfile, close=True)
    x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                    cfg=cfg)
    self.expected_in('this is a test message', util.contents(logfile))
def test_maybe_update_hsi_cant(muh_prep, tmpdir):
    """
    If we don't have write permission on the target, then even if we
    should update, we can't. In this case, should log a message.
    """
    pytest.dbgfunc()
    lp = tmpdir.join('crawl.test.log')
    # the muh_prep fixture hangs its artifacts on the test function object
    rf = test_maybe_update_hsi_cant
    path = ":".join([rf.bin.strpath, rf.hsihome])
    with U.tmpenv('PATH', path):
        CrawlConfig.log(logpath=lp.strpath, close=True)
        hpss.maybe_update_hsi()

    c = rf.file.read()
    assert 'not changed' in c
    assert os.path.exists(lp.strpath)
    c = lp.read()
    assert MSG.hsi_wrap_ood in c
    CrawlConfig.log(close=True)
def test_ctor_reset_atime_call_true(self):
    """
    If reset_atime is specified in the call as True, it should be True,
    even if it's specified as False in the config
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file with reset_atime set to 'no'
    cfg = copy.deepcopy(self.cfg_d)
    cfg['cv']['reset_atime'] = 'no'
    self.write_cfg_file(cf_name, cfg)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)

    # get an hpss.HSI object and check its reset_atime attribute
    h = hpss.HSI(connect=False, reset_atime=True)
    self.expected(True, h.reset_atime)
    CrawlConfig.get_config(reset=True, soft=True)
def test_ctor_reset_atime_default(self):
    """
    If reset_atime is not specified in the config or argument list, it
    should default to False
    """
    cf_name = self.tmpdir(util.my_name() + ".cfg")

    # write out a config file with no reset_atime spec
    cd = copy.deepcopy(self.cfg_d)
    del cd['cv']['reset_atime']
    self.write_cfg_file(cf_name, cd)

    # make the test config the default
    CrawlConfig.get_config(cfname=cf_name, reset=True)

    # get an hpss.HSI object and check its reset_atime attribute
    h = hpss.HSI(connect=False)
    self.expected(False, h.reset_atime)
    CrawlConfig.get_config(reset=True, soft=True)
def test_dlog(self):
    """
    Test method dlog on daemon object
    """
    lfname = self.tmpdir('daemon.dlog.log')
    lf = CrawlConfig.log(logpath=lfname)
    a = daemon.Daemon(self.tmpdir("daemon_pid"), logger=lf)
    logmsg = "testing the dlog method of %s" % a
    a.dlog(logmsg)
    self.assertTrue(logmsg in util.contents(lfname),
                    "Expected '%s' in '%s'" %
                    (logmsg, util.line_quote(util.contents(lfname))))
def test_alert_use_other(self):
    """
    A use directive sends us to another config section where we generate a
    log alert and verify that the message was written to the correct log
    file.
    """
    self.dbgfunc()
    logfile = self.tmpdir('alert_use.log')
    cfg = CrawlConfig.CrawlConfig()
    cfg.add_section('crawler')
    cfg.add_section('AlertTest')
    cfg.add_section('alert_section')
    cfg.add_section('other_section')
    cfg.set('crawler', 'logpath', logfile)
    cfg.set('AlertTest', 'alerts', 'alert_section')
    cfg.set('alert_section', 'use', "other_section")
    cfg.set('other_section', 'log', "%s")
    CrawlConfig.log(logpath=logfile, close=True)
    payload = 'this is a test message from %s' % util.my_name()
    x = Alert.Alert(caller='AlertTest', msg=payload, cfg=cfg)
    self.expected_in(payload, util.contents(logfile))
def test_ctor_no_cv_section(self):
    """
    If there is no cv section in the config, reset_atime and
    hash_algorithm should take on their default values.
    """
    self.dbgfunc()
    cfg = copy.deepcopy(self.cfg_d)
    del cfg['cv']
    zcfg = CrawlConfig.add_config(close=True, dct=cfg)
    self.assertFalse(zcfg.has_section('cv'))
    h = hpss.HSI(connect=False)
    self.expected(False, h.reset_atime)
    self.expected(None, h.hash_algorithm)
def test_alert_email_defcfg(self):
    """
    Generate an e-mail alert using the default config and verify that it
    was sent (this is where we use 'monkey patching').
    """
    self.dbgfunc()
    fakesmtp.inbox = []
    CrawlConfig.add_config(close=True)
    with U.tmpenv('CRAWL_CONF', None):
        logfile = self.tmpdir('alert_email.log')
        targets = "[email protected], [email protected]"
        payload = 'this is an e-mail alert'
        sender = 'hpssic@' + util.hostname(long=True)
        CrawlConfig.log(logpath=logfile, close=True)
        x = Alert.Alert(caller='cv', msg=payload)
        m = fakesmtp.inbox[0]
        self.expected(', '.join(m.to_address), targets)
        self.expected(m.from_address, sender)
        self.expected_in('sent mail to', util.contents(logfile))
        self.expected_in(payload, m.fullmessage)
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files
    processed.
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # count the file so the caller can tell priority work was done
        rval += 1
    return rval
def main(cfg):
    """
    This plugin will generate a report and send it to the designated
    e-mail address(es).
    """
    rval = 0
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()
        subject = "%s %s" % (cfg.get('rpt', 'subject'),
                             time.strftime("%Y.%m%d %H:%M:%S",
                                           time.localtime()))
        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))
    return rval
def check_hash_algorithm(self, cf_stem, alg, checkfor=None):
    """
    With hash_algorithm = *alg* in the config, hashcreate should produce
    hashes of type *checkfor* (which defaults to *alg*)
    """
    if checkfor is None:
        checkfor = alg

    # generate a config file and make it the default config
    cf_name = self.tmpdir(cf_stem + ".cfg")
    cd = copy.deepcopy(self.cfg_d)
    if alg == '(none)':
        del cd['cv']['hash_algorithm']
    else:
        cd['cv']['hash_algorithm'] = alg
    self.write_cfg_file(cf_name, cd)
    CrawlConfig.get_config(cfname=cf_name, reset=True)

    # Get an hsi object
    testfile = self.plist[1]
    try:
        h = hpss.HSI()
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
        else:
            raise

    # if necessary, delete any hash on the test file
    result = h.hashlist(testfile)
    if "(none)" not in result:
        h.hashdelete(testfile)

    # generate a hash on the test file
    h.hashcreate(testfile)

    # verify that the hash created is of the proper type
    result = h.hashlist(testfile)
    self.expected_in(checkfor, result)
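# Hypothetical callers of the helper above, showing how it is meant to be
# used.  These method names are illustrative, not the suite's actual tests,
# and treating md5 as hsi's default hash type is an assumption here.
def test_hash_algorithm_md5(self):
    """
    With hash_algorithm = md5 in the config, hashcreate should make md5
    hashes
    """
    self.check_hash_algorithm(util.my_name(), 'md5')

def test_hash_algorithm_unspecified(self):
    """
    With no hash_algorithm in the config, hashcreate should fall back to
    hsi's default (assumed md5 for this sketch)
    """
    self.check_hash_algorithm(util.my_name(), '(none)', checkfor='md5')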
def test_get_html_report(self):
    """
    Call html_lib.get_html_report() directly
    """
    self.dbgfunc()
    c = CrawlConfig.add_config()
    db = CrawlDBI.DBI(dbtype="crawler")
    dbschem.drop_table(table="lscos")
    self.expected(False, db.table_exists(table="lscos"))
    try:
        result = html_lib.get_html_report('')
    except hpss.HSIerror as e:
        if MSG.hpss_unavailable in str(e):
            pytest.skip(str(e))
        else:
            raise
    self.expected(True, db.table_exists(table="lscos"))
    db.close()
    self.validate_report(result)
def main(cfg):
    """
    Main entry point for the cv plugin
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    plugdir = cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (checksums, matches, failures) = (0, 0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        if any([util.rgxin(msg, str(e))
                for msg in ["no such table: checkables",
                            "Table '.*' doesn't exist"]]):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise
    except StandardError as e:
        if 'Please call .ex_nihilo()' in str(e):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise

    # We're going to process n_ops things in the HPSS namespace
    for op in range(n_ops):
        # if the list from the database is empty, there's nothing to do
        if 0 < len(clist):
            # but it's not, so grab the first item and check it
            item = clist.pop(0)
            CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
            ilist = item.check()

            # Expected outcomes that check can return:
            #   list of Checkables: read dir or checksummed files (may be
            #                       empty)
            #   Alert:              checksum verify failed
            #   'access denied':    unaccessible directory
            #   'matched':          a checksum was verified
            #   'checksummed':      file was checksummed
            #   'skipped':          file was skipped
            #   'unavailable':      HPSS is temporarily unavailable
            #   StandardError:      invalid Checkable type (not 'f' or 'd')
            #
            if type(ilist) == str:
                if ilist == "access denied":
                    CrawlConfig.log("dir %s not accessible" % item.path)
                    # clist.remove(item)
                elif ilist == "matched":
                    matches += 1
                    CrawlConfig.log("%s checksums matched" % item.path)
                elif ilist == "checksummed":
                    # checksums += 1
                    CrawlConfig.log("%s checksummed" % item.path)
                elif ilist == "skipped":
                    CrawlConfig.log("%s skipped" % item.path)
                elif ilist == "unavailable":
                    CrawlConfig.log("HPSS is not available")
                    break
                else:
                    CrawlConfig.log("unexpected string returned " +
                                    "from Checkable: '%s'" % ilist)
            elif type(ilist) == list:
                CrawlConfig.log("in %s, found:" % item)
                for n in ilist:
                    CrawlConfig.log(">>> %s" % str(n))
                    if 'f' == n.type and n.checksum != 0:
                        CrawlConfig.log("... previously checksummed")
                        # checksums += 1
            elif isinstance(ilist, Checkable.Checkable):
                CrawlConfig.log("Checkable returned - file checksummed" +
                                " - %s, %s" % (ilist.path, ilist.checksum))
                # checksums += 1
            elif isinstance(ilist, Alert.Alert):
                CrawlConfig.log("Alert generated: '%s'" % ilist.msg())
                failures += 1
            else:
                CrawlConfig.log("unexpected return val from " +
                                "Checkable.check: %s: %r" %
                                (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log("files checksummed: %d; " % (t_checksums - p_checksums) +
                    "checksums matched: %d; " % matches +
                    "failures: %d" % failures)
    CrawlConfig.log("totals checksummed: %d; " % t_checksums +
                    "matches: %d; " % t_matches +
                    "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())

    return failures
def main(cfg):
    """
    Tape Copy Checker retrieves the necessary information from the DB2
    database to find files where the number of copies stored may not match
    the number called for by the COS.
    """
    # retrieve configuration items as needed
    how_many = int(cfg.get_d(tcc_lib.sectname(), 'operations', 10))
    CrawlConfig.log("tape-copy-checker: firing up for %d items" % how_many)

    # retrieve COS info
    cosinfo = tcc_lib.get_cos_info()

    # check for priority file(s)
    pri_glob = cfg.get_d(tcc_lib.sectname(), 'priority', '')
    if pri_glob != '':
        if 0 < tcc_priority(pri_glob, cosinfo):
            return

    # get the nsobject_id of the next bitfile to process from mysql
    next_nsobj_id = tcc_lib.get_next_nsobj_id(cfg)
    CrawlConfig.log("next nsobject id = %d" % next_nsobj_id)

    # fetch the next N bitfiles from DB2
    CrawlConfig.log("looking for nsobject ids between %d and %d" %
                    (next_nsobj_id, next_nsobj_id + how_many - 1))
    try:
        bfl = tcc_lib.get_bitfile_set(int(next_nsobj_id), how_many)
    except U.HpssicError:
        bfl = []
    CrawlConfig.log("got %d bitfiles" % len(bfl))

    errcount = 0
    if len(bfl) == 0:
        for oid in range(next_nsobj_id, next_nsobj_id + how_many):
            tcc_lib.record_checked_ids(cfg, oid, oid, 1, 0)
            if cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("Object %d is not complete" % oid)
            errcount += 1
    else:
        # for each bitfile, if it does not have the right number of
        # copies, report it
        for bf in bfl:
            correct = 1
            error = 0
            if bf['SC_COUNT'] != cosinfo[bf['BFATTR_COS_ID']]:
                tcc_lib.tcc_report(bf, cosinfo)
                correct = 0
                error = 1
                CrawlConfig.log("%s %s %d != %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))
            elif cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("%s %s %d == %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))
            last_obj_id = int(bf['OBJECT_ID'])
            tcc_lib.record_checked_ids(cfg, last_obj_id, last_obj_id,
                                       correct, error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount


# -----------------------------------------------------------------------------
if __name__ == '__main__':
    main(CrawlConfig.get_config())