def load(self, already_open=False):
    """
    Load this object with data from the database
    """
    if not already_open:
        self.db = CrawlDBI.DBI(dbtype='crawler')
    dimname = self.name
    try:
        # populate the p_sum structure
        rows = self.db.select(table='checkables',
                              fields=["count(path)", dimname],
                              where='type = "f" and last_check <> 0',
                              groupby=dimname)
        self.p_sum = self._compute_dict(rows)

        # populate the s_sum structure
        rows = self.db.select(table='checkables',
                              fields=["count(path)", dimname],
                              where='type = "f" and checksum = 1',
                              groupby=dimname)
        self.s_sum = self._compute_dict(rows)
    except CrawlDBI.DBIerror:
        pass

    for cval in self.p_sum:
        if cval not in self.s_sum:
            self.s_sum[cval] = {'count': 0, 'pct': 0}

    if not already_open:
        self.db.close()

def drop_table(cfg=None, prefix=None, table=None):
    """
    This wraps the table dropping operation.
    """
    if table is None:
        return MSG.nothing_to_drop

    if cfg is None:
        cfg = CrawlConfig.get_config()

    if prefix is None:
        prefix = cfg.get('dbi-crawler', 'tbl_prefix')
    else:
        cfg.set('dbi-crawler', 'tbl_prefix', prefix)

    db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    if not db.table_exists(table=table):
        rval = "Table '%s' does not exist" % table
    else:
        db.drop(table=table)
        if db.table_exists(table=table):
            rval = "Attempt to drop table '%s' failed" % table
        else:
            rval = "Attempt to drop table '%s' was successful" % table
    db.close()
    return rval

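# Usage sketch (illustrative, not from the source): drop a scratch table with
# the default configuration. The table name 'scratch' is hypothetical.
def _example_drop_table():
    # drop_table() returns a human-readable status string in every case
    print(drop_table(table='scratch'))
    # passing no table short-circuits with MSG.nothing_to_drop
    print(drop_table())
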
def get_html_report(cfg_file=None, cfg=None):
    """
    Format a report in HTML
    """
    rval = ""
    if cfg is not None:
        # use the config object passed in
        pass
    elif cfg_file is not None:
        cfg = CrawlConfig.add_config(filename=cfg_file)
    else:
        cfg = CrawlConfig.add_config()

    db = CrawlDBI.DBI(dbtype="crawler")
    last_rpt_time = rpt_lib.get_last_rpt_time(db)
    rval += '<head><meta http-equiv="refresh" content="60">\n'
    rval += "<title>HPSSIC Dashboard</title></head>"
    rval += ("<body><center><h1>HPSS Integrity Crawler Dashboard</h1>" +
             "<br><h4>Version %s</h4>" % version.__version__ +
             "</center>\n")
    rval += "Report generated at %s\n" % time.strftime("%Y.%m%d %H:%M:%S")
    rval += ("<br>Based on data from %s\n" %
             time.strftime("%Y.%m%d %H:%M:%S",
                           time.localtime(last_rpt_time)))
    rval += get_html_cv_report(db, last_rpt_time)
    rval += get_html_mpra_report(db, last_rpt_time)
    rval += get_html_tcc_report(db, last_rpt_time)
    rval += "</body>"
    db.close()
    return rval

def mpra_fetch_recent(type):
    """
    Retrieve and return the most recent record reported so we don't report the
    same record repeatedly
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table='mpra'):
        CrawlConfig.log("Fetch from nonexistent mpra table -- return 0")
        db.close()
        return 0

    rows = db.select(table='mpra',
                     fields=['scan_time', 'end_time'],
                     where='type = ?',
                     data=(type,))
    db.close()

    last_end_time = -1
    max_scan_time = 0
    for r in rows:
        if max_scan_time < r[0]:
            max_scan_time = r[0]
            last_end_time = r[1]

    if last_end_time < 0:
        CrawlConfig.log("No '%s' value in mpra -- returning 0" % type)
        return 0
    else:
        CrawlConfig.log("Fetch '%s' from mpra table -- return %d" %
                        (type, last_end_time))
        return last_end_time

def get_bitfile_path(bitfile):
    """
    Given a bitfile id, walk back up the tree in HPSS to generate the
    bitfile's path
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    rows = db.select(table='nsobject',
                     fields=['parent_id', 'name'],
                     where='bitfile_id = ?',
                     data=(bitfile,))
    if 1 < len(rows):
        raise U.HpssicError(MSG.multiple_objects_S % hexstr(bitfile))
    elif len(rows) < 1:
        return "<unnamed bitfile>"

    rval = ''
    while rows:
        x = rows[0]
        if rval == '':
            rval = x['NAME']
        else:
            rval = os.path.join(x['NAME'], rval)
        rows = db.select(table='nsobject',
                         fields=['parent_id', 'name'],
                         where='object_id = ?',
                         data=(x['PARENT_ID'],))
    return rval

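# Minimal sketch of the leaf-to-root assembly used above, with a hypothetical
# parent chain standing in for the nsobject table. Prepending each ancestor's
# name via os.path.join yields the path in root-to-leaf order.
def _example_walk_up():
    chain = ['leaf', 'subdir', 'topdir']      # hypothetical names, leaf first
    rval = ''
    for name in chain:
        rval = name if rval == '' else os.path.join(name, rval)
    return rval                               # -> 'topdir/subdir/leaf'
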
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    If we check a range and get no hits (i.e., no NSOBJECT ids exist in the
    range), we'll store

        (<time>, <low-id>, <high-id>, 0, 0)

    If we get a hit with the right copy count, we store it by itself as

        (<time>, <hit-id>, <hit-id>, 1, 0)

    If we get a hit with the wrong copy count, we store it by itself as

        (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    tabname = cfg.get(sectname(), 'table_name')
    dbschem.make_table(tabname)
    ts = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, ts))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table=tabname,
              fields=['check_time',
                      'low_nsobj_id',
                      'high_nsobj_id',
                      'correct',
                      'error'],
              data=[(ts, low, high, correct, error)])
    db.close()

def get_next_nsobj_id(cfg):
    """
    Read the TCC table in the HPSSIC database to get the next nsobject id. If
    the table does not exist or is empty, we return 1 for the next object id
    to check (the table itself is created when checked ids are recorded).
    """
    tabname = cfg.get(sectname(), 'table_name')
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table=tabname):
        rval = 1
    else:
        rows = db.select(table=tabname,
                         fields=['max(check_time)'])
        max_time = rows[0][0]
        if max_time is None:
            rval = 1
        else:
            rows = db.select(table=tabname,
                             fields=['high_nsobj_id'],
                             where='check_time = ?',
                             data=(max_time,))
            rval = int(rows[0][0]) + 1
            if highest_nsobject_id() < rval:
                rval = 1
    db.close()
    return rval

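# Hedged round-trip sketch: record a checked range, then ask for the next id.
# Assumes *cfg* has a section (via sectname()) defining table_name; the id
# values are hypothetical.
def _example_tcc_id_tracking(cfg):
    record_checked_ids(cfg, low=1, high=100, correct=1, error=0)
    # The most recent row's high_nsobj_id + 1 becomes the next id to check,
    # unless it would pass the highest existing nsobject id (then wrap to 1).
    return get_next_nsobj_id(cfg)    # -> 101, under these assumptions
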
def ttype_map_insert(TT):
    """
    Populate the table PFX_tape_types with the contents of the media type
    dictionary *TT*.
    """
    tt_tups = []
    for k in TT:
        if type(k) == int:
            try:
                mtype = TT[k]['label']
            except KeyError:
                mtype = TT[k]['name']
            for l in TT[k]:
                if type(l) == int:
                    try:
                        mstype = TT[k][l]['label']
                    except KeyError:
                        mstype = TT[k][l]['list'][0]
                    tt_tups.append((k, l, '%s/%s' % (mtype, mstype)))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table='tape_types',
              fields=['type', 'subtype', 'name'],
              data=tt_tups)
    db.close()

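# Illustrative shape of the *TT* argument (the values are made up): integer
# keys are type codes, nested integer keys are subtype codes, and each level
# carries either a 'label' or a fallback ('name', or the first 'list' entry).
_EXAMPLE_TT = {
    16: {'label': 'STK T10000',
         0: {'label': 'T10000A'},
         1: {'list': ['T10000B']}},
}
# ttype_map_insert(_EXAMPLE_TT) would insert rows like
# (16, 0, 'STK T10000/T10000A') and (16, 1, 'STK T10000/T10000B').
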
def popcart(pc_l):
    """
    *pc_l* contains tuples of (path, db val, hsi cart val)
    """
    hp_l = [(x[2], x[0]) for x in pc_l]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables",
              fields=["cart"],
              where="path = ?",
              data=hp_l)
    db.close()

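# Sketch of the argument reshaping above: db.update() wants one tuple per row,
# ordered as (<field values...>, <where values...>), so each incoming
# (path, db val, hsi cart val) becomes (cart, path). Values are hypothetical.
def _example_popcart_reorder():
    pc_l = [('/home/a/f1', None, 'X0801700'),
            ('/home/a/f2', None, 'X0801701')]
    return [(x[2], x[0]) for x in pc_l]
    # -> [('X0801700', '/home/a/f1'), ('X0801701', '/home/a/f2')]
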
def db_select(dbtype='hpss', dbname='sub', **dbargs):
    """
    Issue a select based on arguments passed in. Return the result.
    """
    db = CrawlDBI.DBI(dbtype=dbtype, dbname=dbname)
    result = db.select(**dbargs)
    db.close()
    return result

def lookup_migr_recs(**dbargs):
    """
    Look up and report records from table BFMIGRREC based on criteria provided
    by the user
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    rval = db.select(**dbargs)
    db.close()
    return rval

def reset_path(pathname):
    """
    Reset the fails and reported fields on a row so it can be rechecked
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table='checkables',
              fields=['fails', 'reported'],
              where="path = ?",
              data=[(0, 0, pathname)])
    db.close()

def max_nsobj_id():
    """
    Return the value of the largest NS object id in the nsobject table
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    result = db.select(table='nsobject',
                       fields=['max(object_id) as max_obj_id'])
    db.close()
    rval = int(result[0]['MAX_OBJ_ID'])
    return rval

def record_history(name, when, errors):
    """
    Record a plugin name and runtime in the history table
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if not db.table_exists(table='history'):
        dbschem.make_table('history')
    db.insert(table='history',
              fields=['plugin', 'runtime', 'errors'],
              data=[(name, when, errors)])
    db.close()

def make_table(tabname, cfg=None):
    """
    Make the indicated table if it does not exist
    """
    db = CrawlDBI.DBI(dbtype='crawler', cfg=cfg)
    if db.table_exists(table=tabname):
        rval = "Already"
    else:
        db.create(table=tabname, fields=tdefs[tabname]['fields'])
        rval = "Created"
    db.close()
    return rval

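# Hedged usage sketch: make_table() is idempotent, so calling it twice is
# safe; the return values distinguish the two outcomes. 'history' is assumed
# to be defined in tdefs, as record_history() above suggests.
def _example_make_table_twice():
    first = make_table('history')     # -> "Created" (if it was absent)
    second = make_table('history')    # -> "Already"
    return first, second
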
def mpra_record_recent(type, start, end, hits):
    """
    Record the most recent record reported so we don't report records
    repeatedly. However, if recent is not later than the time already stored,
    we don't want to update it.
    """
    dbschem.make_table('mpra')
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table='mpra',
              fields=['type', 'scan_time', 'start_time', 'end_time', 'hits'],
              data=[(type, int(time.time()), int(start), int(end), hits)])
    db.close()

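# Illustrative round trip (the type string and times are hypothetical):
# record a scan window, then fetch the end time of the most recent scan.
def _example_mpra_round_trip():
    now = int(time.time())
    mpra_record_recent('migr', start=now - 3600, end=now, hits=0)
    # mpra_fetch_recent() picks the row with the max scan_time and returns
    # its end_time, so this yields the 'end' value just stored.
    return mpra_fetch_recent('migr')
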
def load_recheck_list(cls, how_many):
    """
    Look to see whether any of the already checksummed items in the database
    have a last check time over the threshold for rechecking. If so, we'll
    shove some of them to the front of the list based on the configuration.
    """
    cfg = CrawlConfig.add_config()
    r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
    r_age = cfg.get_time('cv', 'recheck_age', 365 * 24 * 3600)
    threshold = int(time.time() - r_age)
    CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
    if r_fraction == 0.0:
        return []

    limit = round(r_fraction * how_many)
    db = CrawlDBI.DBI(dbtype='crawler')
    kw = {'table': 'checkables',
          'fields': ['rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                     'checksum', 'last_check', 'fails', 'reported'],
          'where': 'checksum <> 0 and last_check < %d' % threshold,
          'orderby': 'last_check',
          'limit': limit}
    rows = db.select(**kw)
    db.close()

    rval = []
    for row in rows:
        tmp = list(row)
        new = Checkable(rowid=tmp.pop(0),
                        path=tmp.pop(0),
                        type=tmp.pop(0),
                        cos=tmp.pop(0),
                        cart=tmp.pop(0),
                        ttypes=tmp.pop(0),
                        checksum=tmp.pop(0),
                        last_check=tmp.pop(0),
                        fails=tmp.pop(0),
                        reported=tmp.pop(0),
                        in_db=True,
                        dirty=False)
        rval.append(new)
    return rval

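# The recheck behavior above is driven by two settings in the 'cv' config
# section. As a sketch (the values and the '90d' time syntax are assumptions,
# not from the source), with how_many=100 these settings would pull up to 25
# items whose last_check is more than 90 days old:
#
#     [cv]
#     recheck_fraction = 0.25
#     recheck_age      = 90d
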
def ttype_missing():
    """
    Return a list of records where type = 'f' and ttype is null
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='checkables',
                     fields=['rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                             'checksum', 'last_check', 'fails', 'reported'],
                     where="type = 'f' and ttypes is null")
    db.close()
    return rows

def table_list():
    """
    Return the list of HPSS tables from the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    db._dbobj.tbl_prefix = 'syscat.'
    rows = db.select(table='tables',
                     fields=["substr(tabname, 1, 30) as \"Table\"",
                             "substr(tabschema, 1, 30) as \"Schema\"",
                             "type"],
                     where="tabschema = 'HPSS'")
    db.close()
    return rows

def nulls_from_checkables():
    """
    Return rows from table checkables that contain null values
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    rval = db.select(table="checkables",
                     fields=["rowid", "path", "type", "cos", "cart", "ttypes",
                             "checksum", "last_check", "fails", "reported"],
                     where="fails is null or reported is null or cart is null")
    db.close()
    return rval

def retrieve_history(**kw):
    """
    Retrieve and return the contents of table 'history'. At some point, we may
    need to turn this into a generator so we don't try to load the whole table
    into memory at once, but for now YAGNI.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    kw['table'] = 'history'
    if 'fields' not in kw:
        kw['fields'] = ['plugin', 'runtime', 'errors']
    rows = db.select(**kw)
    db.close()
    return rows

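# Usage sketch: the keyword arguments flow straight through to db.select(),
# so callers can filter and order. This particular where clause is
# hypothetical, not from the source.
def _example_retrieve_history():
    # only failed runs, most recent first
    return retrieve_history(fields=['plugin', 'runtime'],
                            where='errors <> 0',
                            orderby='runtime desc')
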
def lookup_nulls():
    """
    Return records that contain NULL values
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    rval = db.select(table="checkables",
                     fields=["rowid", "path", "type", "cos", "cart", "ttypes",
                             "checksum", "last_check", "fails", "reported"],
                     where="cos is NULL or cart is NULL or ttypes is NULL")
    db.close()
    return rval

def load_history():
    """
    Read the contents of table pfx_mpra and load the unique scan_times into
    table pfx_history as mpra runtimes.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='mpra',
                     fields=['type', 'scan_time', 'hits'])
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=list(rows))
    db.close()

def get_cos_info(obarg=None):
    """
    Read COS info from tables COS and HIER in the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(table=['cos A', 'hier B'],
                     fields=['A.cos_id',
                             'A.hier_id',
                             'B.slevel0_migrate_list_count'],
                     where="A.hier_id = B.hier_id")
    db.close()

    rval = {}
    for r in rows:
        rval[r['COS_ID']] = r['SLEVEL0_MIGRATE_LIST_COUNT']
    return rval

def tpop_update_by_path(data):
    """
    Update media type (ttypes) and cartridge names (cart) based on path.
    Incoming *data* is a list of tuples containing ttypes, cart, path, and
    last check; only the first three are used in the update.
    """
    zdata = [(d[0], d[1], d[2]) for d in data]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables",
              fields=["ttypes", "cart"],
              where="path = ?",
              data=zdata)
    db.close()

def get_checksum_count():
    """
    Return the count of checksums in the crawler database
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if db.table_exists(table="checkables"):
        rows = db.select(table='checkables',
                         fields=["count(path)"],
                         where="checksum = 1")
        checksums = rows[0][0]
    else:
        checksums = 0
    db.close()
    return checksums

def load_history():
    """
    Read the contents of table pfx_report and load the report times into table
    pfx_history as run times for the report plugin.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='report',
                     fields=['report_time'])
    insert_data = [('report', x[0], 0) for x in rows]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=insert_data)
    db.close()

def load(self):
    """
    Read a checkable from the database and fill out the object
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if self.rowid is not None:
        rows = db.select(table='checkables',
                         fields=['rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'],
                         where="rowid = ?",
                         data=(self.rowid,))
    else:
        rows = db.select(table='checkables',
                         fields=['rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'],
                         where="path = ?",
                         data=(self.path,))
    if 0 == len(rows):
        self.in_db = False
    elif 1 == len(rows):
        self.in_db = True
        rz = list(rows[0])
        self.rowid = rz.pop(0)
        self.path = rz.pop(0)
        self.type = rz.pop(0)
        self.cos = rz.pop(0)
        self.cart = rz.pop(0)
        self.ttypes = rz.pop(0)
        self.checksum = rz.pop(0)
        self.last_check = rz.pop(0)
        try:
            self.fails = rz.pop(0)
        except IndexError:
            self.fails = 0
        try:
            self.reported = rz.pop(0)
        except IndexError:
            self.reported = 0
        self.dirty = False
    else:
        raise StandardError("There appears to be more than one copy " +
                            "of %s in the database" % self)
    db.close()

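# Hedged usage sketch: load() fills in whichever of rowid/path is missing.
# The constructor keywords follow load_recheck_list() above; the path here is
# hypothetical.
def _example_checkable_load():
    c = Checkable(path='/home/someuser/somefile', type='f')
    c.load()           # looks the row up by path, since rowid is None
    return c.in_db     # True if exactly one matching row was found
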
def get_report():
    """
    Generate and return a text report
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    last_report_time = rpt_sublib.get_last_rpt_time(db)
    report = get_cv_report(db, last_report_time)
    report += get_mpra_report(db, last_report_time)
    report += get_tcc_report(db, last_report_time)
    set_last_rpt_time(db)
    db.close()
    return report

def alter_table(table=None, addcol=None, dropcol=None, pos=None, cfg=None):
    """
    Alter a table, either adding a column (*addcol*) in position *pos*, or
    dropping a column (*dropcol*). This function should be idempotent, so we
    need to check for the column before adding it.
    """
    if cfg:
        db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    else:
        db = CrawlDBI.DBI(dbtype="crawler")

    if addcol and dropcol:
        raise CrawlDBI.DBIerror("addcol and dropcol are mutually exclusive")
    elif addcol:
        fieldname = addcol.split()[0]
    elif dropcol:
        fieldname = dropcol

    try:
        db.alter(table=table, addcol=addcol, dropcol=dropcol, pos=pos)
        rval = "Successful"
    except CrawlDBI.DBIerror as e:
        if (dropcol and
                "Can't DROP '%s'; check that column/key exists" % fieldname
                in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        elif (addcol and
                "Duplicate column name '%s'" % fieldname in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        else:
            raise
    db.close()
    return rval

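# Usage sketch (column and table names hypothetical): because the
# duplicate-column error is caught and reported as a string, re-running the
# same alter is harmless rather than fatal.
def _example_alter_table():
    first = alter_table(table='history', addcol='version text')
    again = alter_table(table='history', addcol='version text')
    # first -> "Successful"; again -> the "Duplicate column name ..." message
    return first, again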