def nulls_from_checkables():
    """
    Return all rows from table checkables where fails, reported, or cart
    is null.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    result = db.select(table="checkables",
                       fields=["rowid",
                               "path",
                               "type",
                               "cos",
                               "cart",
                               "ttypes",
                               "checksum",
                               "last_check",
                               "fails",
                               "reported"],
                       where="fails is null or reported is null or cart is null")
    db.close()
    return result
def retrieve_history(**kw):
    """
    Fetch and return the contents of table 'history'.

    Someday this may need to become a generator so the whole table is not
    loaded into memory at once, but for now YAGNI.
    """
    # Force the table name; default the field list if the caller gave none
    kw['table'] = 'history'
    kw.setdefault('fields', ['plugin', 'runtime', 'errors'])
    db = CrawlDBI.DBI(dbtype='crawler')
    result = db.select(**kw)
    db.close()
    return result
def table_list():
    """
    Return the list of HPSS tables from the DB2 database.

    Returns:
        rows of (Table, Schema, type) for every table in schema HPSS.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    # Point the underlying handle at the DB2 system catalog
    db._dbobj.tbl_prefix = 'syscat.'
    rows = db.select(table='tables',
                     fields=["substr(tabname, 1, 30) as \"Table\"",
                             "substr(tabschema, 1, 30) as \"Schema\"",
                             "type"],
                     where="tabschema = 'HPSS'")
    # Bug fix: close the connection -- it was previously leaked, unlike
    # every other routine in this module
    db.close()
    return rows
def load_history():
    """
    Copy the report times from table pfx_report into table pfx_history as
    run times for the report plugin.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    report_times = db.select(table='report', fields=['report_time'])
    history_rows = [('report', row[0], 0) for row in report_times]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=history_rows)
    db.close()
def load(self):
    """
    Read a checkable from the database and fill out the object
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if self.rowid is not None:
        # We already know the rowid -- use it as the lookup key
        rows = db.select(table='checkables',
                         fields=['rowid',
                                 'path',
                                 'type',
                                 'cos',
                                 'cart',
                                 'ttypes',
                                 'checksum',
                                 'last_check',
                                 'fails',
                                 'reported'],
                         where="rowid = ?",
                         data=(self.rowid, ))
    else:
        # No rowid yet -- look the record up by path instead
        rows = db.select(table='checkables',
                         fields=['rowid',
                                 'path',
                                 'type',
                                 'cos',
                                 'cart',
                                 'ttypes',
                                 'checksum',
                                 'last_check',
                                 'fails',
                                 'reported'],
                         where="path = ?",
                         data=(self.path, ))
    if 0 == len(rows):
        # no matching record
        self.in_db = False
    elif 1 == len(rows):
        # exactly one match -- unpack it into the object attributes in
        # the same order as the field list above
        self.in_db = True
        rz = list(rows[0])
        self.rowid = rz.pop(0)
        self.path = rz.pop(0)
        self.type = rz.pop(0)
        self.cos = rz.pop(0)
        self.cart = rz.pop(0)
        self.ttypes = rz.pop(0)
        self.checksum = rz.pop(0)
        self.last_check = rz.pop(0)
        # The try/excepts guard against rows that are missing the last two
        # values (presumably rows written before the fails/reported columns
        # existed -- TODO confirm); default them to 0
        try:
            self.fails = rz.pop(0)
        except IndexError:
            self.fails = 0
        try:
            self.reported = rz.pop(0)
        except IndexError:
            self.reported = 0
        self.dirty = False
    else:
        raise StandardError("There appears to be more than one copy " +
                            "of %s in the database" % self)
    db.close()
def get_checksum_count():
    """
    Return the count of checksums in the crawler database.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    count = 0
    if db.table_exists(table="checkables"):
        result = db.select(table='checkables',
                           fields=["count(path)"],
                           where="checksum = 1")
        count = result[0][0]
    db.close()
    return count
def get_cos_info(obarg=None):
    """
    Read COS info from tables COS and HIER in the DB2 database.

    Args:
        obarg: unused; retained for backward compatibility with callers.

    Returns:
        dict: COS_ID -> SLEVEL0_MIGRATE_LIST_COUNT for each COS.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(table=['cos A', 'hier B'],
                     fields=['A.cos_id',
                             'A.hier_id',
                             'B.slevel0_migrate_list_count'],
                     where="A.hier_id = B.hier_id")
    # Bug fix: close the connection -- the original leaked it
    db.close()
    rval = {}
    for r in rows:
        rval[r['COS_ID']] = r['SLEVEL0_MIGRATE_LIST_COUNT']
    return rval
def tpop_update_by_path(data):
    """
    Update media type (ttypes) and cartridge names (cart) based on path.

    Incoming *data* is a list of tuples containing ttypes, cart, path, and
    last check; only the first three elements are used here.
    """
    # Keep just (ttypes, cart, path) for the parameterized update below
    update_rows = [tuple(item[:3]) for item in data]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables",
              fields=["ttypes", "cart"],
              where="path = ?",
              data=update_rows)
    db.close()
def get_report():
    """
    Generate and return a text report built from the cv, mpra, and tcc
    sections, then stamp the last-report time.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    since = rpt_sublib.get_last_rpt_time(db)
    sections = [get_cv_report(db, since),
                get_mpra_report(db, since),
                get_tcc_report(db, since)]
    set_last_rpt_time(db)
    db.close()
    return "".join(sections)
def alter_table(table=None, addcol=None, dropcol=None, pos=None, cfg=None):
    """
    Alter a table, either adding a column (*addcol*) in position *pos*, or
    dropping a column (*dropcol*). This function should be idempotent, so we
    need to check for the column before adding it.

    Args:
        table: name of the table to alter
        addcol: column spec ('name type ...') to add, or None
        dropcol: name of the column to drop, or None
        pos: position for the added column, or None
        cfg: optional config object passed to the DBI constructor

    Returns:
        str: "Successful" on success, or the DB error message with the
        leading error number stripped when the add/drop was already done.

    Raises:
        CrawlDBI.DBIerror: if both addcol and dropcol are given, or on any
        database error other than the idempotent add/drop cases.
    """
    if cfg:
        db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    else:
        db = CrawlDBI.DBI(dbtype="crawler")

    if addcol and dropcol:
        raise CrawlDBI.DBIerror("addcol and dropcol are mutually exclusive")
    elif addcol:
        # addcol is 'name type ...'; the bare column name is the first word
        fieldname = addcol.split()[0]
    elif dropcol:
        fieldname = dropcol

    try:
        db.alter(table=table, addcol=addcol, dropcol=dropcol, pos=pos)
        rval = "Successful"
    except CrawlDBI.DBIerror as e:
        if (dropcol and
                "Can't DROP '%s'; check that column/key exists" % fieldname
                in str(e)):
            # edit the error number off the front of the message
            # (fix: use raw strings so '\s'/'\d' aren't invalid escapes)
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        elif (addcol and
                "Duplicate column name '%s'" % fieldname in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        else:
            raise
    db.close()
    return rval
def load_history():
    """
    Copy the distinct check_times (with summed error counts) from table
    pfx_tcc_data into table pfx_history as tcc runtimes.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='tcc_data',
                     fields=['check_time', 'sum(error)'],
                     groupby='check_time')
    history_rows = [('tcc', row[0], row[1]) for row in rows]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=history_rows)
    db.close()
def get_match_fail_count():
    """
    Return the match and fail counts from the crawler database.

    Returns:
        tuple: (matches, failures) from row 1 of the stats table, or
        (0, 0) when the table does not exist.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if db.table_exists(table=stats_table):
        rows = db.select(table=stats_table,
                         fields=["matches", "failures"],
                         where="rowid = 1")
        # cleanup: dropped the unused intermediate 'rval' binding
        (matches, failures) = rows[0]
    else:
        (matches, failures) = (0, 0)
    db.close()
    return (matches, failures)
def prep_popcart(where, limit):
    """
    Fetch (path, cart) rows from checkables matching *where*. When *limit*
    is positive, no more than *limit* records are retrieved.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    extra = {'limit': limit} if 0 < limit else {}
    rows = db.select(table="checkables",
                     fields=["path", "cart"],
                     where=where,
                     **extra)
    db.close()
    return rows
def ttype_cart_lookup(cartname):
    """
    Look up *cartname* in HPSS table PVLPV and return the cartridge's
    media (type, subtype) pair.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(table="pvlpv",
                     fields=["phys_vol_type_type", "phys_vol_type_subtype"],
                     where="phys_vol_id = ?",
                     data=(cartname, ))
    db.close()
    record = rows[0]
    return (record['PHYS_VOL_TYPE_TYPE'], record['PHYS_VOL_TYPE_SUBTYPE'])
def find_by_path(self):
    """
    Look the current item up in the database by path and return the
    matching row(s).
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    matches = db.select(table='checkables',
                        fields=['rowid',
                                'path',
                                'type',
                                'cos',
                                'cart',
                                'ttypes',
                                'checksum',
                                'last_check',
                                'fails',
                                'reported'],
                        where='path=?',
                        data=(self.path, ))
    db.close()
    return matches
def ttype_map_desc(type, subtype):
    """
    Look up *type* and *subtype* in the crawler table tape_types and
    return the matching media description.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    matches = db.select(table="tape_types",
                        fields=["name"],
                        where="type = ? and subtype = ?",
                        data=(type, subtype))
    db.close()
    return matches[0][0]
def load_history(filename): """ Read log file *filename* and create records in table pfx_history corresponding to each time the cv plugin was run. Line containing 'cv_plugin' and 'firing up' indicates run time. Subsequent line containing 'cv_plugin' and 'failures: %d' indicate errors for the run. """ # ------------------------------------------------------------------------- def cv_fires(line): """ Parse *line* to decide whether it indicates a firing of the cv plugin. """ return all([runtime is None, error is None, 'cv_plugin' in line or 'checksum-verifier' in line, 'firing up' in line]) # ------------------------------------------------------------------------- def cv_completes(line): """ Parse *line* to decide whether it indicates completion of a firing of the cv plugin. """ return all([runtime is not None, error is None, 'cv_plugin' in line or 'checksum-verifier' in line, 'failures:' in line, 'totals' not in line]) # ------------------------------------------------------------------------- db = CrawlDBI.DBI(dbtype='crawler') runtime = error = None with open(filename, 'r') as f: for line in f: if cv_fires(line): runtime = U.epoch(line[0:18]) if cv_completes(line): error = int(U.rgxin('failures: (\d+)', line)) if runtime is not None and error is not None: db.insert(table='history', ignore=True, fields=['plugin', 'runtime', 'errors'], data=[('cv', runtime, error)]) runtime = error = None db.close()
def lookup_checksum_by_path(path):
    """
    Retrieve a row by path and return the checksum value.

    Args:
        path: path of the checkables record to look up.

    Returns:
        the checksum value, or -1 if no record matches *path*.

    Raises:
        SystemExit: if more than one record matches *path*.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    # Bug fix: use a '?' placeholder (like the rest of this module) rather
    # than interpolating *path* directly into the SQL
    rows = db.select(table="checkables",
                     fields=["checksum"],
                     where="path = ?",
                     data=(path, ))
    db.close()
    if 0 == len(rows):
        rval = -1
    elif 1 == len(rows):
        rval = rows[0][0]
    else:
        raise SystemExit("Too many records found for path %s" % path)
    # Bug fix: the original computed rval but fell off the end without
    # returning it
    return rval
def tpop_select_all(db=None):
    """
    Return all the 'f' (file) type checkables records that still have a
    null ttypes or cart.

    If *db* is not supplied, a connection is opened and closed locally.
    """
    local_db = db is None
    if local_db:
        db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table="checkables",
                     fields=["path", "type", "ttypes", "cart", "last_check"],
                     where="type = 'f' and " +
                           "(ttypes is NULL or cart is NULL)")
    if local_db:
        db.close()
    return rows
def update_stats(cmf):
    """
    Record the values in tuple *cmf* in table cvstats in the database,
    creating the table first if it does not already exist.
    """
    creation = dbschem.make_table(stats_table)
    db = CrawlDBI.DBI(dbtype="crawler")
    if "Created" == creation:
        # Seed the freshly created table with its single row
        db.insert(table=stats_table,
                  fields=["rowid", "matches", "failures"],
                  data=[(1, 0, 0)])
    db.update(table=stats_table,
              fields=["matches", "failures"],
              data=[cmf],
              where="rowid = 1")
    db.close()
def bitfile_lookup(bflist, dbh=None):
    """
    Look up each of the ids in *bflist* in table BITFILE and return the
    count of records found.

    If *dbh* is not supplied, a connection is opened and closed locally.
    """
    borrowed = dbh is not None
    if not borrowed:
        dbh = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    count = dbh.select(table='bitfile',
                       fields=['count(bfid) as bfid_count'],
                       where='bfid in (%s)' % ','.join(bflist))
    if not borrowed:
        dbh.close()
    return (count[0]['BFID_COUNT'])
def lscos_show():
    """
    Report the contents of the lscos table if it exists.

    Returns:
        str: one formatted line per row, or a message noting the table is
        missing.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table='lscos'):
        rval = "\n Table lscos does not exist\n"
        rows = []
    else:
        rows = db.select(table='lscos',
                         fields=['cos', 'name', 'copies',
                                 'min_size', 'max_size'])
        rval = ""
    # Bug fix: close the handle on both paths -- previously it was only
    # closed when the table existed
    db.close()
    for r in rows:
        rval += "%s %-35s %3d %10d %10d\n" % r
    return rval
def lscos_populate():
    """
    If table lscos already exists, we're done. Otherwise, retrieve the
    lscos info from hsi, create the table, and fill the table in.

    We store the min_size and max_size for each COS as text strings
    containing digits because the largest sizes are already within three
    orders of magnitude of a mysql bigint and growing.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    tabname = 'lscos'
    st = dbschem.make_table(tabname)
    # Regex for a size field like '500GB' or a bare digit string
    szrgx = "(\d+([KMGT]B)?)"
    # Full line pattern: cos id, description words, flags, copies,
    # optional 'ALL', then the 'min - max' size range
    rgx = ("\s*(\d+)\s*(([-_a-zA-Z0-9]+\s)+)\s+[UGAN]*\s+(\d+)" +
           "\s+(ALL)?\s+%s\s+-\s+%s" % (szrgx, szrgx))
    if "Created" == st:
        # Table was just created -- harvest the data from 'hsi lscos'
        H = hpss.HSI()
        raw = H.lscos()
        H.quit()

        z = [x.strip() for x in raw.split('\r')]
        # The output is bracketed by '----------' rule lines; keep only
        # the lines between the first and second rule
        rules = [q for q in z if '----------' in q]
        first = z.index(rules[0]) + 1
        second = z[first:].index(rules[0]) + first
        lines = z[first:second]

        data = []
        for line in lines:
            m = U.rgxin(rgx, line)
            # min/max sizes are scaled to bytes with 1024-based units
            (cos, desc, copies, lo_i, hi_i) = (m[0],
                                               m[1].strip(),
                                               m[3],
                                               U.scale(m[5], kb=1024),
                                               U.scale(m[7], kb=1024))
            data.append((cos, desc, copies, lo_i, hi_i))

        db.insert(table=tabname,
                  fields=['cos', 'name', 'copies', 'min_size', 'max_size'],
                  data=data)
        rval = MSG.table_created_S % tabname
    else:
        rval = MSG.table_already_S % tabname
    db.close()
    return rval
def cos_description(name):
    """
    Look up and return the description of cos *name*, or '???' when no
    matching row exists.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if not db.table_exists(table='lscos'):
        # First use: build the lscos table before querying it
        cv_sublib.lscos_populate()
    matches = db.select(table='lscos',
                        fields=['name'],
                        where="cos = ?",
                        data=(name,))
    rval = matches[0][0] if 0 < len(matches) else "???"
    db.close()
    return rval
def distinct_objects(where="", db=None):
    """
    Return distinct low_nsobj_id values from tcc_data. When *where* is
    given, it filters the rows and the 'correct' column is also selected.

    If *db* is not supplied, a connection is opened and closed locally.
    """
    borrowed = db is not None
    if not borrowed:
        db = CrawlDBI.DBI(dbtype="crawler")
    args = {'table': "tcc_data",
            'fields': ["distinct(low_nsobj_id)"]}
    if where != "":
        args['where'] = where
        args['fields'].append("correct")
    rows = db.select(**args)
    if not borrowed:
        db.close()
    return rows
def xplocks(output=None, mark=False):
    """
    Look for expired purge locks in bfpurgerec and report them.

    Args:
        output: None to append to the configured mpra report_file, a str
            path to append to, or an already-open file object to write to.
        mark: if True, record this run via mpra_record_recent().

    Returns:
        int: the number of expired locks found.

    Raises:
        StandardError: if *output* is not None, str, or file.
    """
    cfg = CrawlConfig.get_config()
    now = time.time()
    hits = 0

    # Track whether we opened the report file so we know to close it below
    opened = True
    if output is None:
        f = open(cfg.get('mpra', 'report_file'), 'a')
    elif type(output) == str:
        f = open(output, 'a')
    elif type(output) == file:
        f = output
        opened = False
    else:
        raise StandardError("output type must be 'str' or 'file' ")

    dbs = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    lock_min = cfg.getint('mpra', 'lock_duration')

    rows = dbs.select(table='bfpurgerec',
                      fields=['bfid', 'record_lock_time'],
                      where='record_lock_time <> 0')
    # Bug fix: close the DB2 handle -- the original never closed it
    dbs.close()

    if 0 < len(rows):
        f.write("Expired Purge Locks\n")
        for r in rows:
            # A lock is expired once held longer than lock_duration
            # (configured in minutes; compared in seconds)
            if (lock_min * 60) < (now - r['RECORD_LOCK_TIME']):
                hits += 1
                f.write(" %s %s\n" % (CrawlDBI.DBIdb2.hexstr(r['BFID']),
                                      util.ymdhms(r['RECORD_LOCK_TIME'])))

    if mark:
        mpra_record_recent('purge', 0, 0, hits)

    if opened:
        f.close()

    return hits
def drop_tables_matching(tablike):
    """
    Drop tables with names matching the *tablike* expression. At the time
    of writing, this is only used for dropping test tables ('test_%').

    Only acts when the crawler database is MySQL-backed; the table list is
    read from information_schema.
    """
    tcfg = CrawlConfig.get_config()
    # Clear the table prefix so *tablike* matches the raw table names
    tcfg.set('dbi-crawler', 'tbl_prefix', '')
    db = CrawlDBI.DBI(cfg=tcfg, dbtype='crawler')
    if CrawlDBI.mysql_available and 'mysql' in str(db):
        # (removed a stale commented-out duplicate of the DBI call above)
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    "Can't read dir of .*")
            tlist = db.select(table="information_schema.tables",
                              fields=['table_name'],
                              where="table_name like '%s'" % tablike)
            for (tname, ) in tlist:
                if db.table_exists(table=tname):
                    db.drop(table=tname)
    db.close()
def nsobject_lookup(start_id, id_count, dbh=None):
    """
    Fetch the NSOBJECT records whose object ids fall in the range
    [start_id, start_id + id_count) and return their bitfile ids as hex
    strings.

    If *dbh* is not supplied, a connection is opened and closed locally.
    """
    borrowed = dbh is not None
    if not borrowed:
        dbh = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    rows = dbh.select(table='nsobject',
                      fields=['bitfile_id'],
                      where='? <= object_id and object_id < ?',
                      data=(start_id, start_id + id_count),
                      limit=id_count)
    if not borrowed:
        dbh.close()
    return [CrawlDBI.DBIdb2.hexstr(row['BITFILE_ID']) for row in rows]
def nsobj_id(name='', parent=None):
    """
    Look up an nsobject id based on name and, optionally, parent id.

    Args:
        name: object name to look up; an empty name short-circuits to -1.
        parent: optional parent object id to narrow the lookup.

    Returns:
        tuple: (OBJECT_ID, PARENT_ID) of the match, or -1 when *name* is
        empty (legacy behavior, preserved for callers).

    Raises:
        U.HpssicError: if no matching nsobject exists.
    """
    # Bug fix: bail out on an empty name *before* opening the connection;
    # the original returned -1 here with the handle still open (leak)
    if name == '':
        return -1
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    if parent is None:
        where = "name = '%s'" % name
    else:
        where = "name = '%s' and parent_id=%d" % (name, parent)
    rows = db.select(table='hpss.nsobject',
                     fields=['object_id', 'parent_id'],
                     where=where)
    db.close()
    try:
        rval = (rows[0]['OBJECT_ID'], rows[0]['PARENT_ID'])
    except IndexError:
        raise U.HpssicError(MSG.no_such_path_component_SD % (name, parent))
    return rval
def get_bitfile_set(first_nsobj_id, limit):
    """
    Get a collection of bitfiles from DB2 returning a dict. The bitfiles
    in the set begin with object_id first_nsobj_id and end with the one
    before last_nsobj_id.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    # Resolve the object-id range to bitfile ids; empty means the starting
    # object id is not present in nsobject
    bfid_list = nsobject_lookup(first_nsobj_id, limit, dbh=db)
    if 0 == len(bfid_list):
        db.close()
        raise U.HpssicError(MSG.not_in_nsobject_D % first_nsobj_id)
    # Confirm at least one of those bitfile ids exists in table bitfile
    n_found = bitfile_lookup(bfid_list, dbh=db)
    if 0 == n_found:
        db.close()
        raise U.HpssicError(MSG.not_in_bitfile_S % bfid_list[0])
    # Three-way join of nsobject/bitfile/bftapeseg over the same object-id
    # window, counting tape segments per bitfile
    rval = db.select(table=['nsobject A', 'bitfile B', 'bftapeseg C'],
                     fields=['A.object_id',
                             'B.bfid',
                             'B.bfattr_cos_id',
                             'B.bfattr_create_time',
                             'count(C.storage_class) as sc_count'],
                     where="A.bitfile_id = B.bfid and B.bfid = C.bfid and " +
                           "B.bfattr_data_len > 0 and C.bf_offset = 0 and " +
                           "? <= A.object_id and A.object_id < ? ",
                     groupby=", ".join(["A.object_id",
                                        "B.bfid",
                                        "B.bfattr_cos_id",
                                        "B.bfattr_create_time"]),
                     data=(first_nsobj_id, first_nsobj_id + limit),
                     limit=limit)
    db.close()
    if 0 == len(rval):
        raise U.HpssicError(MSG.not_in_bftapeseg_S % bfid_list[0])
    return rval