Example #1
    def load(self, already_open=False):
        """
        Load this object with data from the database
        """
        if not already_open:
            self.db = CrawlDBI.DBI(dbtype='crawler')

        dimname = self.name
        try:
            # populate the p_sum structure
            rows = self.db.select(table='checkables',
                                  fields=["count(path)", dimname],
                                  where='type="f" and last_check <> 0',
                                  groupby=dimname)
            self.p_sum = self._compute_dict(rows)

            # populate the s_sum structure
            rows = self.db.select(table='checkables',
                                  fields=["count(path)", dimname],
                                  where='type = "f" and checksum = 1',
                                  groupby=dimname)
            self.s_sum = self._compute_dict(rows)
        except CrawlDBI.DBIerror:
            pass

        for cval in self.p_sum:
            if cval not in self.s_sum:
                self.s_sum[cval] = {'count': 0, 'pct': 0}

        if not already_open:
            self.db.close()
Example #2
def drop_table(cfg=None, prefix=None, table=None):
    """
    This wraps the table dropping operation.
    """
    if table is None:
        return(MSG.nothing_to_drop)

    if cfg is None:
        cfg = CrawlConfig.get_config()

    if prefix is None:
        prefix = cfg.get('dbi-crawler', 'tbl_prefix')
    else:
        cfg.set('dbi-crawler', 'tbl_prefix', prefix)

    db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    if not db.table_exists(table=table):
        rval = ("Table '%s' does not exist" % (table))
    else:
        db.drop(table=table)
        if db.table_exists(table=table):
            rval = ("Attempt to drop table '%s' failed" % (table))
        else:
            rval = ("Attempt to drop table '%s' was successful" % (table))

    db.close()
    return rval
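
A minimal usage sketch, assuming this function lives in an importable module such as dbschem (the prefix value is illustrative; with cfg=None the crawler configuration is loaded automatically):

# hypothetical call -- the 'test_' prefix is illustrative; 'mpra' is a real crawler table
msg = drop_table(prefix='test_', table='mpra')
print(msg)   # e.g. "Attempt to drop table 'mpra' was successful"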
Example #3
def get_html_report(cfg_file=None, cfg=None):
    """
    Format a report in HTML
    """
    rval = ""
    if cfg is not None:
        # use it
        pass
    elif cfg_file is not None:
        cfg = CrawlConfig.add_config(filename=cfg_file)
    else:
        cfg = CrawlConfig.add_config()

    db = CrawlDBI.DBI(dbtype="crawler")

    last_rpt_time = rpt_lib.get_last_rpt_time(db)
    rval += ('<head><meta http-equiv="refresh" content="60">\n')
    rval += ("<title>HPSSIC Dashboard</title></head>")
    rval += ("<body><center><h1>HPSS Integrity Crawler Dashboard</h1>" +
             "<br><h4>Version %s</h4>" % version.__version__ +
             "</center>\n")
    rval += ("Report generated at %s\n" % time.strftime("%Y.%m%d %H:%M:%S"))
    rval += ("<br>Based on data from %s\n" %
             time.strftime("%Y.%m%d %H:%M:%S", time.localtime(last_rpt_time)))
    rval += get_html_cv_report(db, last_rpt_time)
    rval += get_html_mpra_report(db, last_rpt_time)
    rval += get_html_tcc_report(db, last_rpt_time)
    rval += "</body>"
    db.close()

    return rval
Example #4
def mpra_fetch_recent(type):
    """
    Retrieve and return the most recent record reported so we don't report the
    same record repeatedly
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table='mpra'):
        CrawlConfig.log("Fetch from not existent mpra table -- return 0")
        return 0

    rows = db.select(table='mpra',
                     fields=['scan_time', 'end_time'],
                     where='type = ?',
                     data=(type,))
    last_end_time = -1
    max_scan_time = 0
    for r in rows:
        if max_scan_time < r[0]:
            max_scan_time = r[0]
            last_end_time = r[1]

    if last_end_time < 0:
        CrawlConfig.log("No '%s' value in mpra -- returning 0" % type)
        return 0
    else:
        CrawlConfig.log("Fetch '%s' from mpra table -- return %d" %
                        (type, last_end_time))
        return last_end_time
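
A short usage sketch; the type string is hypothetical and must match whatever value mpra_record_recent stored in the mpra table's type column:

last = mpra_fetch_recent('migr')   # 'migr' is an illustrative type value
if last == 0:
    # nothing recorded yet -- start the scan from the beginning
    pass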
Example #5
def get_bitfile_path(bitfile):
    """
    Given a bitfile id, walk back up the tree in HPSS to generate the bitfile's
    path
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    rows = db.select(table='nsobject',
                     fields=['parent_id', 'name'],
                     where='bitfile_id = ?',
                     data=(bitfile, ))

    if 1 < len(rows):
        raise U.HpssicError(MSG.multiple_objects_S % hexstr(bitfile))
    elif len(rows) < 1:
        return ("<unnamed bitfile>")

    rval = ''
    while rows:
        x = rows[0]
        if rval == '':
            rval = x['NAME']
        else:
            rval = os.path.join(x['NAME'], rval)

        rows = db.select(table='nsobject',
                         fields=['parent_id', 'name'],
                         where='object_id = ?',
                         data=(x['PARENT_ID'], ))

    return rval
Example #6
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    If we check a range and get no hits (i.e., no NSOBJECT ids exist in the
    range), we'll store

       (<time>, <low-id>, <high-id>, 0, 0)

    If we get a hit with the right copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 1, 0)

    If we get a hit with the wrong copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    tabname = cfg.get(sectname(), 'table_name')

    result = dbschem.make_table(tabname)
    ts = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, ts))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table=tabname,
              fields=[
                  'check_time', 'low_nsobj_id', 'high_nsobj_id', 'correct',
                  'error'
              ],
              data=[(ts, low, high, correct, error)])
    db.close()
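
A sketch of the three call shapes described in the docstring (the object ids are illustrative; cfg is a CrawlConfig object as in the other examples):

cfg = CrawlConfig.add_config()
record_checked_ids(cfg, 100, 200, 0, 0)   # range 100..200 scanned, no hits
record_checked_ids(cfg, 137, 137, 1, 0)   # id 137 found with the correct copy count
record_checked_ids(cfg, 137, 137, 0, 1)   # id 137 found with the wrong copy count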
Example #7
def get_next_nsobj_id(cfg):
    """
    Read the TCC table in the HPSSIC database to get the next nsobject id. If
    the table does not exist, we create it and return 1 for the next object id
    to check. If the table exists but is empty, we return 1 for the next object
    id to check.
    """
    tabname = cfg.get(sectname(), 'table_name')
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table=tabname):
        rval = 1
    else:
        rows = db.select(table=tabname, fields=['max(check_time)'])
        max_time = rows[0][0]
        if max_time is None:
            rval = 1
        else:
            rows = db.select(table=tabname,
                             fields=['high_nsobj_id'],
                             where='check_time = ?',
                             data=(max_time, ))
            rval = int(rows[0][0]) + 1
            if highest_nsobject_id() < rval:
                rval = 1
    db.close()
    return rval
Example #8
def ttype_map_insert(TT):
    """
    Populate the table PFX_tape_types with the contents of *TT*.
    """
    tt_tups = []
    for k in TT:
        if type(k) == int:
            try:
                mtype = TT[k]['label']
            except KeyError:
                mtype = TT[k]['name']
            for l in TT[k]:
                if type(l) == int:
                    try:
                        mstype = TT[k][l]['label']
                    except KeyError:
                        mstype = TT[k][l]['list'][0]

                    tt_tups.append((k, l, '%s/%s' % (mtype, mstype)))

    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table='tape_types',
              fields=['type', 'subtype', 'name'],
              data=tt_tups)
    db.close()
Example #9
def popcart(pc_l):
    """
    *pc_l* contains tuples of (path, db val, hsi cart val)
    """
    hp_l = [(x[2], x[0]) for x in pc_l]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables", fields=["cart"], where="path = ?", data=hp_l)
    db.close()
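
For illustration, a hypothetical *pc_l* entry and the reordered tuple the update receives -- the cart value fills fields=["cart"] and the path fills the "path = ?" placeholder:

pc_l = [("/hpss/some/file", "OLDCART", "CART0001")]   # (path, db val, hsi cart val) -- values are hypothetical
# hp_l becomes [("CART0001", "/hpss/some/file")]
popcart(pc_l)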
Example #10
def db_select(dbtype='hpss', dbname='sub', **dbargs):
    """
    Issue a select based on arguments passed in. Return the result.
    """
    db = CrawlDBI.DBI(dbtype=dbtype, dbname=dbname)
    result = db.select(**dbargs)
    db.close()
    return result
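
A minimal sketch of how the keyword arguments pass straight through to DBI.select(); the table and field names follow the nsobject examples above, and the id value is illustrative:

rows = db_select(table='nsobject',
                 fields=['object_id', 'name'],
                 where='parent_id = ?',
                 data=(12345,))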
Example #11
def lookup_migr_recs(**dbargs):
    """
    Look up and report records from table BFMIGRREC based on criteria provided
    by the user
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    rval = db.select(**dbargs)
    db.close()
    return rval
Example #12
def reset_path(pathname):
    """
    Reset the fails and reported fields on a row so it can be rechecked
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table='checkables',
              fields=['fails', 'reported'],
              where="path = ?",
              data=[(0, 0, pathname)])
    db.close()
Example #13
def max_nsobj_id():
    """
    Return the value of the largest NS object id in the nsobject table
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    result = db.select(table='nsobject',
                       fields=['max(object_id) as max_obj_id'])
    db.close()
    rval = int(result[0]['MAX_OBJ_ID'])
    return rval
Example #14
def record_history(name, when, errors):
    """
    Record a plugin name and runtime in the history table
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if not db.table_exists(table='history'):
        dbschem.make_table('history')
    db.insert(table='history',
              fields=['plugin', 'runtime', 'errors'],
              data=[(name, when, errors)])
    db.close()
Example #15
def make_table(tabname, cfg=None):
    """
    Make the indicated table if it does not exist
    """
    db = CrawlDBI.DBI(dbtype='crawler', cfg=cfg)
    if db.table_exists(table=tabname):
        rval = "Already"
    else:
        db.create(table=tabname, fields=tdefs[tabname]['fields'])
        rval = "Created"
    db.close()
    return rval
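
Usage sketch; the return value distinguishes a fresh create from a table that already exists (tabname must be a key in tdefs, e.g. 'history' as used elsewhere in these examples):

status = make_table('history')   # "Created" on the first call
status = make_table('history')   # "Already" on subsequent calls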
Example #16
def mpra_record_recent(type, start, end, hits):
    """
    Record the most recent record reported so we don't report records
    repeatedly. However, if recent is not later than the time already stored,
    we don't want to update it.
    """
    dbschem.make_table('mpra')
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table='mpra',
              fields=['type', 'scan_time', 'start_time', 'end_time', 'hits'],
              data=[(type, int(time.time()), int(start), int(end), hits)])
    db.close()
Example #17
    def load_recheck_list(cls, how_many):
        """
        Look to see whether any of the already checksummed items in the
        database have a last check time over the threshold for rechecking. If
        so, we'll shove some of them to the front of the list based on the
        configuration.
        """
        cfg = CrawlConfig.add_config()
        r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
        r_age = cfg.get_time('cv', 'recheck_age', 365 * 24 * 3600)
        threshold = int(time.time() - r_age)
        CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
        if r_fraction == 0.0:
            return []

        limit = round(r_fraction * how_many)

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {
            'table': 'checkables',
            'fields': ['rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                       'checksum', 'last_check', 'fails', 'reported'],
            'where': 'checksum <> 0 and last_check < %d' % threshold,
            'orderby': 'last_check',
            'limit': limit
        }

        rows = db.select(**kw)
        db.close()

        rval = []
        for row in rows:
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            in_db=True,
                            dirty=False)
            rval.append(new)
        return rval
Example #18
def ttype_missing():
    """
    Return a list of records where type = 'f' and ttype is null
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='checkables',
                     fields=[
                         'rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                         'checksum', 'last_check', 'fails', 'reported'
                     ],
                     where="type = 'f' and ttypes is null")
    db.close()
    return rows
Example #19
def table_list():
    """
    Return the list of HPSS tables from the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    db._dbobj.tbl_prefix = 'syscat.'
    rows = db.select(table='tables',
                     fields=[
                         "substr(tabname, 1, 30) as \"Table\"",
                         "substr(tabschema, 1, 30) as \"Schema\"", "type"
                     ],
                     where="tabschema = 'HPSS'")
    return rows
Example #20
def nulls_from_checkables():
    """
    Return rows from table checkables that contain null values
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    rval = db.select(table="checkables",
                     fields=[
                         "rowid", "path", "type", "cos", "cart", "ttypes",
                         "checksum", "last_check", "fails", "reported"
                     ],
                     where="fails is null or reported is null or cart is null")
    db.close()
    return rval
Example #21
def retrieve_history(**kw):
    """
    Retrieve and return the contents of table 'history'. At some point, we may
    need to turn this into a generator so we don't try to load the whole table
    into memory at once, but for now YAGNI.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    kw['table'] = 'history'
    if 'fields' not in kw:
        kw['fields'] = ['plugin', 'runtime', 'errors']
    rows = db.select(**kw)
    db.close()
    return rows
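
A hedged usage sketch; any select keyword shown in the other examples (where, data, orderby, limit) can be passed through, and each returned row holds (plugin, runtime, errors):

rows = retrieve_history(where="plugin = ?", data=('report',), orderby='runtime')
for plugin, runtime, errors in rows:
    print("%s %s %s" % (plugin, runtime, errors))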
Example #22
def lookup_nulls():
    """
    Return records that contain NULL values
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    rval = db.select(table="checkables",
                     fields=[
                         "rowid", "path", "type", "cos", "cart", "ttypes",
                         "checksum", "last_check", "fails", "reported"
                     ],
                     where="cos is NULL or cart is NULL or ttypes is NULL")
    db.close()
    return rval
Example #23
def load_history():
    """
    Read the contents of table pfx_mpra and load the unique scan_times
    into table pfx_history as mpra runtimes.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='mpra',
                     fields=['type', 'scan_time', 'hits'])
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=list(rows))
    db.close()
Example #24
def get_cos_info(obarg=None):
    """
    Read COS info from tables COS and HIER in the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(
        table=['cos A', 'hier B'],
        fields=['A.cos_id', 'A.hier_id', 'B.slevel0_migrate_list_count'],
        where="A.hier_id = B.hier_id")
    rval = {}
    for r in rows:
        rval[r['COS_ID']] = r['SLEVEL0_MIGRATE_LIST_COUNT']

    return rval
Example #25
def tpop_update_by_path(data):
    """
    Update media type (ttypes) and cartridge names (cart) based on path.

    Incoming *data* is a list of tuples containing ttypes, cart, path, and last
    check.
    """
    zdata = [(d[0], d[1], d[2]) for d in data]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables",
              fields=["ttypes", "cart"],
              where="path = ?",
              data=zdata)
    db.close()
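
An illustrative *data* list; only the first three elements of each tuple (ttypes, cart, path) are used, and the path value lines up with the "path = ?" placeholder:

data = [("T10000/T10000A", "CART0001", "/hpss/some/file", 1400000000)]   # values are hypothetical
tpop_update_by_path(data)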
Example #26
def get_checksum_count():
    """
    Return the count of checksums in the crawler database
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if db.table_exists(table="checkables"):
        rows = db.select(table='checkables',
                         fields=["count(path)"],
                         where="checksum = 1")
        checksums = rows[0][0]
    else:
        checksums = 0
    db.close()
    return checksums
Example #27
def load_history():
    """
    Read the contents of table pfx_report and load the report times into table
    pfx_history as run times for the report plugin.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='report',
                     fields=['report_time'])
    insert_data = [('report', x[0], 0) for x in rows]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=insert_data)
    db.close()
Example #28
    def load(self):
        """
        Read a checkable from the database and fill out the object
        """
        db = CrawlDBI.DBI(dbtype='crawler')
        if self.rowid is not None:
            rows = db.select(table='checkables',
                             fields=[
                                 'rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'
                             ],
                             where="rowid = ?",
                             data=(self.rowid, ))
        else:
            rows = db.select(table='checkables',
                             fields=[
                                 'rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'
                             ],
                             where="path = ?",
                             data=(self.path, ))
        if 0 == len(rows):
            self.in_db = False
        elif 1 == len(rows):
            self.in_db = True
            rz = list(rows[0])
            self.rowid = rz.pop(0)
            self.path = rz.pop(0)
            self.type = rz.pop(0)
            self.cos = rz.pop(0)
            self.cart = rz.pop(0)
            self.ttypes = rz.pop(0)
            self.checksum = rz.pop(0)
            self.last_check = rz.pop(0)
            try:
                self.fails = rz.pop(0)
            except IndexError:
                self.fails = 0
            try:
                self.reported = rz.pop(0)
            except IndexError:
                self.reported = 0
            self.dirty = False
        else:
            raise StandardError("There appears to be more than one copy " +
                                "of %s in the database" % self)

        db.close()
Example #29
def get_report():
    """
    Generate and return a text report
    """
    db = CrawlDBI.DBI(dbtype="crawler")

    last_report_time = rpt_sublib.get_last_rpt_time(db)
    report = get_cv_report(db, last_report_time)
    report += get_mpra_report(db, last_report_time)
    report += get_tcc_report(db, last_report_time)
    set_last_rpt_time(db)

    db.close()
    return report
Example #30
def alter_table(table=None, addcol=None, dropcol=None, pos=None, cfg=None):
    """
    Alter a table, either adding a column (*addcol*) in position *pos*, or
    dropping a column (*dropcol*). This function should be idempotent, so we
    need to check for the column before adding it.
    """
    if cfg:
        db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    else:
        db = CrawlDBI.DBI(dbtype="crawler")

    if addcol and dropcol:
        raise CrawlDBI.DBIerror("addcol and dropcol are mutually exclusive")
    elif addcol:
        fieldname = addcol.split()[0]
    elif dropcol:
        fieldname = dropcol

    try:
        db.alter(table=table, addcol=addcol, dropcol=dropcol, pos=pos)
        rval = "Successful"
    except CrawlDBI.DBIerror as e:
        if (dropcol and
            "Can't DROP '%s'; check that column/key exists" % fieldname
                in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        elif (addcol and
              "Duplicate column name '%s'" % fieldname
              in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        else:
            raise

    db.close()
    return rval
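
A hedged usage sketch; the column definition syntax is whatever the underlying database accepts (the error messages caught above suggest MySQL), and the column name here is purely illustrative:

msg = alter_table(table='checkables', addcol='mtime int', pos='cart')   # "Successful"
msg = alter_table(table='checkables', addcol='mtime int', pos='cart')   # duplicate-column message
msg = alter_table(table='checkables', dropcol='mtime')                  # "Successful"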