Code Example #1
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def nulls_from_checkables():
    """
    Return rows from table checkables that contain null values
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    rval = db.select(table="checkables",
                     fields=[
                         "rowid", "path", "type", "cos", "cart", "ttypes",
                         "checksum", "last_check", "fails", "reported"
                     ],
                     where="fails is null or reported is null or cart is null")
    db.close()
    return rval
Code Example #2
def retrieve_history(**kw):
    """
    Retrieve and return the contents of table 'history'. At some point, we may
    need to turn this into a generator so we don't try to load the whole table
    into memory at once, but for now YAGNI.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    kw['table'] = 'history'
    if 'fields' not in kw:
        kw['fields'] = ['plugin', 'runtime', 'errors']
    rows = db.select(**kw)
    db.close()
    return rows
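
Because retrieve_history() forwards its keyword arguments straight to db.select(), a caller can narrow the query without changing the function. A minimal usage sketch, assuming the crawler database is configured; the where clause and limit are illustrative:

# Any keyword accepted by db.select() passes through unchanged.
everything = retrieve_history()
cv_runs = retrieve_history(where="plugin = 'cv'")
recent = retrieve_history(fields=['runtime', 'errors'], limit=10)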
Code Example #3
def table_list():
    """
    Return the list of HPSS tables from the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
    db._dbobj.tbl_prefix = 'syscat.'
    rows = db.select(table='tables',
                     fields=[
                         "substr(tabname, 1, 30) as \"Table\"",
                         "substr(tabschema, 1, 30) as \"Schema\"", "type"
                     ],
                     where="tabschema = 'HPSS'")
    db.close()
    return rows
Code Example #4
def load_history():
    """
    Read the contents of table pfx_report and load the report times into table
    pfx_history as run times for the report plugin.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='report',
                     fields=['report_time'])
    insert_data = [('report', x[0], 0) for x in rows]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=insert_data)
    db.close()
Code Example #5
    def load(self):
        """
        Read a checkable from the database and fill out the object
        """
        db = CrawlDBI.DBI(dbtype='crawler')
        if self.rowid is not None:
            rows = db.select(table='checkables',
                             fields=[
                                 'rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'
                             ],
                             where="rowid = ?",
                             data=(self.rowid, ))
        else:
            rows = db.select(table='checkables',
                             fields=[
                                 'rowid', 'path', 'type', 'cos', 'cart',
                                 'ttypes', 'checksum', 'last_check', 'fails',
                                 'reported'
                             ],
                             where="path = ?",
                             data=(self.path, ))
        if 0 == len(rows):
            self.in_db = False
        elif 1 == len(rows):
            self.in_db = True
            rz = list(rows[0])
            self.rowid = rz.pop(0)
            self.path = rz.pop(0)
            self.type = rz.pop(0)
            self.cos = rz.pop(0)
            self.cart = rz.pop(0)
            self.ttypes = rz.pop(0)
            self.checksum = rz.pop(0)
            self.last_check = rz.pop(0)
            try:
                self.fails = rz.pop(0)
            except IndexError:
                self.fails = 0
            try:
                self.reported = rz.pop(0)
            except IndexError:
                self.reported = 0
            self.dirty = False
        else:
            raise StandardError("There appears to be more than one copy " +
                                "of %s in the database" % self)

        db.close()
Code Example #6
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def get_checksum_count():
    """
    Return the count of checksums in the crawler database
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if db.table_exists(table="checkables"):
        rows = db.select(table='checkables',
                         fields=["count(path)"],
                         where="checksum = 1")
        checksums = rows[0][0]
    else:
        checksums = 0
    db.close()
    return checksums
Code Example #7
def get_cos_info(obarg=None):
    """
    Read COS info from tables COS and HIER in the DB2 database
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(
        table=['cos A', 'hier B'],
        fields=['A.cos_id', 'A.hier_id', 'B.slevel0_migrate_list_count'],
        where="A.hier_id = B.hier_id")
    rval = {}
    for r in rows:
        rval[r['COS_ID']] = r['SLEVEL0_MIGRATE_LIST_COUNT']

    db.close()
    return rval
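
The dict returned by get_cos_info() maps each COS id to its slevel0_migrate_list_count, which the surrounding code's naming suggests is the number of tape copies expected for that COS. A hedged sketch; the COS id and the copy-count reading are assumptions:

# Sketch: look up the expected copy count for a hypothetical COS id.
# The key type follows whatever DB2 returns for A.cos_id.
cos_info = get_cos_info()
expected_copies = cos_info.get(6001, 0)
print("COS 6001 should have %d tape copies" % expected_copies)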
Code Example #8
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def tpop_update_by_path(data):
    """
    Update media type (ttypes) and cartridge names (cart) based on path.

    Incoming *data* is a list of tuples containing ttypes, cart, path, and last
    check.
    """
    # keep (ttypes, cart, path) from each tuple; the trailing last_check
    # field is not needed for the update
    zdata = [(d[0], d[1], d[2]) for d in data]
    db = CrawlDBI.DBI(dbtype="crawler")
    db.update(table="checkables",
              fields=["ttypes", "cart"],
              where="path = ?",
              data=zdata)
    db.close()
Code Example #9
File: rpt_lib.py  Project: ORNL-TechInt/hpss-crawler
def get_report():
    """
    Generate and return a text report
    """
    db = CrawlDBI.DBI(dbtype="crawler")

    last_report_time = rpt_sublib.get_last_rpt_time(db)
    report = get_cv_report(db, last_report_time)
    report += get_mpra_report(db, last_report_time)
    report += get_tcc_report(db, last_report_time)
    set_last_rpt_time(db)

    db.close()
    return report
Code Example #10
File: dbschem.py  Project: ORNL-TechInt/hpss-crawler
def alter_table(table=None, addcol=None, dropcol=None, pos=None, cfg=None):
    """
    Alter a table, either adding a column (*addcol*) in position *pos*, or
    dropping a column (*dropcol*). This function should be idempotent, so we
    need to check for the column before adding it.
    """
    if cfg:
        db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    else:
        db = CrawlDBI.DBI(dbtype="crawler")

    if addcol and dropcol:
        raise CrawlDBI.DBIerror("addcol and dropcol are mutually exclusive")
    elif addcol:
        fieldname = addcol.split()[0]
    elif dropcol:
        fieldname = dropcol

    try:
        db.alter(table=table, addcol=addcol, dropcol=dropcol, pos=pos)
        rval = "Successful"
    except CrawlDBI.DBIerror as e:
        if (dropcol and
            "Can't DROP '%s'; check that column/key exists" % fieldname
                in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        elif (addcol and
              "Duplicate column name '%s'" % fieldname
              in str(e)):
            # edit the error number off the front of the message
            rval = re.sub(r"\s*\d+:\s*", "", e.value)
        else:
            raise

    db.close()
    return rval
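
Because alter_table() swallows the 'Duplicate column name' and "Can't DROP" errors, setup code can call it repeatedly without failing on the second pass. A sketch of that idempotent use; the column definition is illustrative and the *pos* value assumes a MySQL-style "insert after this column" meaning:

# A second identical call is harmless: it returns the edited error
# message instead of raising.
alter_table(table='checkables', addcol='reported int', pos='fails')
alter_table(table='checkables', addcol='reported int', pos='fails')
alter_table(table='checkables', dropcol='reported')
alter_table(table='checkables', dropcol='reported')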
Code Example #11
def load_history():
    """
    Read the contents of table pfx_tcc_data and load the unique check_times
    into table pfx_history as tcc runtimes.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table='tcc_data',
                     fields=['check_time', 'sum(error)'],
                     groupby='check_time')
    insert_data = [('tcc', x[0], x[1]) for x in rows]
    db.insert(table='history',
              ignore=True,
              fields=['plugin', 'runtime', 'errors'],
              data=insert_data)
    db.close()
Code Example #12
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def get_match_fail_count():
    """
    Return the match and fail counts in the crawler database
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if db.table_exists(table=stats_table):
        rows = db.select(table=stats_table,
                         fields=["matches", "failures"],
                         where="rowid = 1")
        (matches, failures) = rows[0]
    else:
        (matches, failures) = (0, 0)

    db.close()
    return (matches, failures)
Code Example #13
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def prep_popcart(where, limit):
    """
    Get a list of paths and carts from database based on where. If 0 < limit,
    no more than limit records will be retrieved.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    kw = {}
    if 0 < limit:
        kw['limit'] = limit
    rows = db.select(table="checkables",
                     fields=["path", "cart"],
                     where=where,
                     **kw)
    db.close()
    return rows
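
A usage sketch for prep_popcart(); the where clause mirrors the null-cart filters used elsewhere on this page:

# Fetch up to 50 file records whose cartridge is not yet known.
for (path, cart) in prep_popcart("type = 'f' and cart is null", 50):
    print(path, cart)

# With limit <= 0, no limit keyword is passed, so every match comes back.
all_rows = prep_popcart("cart is null", 0)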
Code Example #14
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def ttype_cart_lookup(cartname):
    """
    Look up *cartname* in HPSS table PVLPV and return the cartridge's media
    type and subtype.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='cfg')
    rows = db.select(table="pvlpv",
                     fields=[
                         "phys_vol_type_type",
                         "phys_vol_type_subtype",
                     ],
                     where="phys_vol_id = ?",
                     data=(cartname, ))
    db.close()
    return (rows[0]['PHYS_VOL_TYPE_TYPE'], rows[0]['PHYS_VOL_TYPE_SUBTYPE'])
Code Example #15
    def find_by_path(self):
        """
        Look up the current item in the database based on path, returning the
        database row(s).
        """
        db = CrawlDBI.DBI(dbtype='crawler')
        rv = db.select(table='checkables',
                       fields=[
                           'rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                           'checksum', 'last_check', 'fails', 'reported'
                       ],
                       where='path=?',
                       data=(self.path, ))
        db.close()
        return rv
Code Example #16
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def ttype_map_desc(type, subtype):
    """
    Look up *type* and *subtype* in the crawler table tape_types and return
    the corresponding media description.
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    rows = db.select(table="tape_types",
                     fields=["name"],
                     where="type = ? and subtype = ?",
                     data=(
                         type,
                         subtype,
                     ))
    db.close()
    return rows[0][0]
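
Examples #14 and #16 are two halves of one lookup: ttype_cart_lookup() maps a cartridge name to a (type, subtype) pair on the HPSS side, and ttype_map_desc() maps that pair to a media description on the crawler side. A sketch chaining them; the cartridge name is invented:

# Resolve a hypothetical cartridge name to its media description.
(mtype, msubtype) = ttype_cart_lookup('X0001700')
print("cart X0001700 is %s" % ttype_map_desc(mtype, msubtype))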
Code Example #17
def load_history(filename):
    """
    Read log file *filename* and create records in table pfx_history
    corresponding to each time the cv plugin was run.

    A line containing 'cv_plugin' and 'firing up' indicates a run time.

    A subsequent line containing 'cv_plugin' and 'failures: %d' indicates the
    error count for the run.
    """
    # -------------------------------------------------------------------------
    def cv_fires(line):
        """
        Parse *line* to decide whether it indicates a firing of the cv plugin.
        """
        return all([runtime is None,
                    error is None,
                    'cv_plugin' in line or 'checksum-verifier' in line,
                    'firing up' in line])

    # -------------------------------------------------------------------------
    def cv_completes(line):
        """
        Parse *line* to decide whether it indicates completion of a firing of
        the cv plugin.
        """
        return all([runtime is not None,
                    error is None,
                    'cv_plugin' in line or 'checksum-verifier' in line,
                    'failures:' in line,
                    'totals' not in line])

    # -------------------------------------------------------------------------
    db = CrawlDBI.DBI(dbtype='crawler')
    runtime = error = None
    with open(filename, 'r') as f:
        for line in f:
            if cv_fires(line):
                runtime = U.epoch(line[0:18])
            if cv_completes(line):
                error = int(U.rgxin(r'failures: (\d+)', line))
            if runtime is not None and error is not None:
                db.insert(table='history',
                          ignore=True,
                          fields=['plugin', 'runtime', 'errors'],
                          data=[('cv', runtime, error)])
                runtime = error = None
    db.close()
Code Example #18
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def lookup_checksum_by_path(path):
    """
    Retrieve a row by path and return the checksum value
    """

    db = CrawlDBI.DBI(dbtype="crawler")
    rows = db.select(table="checkables",
                     fields=["checksum"],
                     where="path = ?",
                     data=(path, ))
    if 0 == len(rows):
        rval = -1
    elif 1 == len(rows):
        rval = rows[0][0]
    else:
        raise SystemExit("Too many records found for path %s" % path)
    db.close()
    return rval
Code Example #19
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def tpop_select_all(db=None):
    """
    Return a list of all the 'f' (file) type checkables records with a null
    ttypes or cart.
    """
    close = False
    if db is None:
        db = CrawlDBI.DBI(dbtype='crawler')
        close = True
    rval = db.select(table="checkables",
                     fields=["path", "type", "ttypes", "cart", "last_check"],
                     where="type = 'f' and " +
                     "(ttypes is NULL or cart is NULL)")
    if close:
        db.close()
    return rval
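
tpop_select_all() and tpop_update_by_path() (example #8) bracket a fill-in pass: select the file records still missing ttypes or cart, derive the missing values, and write them back keyed on path. A sketch of that loop; lookup_ttypes_cart() is a hypothetical helper standing in for whatever derives the values:

# Sketch of the ttypes/cart population pass.
updates = []
for (path, ftype, ttypes, cart, last_check) in tpop_select_all():
    (new_ttypes, new_cart) = lookup_ttypes_cart(path)   # hypothetical
    updates.append((new_ttypes, new_cart, path))
tpop_update_by_path(updates)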
Code Example #20
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def update_stats(cmf):
    """
    Record the values in tuple cmf in table cvstats in the database. If the
    table does not exist, create it.
    """
    result = dbschem.make_table(stats_table)
    db = CrawlDBI.DBI(dbtype="crawler")
    if result == "Created":
        db.insert(table=stats_table,
                  fields=["rowid", "matches", "failures"],
                  data=[(1, 0, 0)])

    db.update(table=stats_table,
              fields=["matches", "failures"],
              data=[cmf],
              where="rowid = 1")
    db.close()
Code Example #21
def bitfile_lookup(bflist, dbh=None):
    """
    Lookup each of the ids in *bflist* in table BITFILE. Return the count of
    records found.
    """
    local_connect = False
    if dbh is None:
        dbh = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
        local_connect = True

    count = dbh.select(table='bitfile',
                       fields=['count(bfid) as bfid_count'],
                       where='bfid in (%s)' % ','.join(bflist))

    if local_connect:
        dbh.close()

    return count[0]['BFID_COUNT']
Code Example #22
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def lscos_show():
    """
    Report the contents of the lscos table if it exists
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table='lscos'):
        rval = "\n    Table lscos does not exist\n"
    else:
        rows = db.select(
            table='lscos',
            fields=['cos', 'name', 'copies', 'min_size', 'max_size'])
        rval = ""
        for r in rows:
            rval += "%s %-35s %3d %10d %10d\n" % r
    db.close()

    return rval
Code Example #23
def lscos_populate():
    """
    If table lscos already exists, we're done. Otherwise, retrieve the lscos
    info from hsi, create the table, and fill the table in.

    We store the min_size and max_size for each COS as text strings containing
    digits because the largest sizes are already within three orders of
    magnitude of a mysql bigint and growing.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    tabname = 'lscos'
    st = dbschem.make_table(tabname)
    szrgx = r"(\d+([KMGT]B)?)"
    rgx = (r"\s*(\d+)\s*(([-_a-zA-Z0-9]+\s)+)\s+[UGAN]*\s+(\d+)" +
           r"\s+(ALL)?\s+%s\s+-\s+%s" % (szrgx, szrgx))
    if "Created" == st:
        H = hpss.HSI()
        raw = H.lscos()
        H.quit()

        z = [x.strip() for x in raw.split('\r')]
        rules = [q for q in z if '----------' in q]
        first = z.index(rules[0]) + 1
        second = z[first:].index(rules[0]) + first
        lines = z[first:second]
        data = []
        for line in lines:
            m = U.rgxin(rgx, line)
            (cos, desc, copies, lo_i, hi_i) = (m[0],
                                               m[1].strip(),
                                               m[3],
                                               U.scale(m[5], kb=1024),
                                               U.scale(m[7], kb=1024))
            data.append((cos, desc, copies, lo_i, hi_i))

        db.insert(table=tabname,
                  fields=['cos', 'name', 'copies', 'min_size', 'max_size'],
                  data=data)
        rval = MSG.table_created_S % tabname
    else:
        rval = MSG.table_already_S % tabname

    db.close()
    return rval
Code Example #24
def cos_description(name):
    """
    Look up and return the description of cos *name*
    """
    db = CrawlDBI.DBI(dbtype='crawler')
    if not db.table_exists(table='lscos'):
        cv_sublib.lscos_populate()

    r = db.select(table='lscos',
                  fields=['name'],
                  where="cos = ?",
                  data=(name,))
    if 0 < len(r):
        rval = r[0][0]
    else:
        rval = "???"
    db.close()

    return rval
Code Example #25
def distinct_objects(where="", db=None):
    """
    Return distinct low_nsobj_id values from table tcc_data, optionally
    filtered by *where* (e.g., "correct = 1" or "correct <> 1")
    """
    need_close = False
    if db is None:
        db = CrawlDBI.DBI(dbtype="crawler")
        need_close = True

    kw = {'table': "tcc_data", 'fields': ["distinct(low_nsobj_id)"]}
    if where != "":
        kw['where'] = where
        kw['fields'].append("correct")

    rows = db.select(**kw)

    if need_close:
        db.close()

    return rows
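
Callers choose between the two shapes of distinct_objects() with the *where* argument; when a filter is given, the correct flag comes back alongside each object id. A short sketch, with filters taken from the docstring's examples:

# All distinct object ids: one-element rows.
all_ids = distinct_objects()

# Only the ids flagged incorrect: (low_nsobj_id, correct) rows.
for (obj_id, correct) in distinct_objects(where="correct <> 1"):
    print(obj_id, correct)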
Code Example #26
def xplocks(output=None, mark=False):
    """
    Look for expired purge locks in bfpurgerec.
    """
    cfg = CrawlConfig.get_config()
    now = time.time()
    hits = 0

    opened = True
    if output is None:
        f = open(cfg.get('mpra', 'report_file'), 'a')
    elif type(output) == str:
        f = open(output, 'a')
    elif type(output) == file:
        f = output
        opened = False
    else:
        raise StandardError("output type must be 'str' or 'file' ")

    dbs = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    lock_min = cfg.getint('mpra', 'lock_duration')

    rows = dbs.select(table='bfpurgerec',
                      fields=['bfid', 'record_lock_time'],
                      where='record_lock_time <> 0')
    if 0 < len(rows):
        f.write("Expired Purge Locks\n")
        for r in rows:
            if (lock_min * 60) < (now - r['RECORD_LOCK_TIME']):
                hits += 1
                f.write("   %s  %s\n" % (CrawlDBI.DBIdb2.hexstr(r['BFID']),
                                         util.ymdhms(r['RECORD_LOCK_TIME'])))

    if mark:
        mpra_record_recent('purge', 0, 0, hits)

    if opened:
        f.close()

    return hits
Code Example #27
File: dbschem.py  Project: ORNL-TechInt/hpss-crawler
def drop_tables_matching(tablike):
    """
    Drop tables with names matching the *tablike* expression. At the time of
    writing, this is only used to drop test tables ('test_%')
    """
    tcfg = CrawlConfig.get_config()
    tcfg.set('dbi-crawler', 'tbl_prefix', '')
    db = CrawlDBI.DBI(cfg=tcfg, dbtype='crawler')
    if CrawlDBI.mysql_available and 'mysql' in str(db):

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    "Can't read dir of .*")
            tlist = db.select(table="information_schema.tables",
                              fields=['table_name'],
                              where="table_name like '%s'" % tablike)
            for (tname,) in tlist:
                if db.table_exists(table=tname):
                    db.drop(table=tname)
    db.close()
Code Example #28
def nsobject_lookup(start_id, id_count, dbh=None):
    """
    Lookup the records in NSOBJECT beginning with start_id and continuing for
    id_count entries. Return the list of corresponding bitfile ids.
    """
    local_connect = False
    if dbh is None:
        dbh = CrawlDBI.DBI(dbtype='hpss', dbname='sub')
        local_connect = True

    bflist = dbh.select(table='nsobject',
                        fields=['bitfile_id'],
                        where='? <= object_id and object_id < ?',
                        data=(start_id, start_id + id_count),
                        limit=id_count)

    if local_connect:
        dbh.close()

    rval = [CrawlDBI.DBIdb2.hexstr(z['BITFILE_ID']) for z in bflist]
    return rval
Code Example #29
def nsobj_id(name='', parent=None):
    """
    Look up an nsobject id based on name and, optionally, parent id
    """
    if name == '':
        return -1
    elif parent is None:
        where = "name = '%s'" % name
    else:
        where = "name = '%s' and parent_id=%d" % (name, parent)

    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    rows = db.select(table='hpss.nsobject',
                     fields=['object_id', 'parent_id'],
                     where=where)
    db.close()
    try:
        rval = (rows[0]['OBJECT_ID'], rows[0]['PARENT_ID'])
    except IndexError:
        raise U.HpssicError(MSG.no_such_path_component_SD % (name, parent))

    return rval
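
Because nsobj_id() returns both the object id and the parent id, a caller can resolve an absolute HPSS path one component at a time, feeding each component's object id back in as the next component's parent. A hedged sketch; the path is invented, and the first component is looked up without a parent constraint, which assumes its name is unique in NSOBJECT:

# Walk '/home/user/file' component by component.
parent = None
for component in '/home/user/file'.lstrip('/').split('/'):
    (obj_id, parent_id) = nsobj_id(name=component, parent=parent)
    parent = obj_id
print("object id for /home/user/file is %d" % parent)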
Code Example #30
def get_bitfile_set(first_nsobj_id, limit):
    """
    Get a collection of bitfiles from DB2, returning a dict. The bitfiles in
    the set begin with object_id first_nsobj_id and end with the one before
    first_nsobj_id + limit.
    """
    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    bfid_list = nsobject_lookup(first_nsobj_id, limit, dbh=db)
    if 0 == len(bfid_list):
        db.close()
        raise U.HpssicError(MSG.not_in_nsobject_D % first_nsobj_id)

    n_found = bitfile_lookup(bfid_list, dbh=db)
    if 0 == n_found:
        db.close()
        raise U.HpssicError(MSG.not_in_bitfile_S % bfid_list[0])

    rval = db.select(table=['nsobject A', 'bitfile B', 'bftapeseg C'],
                     fields=[
                         'A.object_id', 'B.bfid', 'B.bfattr_cos_id',
                         'B.bfattr_create_time',
                         'count(C.storage_class) as sc_count'
                     ],
                     where="A.bitfile_id = B.bfid and B.bfid = C.bfid and " +
                     "B.bfattr_data_len > 0 and C.bf_offset = 0 and " +
                     "? <= A.object_id and A.object_id < ? ",
                     groupby=", ".join([
                         "A.object_id", "B.bfid", "B.bfattr_cos_id",
                         "B.bfattr_create_time"
                     ]),
                     data=(first_nsobj_id, first_nsobj_id + limit),
                     limit=limit)
    db.close()

    if 0 == len(rval):
        raise U.HpssicError(MSG.not_in_bftapeseg_S % bfid_list[0])

    return rval
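
get_bitfile_set() is windowed on object id, so a scan of the namespace proceeds in fixed strides of *limit*, advancing first_nsobj_id each pass; the U.HpssicError cases mark windows with nothing to report. A paging sketch; the start, stride, and stop values are arbitrary and process() is a hypothetical per-row handler:

# Sweep object ids in strides of 1000, tolerating empty windows.
(start, stride, stop) = (1, 1000, 50000)
while start < stop:
    try:
        for row in get_bitfile_set(start, stride):
            process(row)          # hypothetical handler
    except U.HpssicError:
        pass                      # nothing qualifying in this window
    start += stride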