Code example #1
0
def record_history(name, when, errors):
    """
    Store one plugin run -- plugin name, runtime, and error count -- as a
    row in the 'history' table, creating the table first if it is missing.
    """
    hdb = CrawlDBI.DBI(dbtype='crawler')
    if not hdb.table_exists(table='history'):
        dbschem.make_table('history')
    row = (name, when, errors)
    hdb.insert(table='history',
               fields=['plugin', 'runtime', 'errors'],
               data=[row])
    hdb.close()
Code example #2
0
def mpra_record_recent(type, start, end, hits):
    """
    Record the most recent record reported so we don't report records
    repeatedly. However, if recent is not later than the time already stored,
    we don't want to update it.
    """
    dbschem.make_table('mpra')
    conn = CrawlDBI.DBI(dbtype="crawler")
    # scan_time is "now"; start/end are coerced to integer epoch times
    row = (type, int(time.time()), int(start), int(end), hits)
    conn.insert(table='mpra',
                fields=['type', 'scan_time', 'start_time', 'end_time',
                        'hits'],
                data=[row])
    conn.close()
Code example #3
0
File: cv.py  Project: ORNL-TechInt/hpss-crawler
def cvv_ttype_table(argv):
    """ttype_table - create (or drop) table tape_types

    usage: cv ttype_table [-d] {-D|-r /opt/hpss}

    Without the -D/--drop option, create the table tape_types in the mysql
    database. Populate it with information from an HPSS build tree (default is
    /opt/hpss).

    With -D or --drop, drop the table.
    """
    prs = optparse.OptionParser()
    prs.add_option('-D', '--drop',
                   action='store_true', default=False, dest='drop',
                   help='drop the table')
    prs.add_option('-d', '--debug',
                   action='store_true', default=False, dest='debug',
                   help='run the debugger')
    prs.add_option('-r', '--root',
                   action='store', default='', dest='hpssroot',
                   help='where to look for data')
    try:
        (o, a) = prs.parse_args(argv)
    except SystemExit:
        # optparse exits on -h/--help or a bad option; treat it as "done"
        return

    if o.debug:
        pdb.set_trace()

    if o.drop:
        # Drop the table and report the outcome
        print(dbschem.drop_table(table="tape_types"))
    else:
        # Create the table (if needed) and fill it from the HPSS build tree
        dbschem.make_table("tape_types")

        # Resolve the HPSS root: -r option, then $HPSS_ROOT, then /opt/hpss
        if o.hpssroot != '':
            hpssroot = o.hpssroot
        else:
            hpssroot = os.getenv("HPSS_ROOT")
        if hpssroot is None:
            hpssroot = "/opt/hpss"

        tape_types_populate(hpssroot)
Code example #4
0
File: tcc_lib.py  Project: ORNL-TechInt/hpss-crawler
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    Each stored row is (<check time>, <low id>, <high id>, <correct>,
    <error>):

      - range checked, no hits:         (<time>, <low-id>, <high-id>, 0, 0)
      - hit with the right copy count:  (<time>, <hit-id>, <hit-id>, 1, 0)
      - hit with the wrong copy count:  (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    tabname = cfg.get(sectname(), 'table_name')

    # Make sure the table exists before inserting
    dbschem.make_table(tabname)
    now = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, now))
    conn = CrawlDBI.DBI(dbtype="crawler")
    conn.insert(table=tabname,
                fields=['check_time', 'low_nsobj_id', 'high_nsobj_id',
                        'correct', 'error'],
                data=[(now, low, high, correct, error)])
    conn.close()
Code example #5
0
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    If we check a range and get no hits (i.e., no NSOBJECT ids exist in the
    range), we'll store

       (<time>, <low-id>, <high-id>, 0, 0)

    If we get a hit with the right copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 1, 0)

    If we get a hit with the wrong copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    # The table name comes from this plugin's section of the configuration
    tabname = cfg.get(sectname(), 'table_name')

    # Ensure the table exists before inserting (result is unused here)
    result = dbschem.make_table(tabname)
    ts = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, ts))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table=tabname,
              fields=[
                  'check_time', 'low_nsobj_id', 'high_nsobj_id', 'correct',
                  'error'
              ],
              data=[(ts, low, high, correct, error)])
    db.close()
Code example #6
0
File: cv.py  Project: ORNL-TechInt/hpss-crawler
def cvv_ttype_table(argv):
    """ttype_table - create (or drop) table tape_types

    usage: cv ttype_table [-d] {-D|-r /opt/hpss}

    Without the -D/--drop option, create the table tape_types in the mysql
    database. Populate it with information from an HPSS build tree (default is
    /opt/hpss).

    With -D or --drop, drop the table.
    """
    # Parse the subcommand's options
    p = optparse.OptionParser()
    p.add_option('-D', '--drop',
                 action='store_true', default=False, dest='drop',
                 help='drop the table')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-r', '--root',
                 action='store', default='', dest='hpssroot',
                 help='where to look for data')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        # optparse calls sys.exit() on -h/--help or a bad option; just return
        return

    if o.debug:
        pdb.set_trace()

    # lookup and report tape type for each pathname specified
    if o.drop:
        result = dbschem.drop_table(table="tape_types")
        print result
    else:
        dbschem.make_table("tape_types")

        # Fall back from the -r option to $HPSS_ROOT to /opt/hpss
        hpssroot = o.hpssroot
        if hpssroot == '':
            hpssroot = os.getenv("HPSS_ROOT")
        if hpssroot is None:
            hpssroot = "/opt/hpss"

        tape_types_populate(hpssroot)
Code example #7
0
File: crawl.py  Project: ORNL-TechInt/hpss-crawler
def history_load(loadlist, filename):
    """
    Each plugin's sublib has a load_history() routine that knows how to load
    its data to the history file.

    Unfortunately, we do have to know here something special about plugin 'cv'
    to warn the user when a filename was specified without 'cv' in the load
    list or vice versa and when to pass filename to the plugin's load_history()
    method.
    """
    cfg = CrawlConfig.add_config()
    pluglist = U.csv_list(cfg.get_d('crawler', 'plugins', U.default_plugins()))
    ll = U.csv_list(loadlist)
    # 'all' (or an empty load list) means "every configured plugin"
    if 'all' in ll or ll == []:
        ll = copy.deepcopy(pluglist)

    if filename is None and 'cv' in ll:
        print(MSG.history_cv_not_loaded)
        ll.remove('cv')
    elif filename is not None and 'cv' not in ll:
        print(MSG.history_filename_ignored)

    # Warn about and drop any requested plugins that are not configured.
    # An explicit loop replaces map(ll.remove, unk_plugs): map() is lazy in
    # Python 3, so the side-effecting removals would silently never run there.
    unk_plugs = [x for x in ll if x not in pluglist]
    if 0 < len(unk_plugs):
        print(MSG.unrecognized_plugin_S % ', '.join(unk_plugs))
        for unk in unk_plugs:
            ll.remove(unk)

    if ll == []:
        return

    dbschem.make_table('history')
    for plug in [x for x in ll if x in pluglist]:
        print("loading %s..." % plug)
        # Only 'cv' takes the filename argument
        if plug == 'cv' and filename is not None:
            args = [filename]
        else:
            args = []
        p = CrawlPlugin.CrawlPlugin(name=plug, cfg=cfg)
        p.load_history(*args)
Code example #8
0
File: crawl.py  Project: ORNL-TechInt/hpss-crawler
def history_load(loadlist, filename):
    """
    Each plugin's sublib has a load_history() routine that knows how to load
    its data to the history file.

    Unfortunately, we do have to know here something special about plugin 'cv'
    to warn the user when a filename was specified without 'cv' in the load
    list or vice versa and when to pass filename to the plugin's load_history()
    method.
    """
    cfg = CrawlConfig.add_config()
    # Plugins configured in the crawler section (or the default list)
    pluglist = U.csv_list(cfg.get_d('crawler', 'plugins', U.default_plugins()))
    ll = U.csv_list(loadlist)
    # 'all' (or an empty load list) means "every configured plugin"
    if 'all' in ll or ll == []:
        ll = copy.deepcopy(pluglist)

    if filename is None and 'cv' in ll:
        print(MSG.history_cv_not_loaded)
        ll.remove('cv')
    elif filename is not None and 'cv' not in ll:
        print(MSG.history_filename_ignored)

    # Warn about and drop requested plugins that are not configured
    unk_plugs = [x for x in ll if x not in pluglist]
    if 0 < len(unk_plugs):
        print(MSG.unrecognized_plugin_S % ', '.join(unk_plugs))
        # NOTE(review): map() is eager in Python 2 but lazy in Python 3, where
        # these removals would silently never happen -- confirm target version
        map(ll.remove, unk_plugs)

    if ll == []:
        return

    dbschem.make_table('history')
    for plug in [x for x in ll if x in pluglist]:
        print("loading %s..." % plug)
        # Only the 'cv' plugin receives the filename argument
        if plug == 'cv' and filename is not None:
            args = [filename]
        else:
            args = []
        p = CrawlPlugin.CrawlPlugin(name=plug, cfg=cfg)
        p.load_history(*args)
Code example #9
0
    def ex_nihilo(cls, dataroot='/'):
        """
        Start from scratch. Create the database if necessary. Create the
        table(s) if necessary. Bootstrap the queue by adding the root
        director(ies).

        Field path is the location of the file or directory in the HPSS
        archive.

        Field type is 'f' for files or 'd' for directories.

        Field cos is the class of service for the file. For directories, cos is
        empty.

        Field cart starts with a null value. When populated from hsi, it may be
        set to the name of a tape cartridge or to ''. Empty files take up no
        space on any cartridge, so for them the field is empty.

        Field checksum is 0 if we have not computed or discoverd a checksum for
        the file. Once we know a checksum has been stored for the file, we set
        this to 1.

        Field last_check is the epoch time at which the file was last checked.

        Field fails is the number of times hashcreate and/or hashverify has
        failed on the file.

        Field reported is 0 or 1 indicating whether we've reported
        """
        dbschem.make_table("checkables")
        # isinstance() rather than type() == so str subclasses are accepted;
        # a single path is normalized to a one-element list
        if isinstance(dataroot, str):
            dataroot = [dataroot]

        # Seed the queue with a dirty Checkable for each root directory;
        # anything that is neither str nor list is silently ignored (as before)
        if isinstance(dataroot, list):
            for root in dataroot:
                r = Checkable(path=root, type='d', in_db=False, dirty=True)
                r.load()
                r.persist()
Code example #10
0
    def ex_nihilo(cls, dataroot='/'):
        """
        Start from scratch. Create the database if necessary. Create the
        table(s) if necessary. Bootstrap the queue by adding the root
        director(ies).

        Field path is the location of the file or directory in the HPSS
        archive.

        Field type is 'f' for files or 'd' for directories.

        Field cos is the class of service for the file. For directories, cos is
        empty.

        Field cart starts with a null value. When populated from hsi, it may be
        set to the name of a tape cartridge or to ''. Empty files take up no
        space on any cartridge, so for them the field is empty.

        Field checksum is 0 if we have not computed or discoverd a checksum for
        the file. Once we know a checksum has been stored for the file, we set
        this to 1.

        Field last_check is the epoch time at which the file was last checked.

        Field fails is the number of times hashcreate and/or hashverify has
        failed on the file.

        Field reported is 0 or 1 indicating whether we've reported
        """
        # Ensure the checkables table exists
        dbschem.make_table("checkables")
        # A single path is normalized to a one-element list
        if type(dataroot) == str:
            dataroot = [dataroot]

        # Seed the queue with a dirty Checkable for each root directory;
        # anything that is neither str nor list is silently ignored
        if type(dataroot) == list:
            for root in dataroot:
                r = Checkable(path=root, type='d', in_db=False, dirty=True)
                r.load()
                r.persist()
Code example #11
0
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def update_stats(cmf):
    """
    Write the (matches, failures) counts from tuple *cmf* into row 1 of the
    cvstats table, creating and seeding the table first if it is missing.
    """
    created = dbschem.make_table(stats_table)
    conn = CrawlDBI.DBI(dbtype="crawler")
    if created == "Created":
        # Fresh table: seed the single stats row with zero counts
        conn.insert(table=stats_table,
                    fields=["rowid", "matches", "failures"],
                    data=[(1, 0, 0)])

    conn.update(table=stats_table,
                fields=["matches", "failures"],
                data=[cmf],
                where="rowid = 1")
    conn.close()
Code example #12
0
File: cv_lib.py  Project: ORNL-TechInt/hpss-crawler
def update_stats(cmf):
    """
    Record the values in tuple cmf in table cvstats in the database. If the
    table does not exist, create it.
    """
    # make_table() reports 'Created' when it had to create the table
    result = dbschem.make_table(stats_table)
    db = CrawlDBI.DBI(dbtype="crawler")
    if result == "Created":
        # Newly created table: seed the single stats row with zero counts
        db.insert(table=stats_table,
                  fields=["rowid", "matches", "failures"],
                  data=[(1, 0, 0)])

    db.update(table=stats_table,
              fields=["matches", "failures"],
              data=[cmf],
              where="rowid = 1")
    db.close()
Code example #13
0
File: rpt_lib.py  Project: ORNL-TechInt/hpss-crawler
def get_last_rpt_time(db):
    """
    Return the epoch time of the most recent report from the report table.
    If make_table() just created the table ('Created' in its result), or the
    table holds no rows, return 0 to indicate there is no last report time.
    """
    if "Created" in dbschem.make_table("report"):
        last = 0
    else:
        rows = db.select(table='report', fields=['max(report_time)'])
        # max() over an empty table comes back as a single NULL/None
        last = rows[0][0]
        if last is None:
            last = 0

    CrawlConfig.log("time of last report: %d" % last)
    return last
Code example #14
0
File: rpt_lib.py  Project: ORNL-TechInt/hpss-crawler
def get_last_rpt_time(db):
    """
    Retrieve the last report time from the report table. If the table does not
    exist before make_table ('Created' in result), the table is empty so we
    just return 0 to indicate no last report time.
    """
    result = dbschem.make_table("report")
    if "Created" in result:
        # Table was just created, so there is no prior report
        rval = 0
    else:
        rows = db.select(table='report',
                         fields=['max(report_time)'])
        # max() over an empty table comes back as a single NULL/None
        (rval) = rows[0][0]
        if rval is None:
            rval = 0

    CrawlConfig.log("time of last report: %d" % rval)
    return rval
Code example #15
0
def lscos_populate():
    """
    If table lscos already exists, we're done. Otherwise, retrieve the lscos
    info from hsi, create the table, and fill the table in.

    We store the min_size and max_size for each COS as text strings containing
    digits because the largest sizes are already within three orders of
    magnitude of a mysql bigint and growing.

    Returns a message saying whether the table was created or already existed.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    tabname = 'lscos'
    st = dbschem.make_table(tabname)
    # Raw strings so '\d', '\s', etc. reach the regex engine verbatim --
    # un-raw they are invalid escape sequences in modern Python.
    szrgx = r"(\d+([KMGT]B)?)"
    rgx = (r"\s*(\d+)\s*(([-_a-zA-Z0-9]+\s)+)\s+[UGAN]*\s+(\d+)" +
           r"\s+(ALL)?\s+%s\s+-\s+%s" % (szrgx, szrgx))
    if "Created" == st:
        # Table is new: harvest the COS list from hsi's lscos output
        H = hpss.HSI()
        raw = H.lscos()
        H.quit()

        z = [x.strip() for x in raw.split('\r')]
        # The data lines lie between the first two '----------' rule lines
        rules = [q for q in z if '----------' in q]
        first = z.index(rules[0]) + 1
        second = z[first:].index(rules[0]) + first
        lines = z[first:second]
        data = []
        for line in lines:
            m = U.rgxin(rgx, line)
            # cos id, description, copy count, min/max sizes scaled to bytes
            (cos, desc, copies, lo_i, hi_i) = (m[0],
                                               m[1].strip(),
                                               m[3],
                                               U.scale(m[5], kb=1024),
                                               U.scale(m[7], kb=1024))
            data.append((cos, desc, copies, lo_i, hi_i))

        db.insert(table=tabname,
                  fields=['cos', 'name', 'copies', 'min_size', 'max_size'],
                  data=data)
        rval = MSG.table_created_S % tabname
    else:
        rval = MSG.table_already_S % tabname

    db.close()
    return rval