Example #1
def cvv_ttype_missing(argv):
    """ttype_missing - Report records missing ttype information

    usage: cv ttype_missing [-d]
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-c',
                 '--config',
                 action='store',
                 default='',
                 dest='config',
                 help='configuration to use')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    CrawlConfig.get_config(o.config)
    rec_l = cv_lib.ttype_missing()
    for rec in rec_l:
        print("%-40s %-10s %s %s" %
              (rec[1], rec[4], rec[5], U.ymdhms(int(rec[7]))))
Example #2
def cvv_ttype_missing(argv):
    """ttype_missing - Report records missing ttype information

    usage: cv ttype_missing [-d]
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-c', '--config',
                 action='store', default='', dest='config',
                 help='configuration to use')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    CrawlConfig.get_config(o.config)
    rec_l = cv_lib.ttype_missing()
    for rec in rec_l:
        print("%-40s %-10s %s %s" % (rec[1],
                                     rec[4],
                                     rec[5],
                                     U.ymdhms(int(rec[7]))))
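A minimal invocation sketch (hypothetical; the argv list simply mirrors the options defined above and the config filename is illustrative):

# Hypothetical direct call with an explicit config file
cvv_ttype_missing(['--config', 'crawl.cfg'])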
Example #3
def cvv_report(argv):
    """report - show the checksum verifier database status

    select count(*) from checkables where type = 'f';
    select count(*) from checkables where checksum <> 0;
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-p',
                 '--prefix',
                 action='store',
                 default='',
                 dest='prefix',
                 help='table name prefix')
    p.add_option('-v',
                 '--verbose',
                 action='store_true',
                 default=False,
                 dest='verbose',
                 help='pass verbose flag to HSI object')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config != '':
        cfg = CrawlConfig.get_config(o.config)
    else:
        cfg = CrawlConfig.get_config()

    if o.prefix != '':
        cfg.set('dbi', 'tbl_prefix', o.prefix)

    dim = {}
    dim['cos'] = Dimension.get_dim('cos')
    dim['ttypes'] = Dimension.get_dim('ttypes')

    print(dim['cos'].report())
    print(dim['ttypes'].report())
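A sketch of exercising the prefix override directly (hypothetical call; 'test' is an illustrative prefix):

# Hypothetical call: read crawl.cfg and report against test_-prefixed tables
cvv_report(['--cfg', 'crawl.cfg', '--prefix', 'test'])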
Example #4
def crl_cfgdump(argv):
    """cfgdump - load a config file and dump its contents

    usage: crawl cfgdump -c <filename> [--to stdout|log] [--logpath <path>]
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-t', '--to',
                 action='store', default='', dest='target',
                 help='specify where to send the output')
    p.add_option('-l', '--logpath',
                 action='store', default='', dest='logpath',
                 help='specify where to send the output')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    if o.target == '':
        o.target = 'stdout'

    cfg = CrawlConfig.get_config(o.config)
    dumpstr = cfg.dump()

    if o.target == 'stdout':
        print(dumpstr)
    elif o.target == 'log':
        log = CrawlConfig.log(logpath=o.logpath, cfg=cfg)
        for line in dumpstr.split("\n"):
            CrawlConfig.log(line)
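A sketch of sending the dump to the log instead of stdout (hypothetical call; paths are illustrative):

# Hypothetical call: dump the parsed config into a log file
crl_cfgdump(['-c', 'crawl.cfg', '--to', 'log', '--logpath', '/tmp/crawl.log'])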
Example #5
def crl_log(argv):
    """log - write a message to the indicated log file

    usage: crawl log --log <filename> <message>
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--log',
                 action='store',
                 default=None,
                 dest='logfile',
                 help='specify the log file')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log(" ".join(a), logpath=o.logfile, cfg=cfg)
Example #6
def tccp_zreport(args):
    """zreport - show what tcc_report will do with a bitfile id

    usage: tcc zreport NSOBJECT-ID

    Note: This will only report bitfiles where the COS count and file count
    differ. Giving it any old object id won't necessarily generate any output.
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    try:
        nsobj_id = a[0]
    except:
        print("usage: tcc zreport OBJECT_ID")
        return

    cfg = CrawlConfig.get_config()
    outfile = cfg.get(tcc_lib.sectname(), 'report_file')

    cosinfo = tcc_lib.get_cos_info()
    try:
        bfl = tcc_lib.get_bitfile_set(int(nsobj_id), 1)
    except U.HpssicError as e:
        bfl = []
        pass
    print("Writing output to %s" % outfile)
    for bf in bfl:
        tcc_lib.tcc_report(bf, cosinfo)
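A sketch of a direct call (hypothetical; the object id is illustrative):

# Hypothetical call: report bitfiles under NSOBJECT-ID 12345
tccp_zreport(['12345'])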
Example #7
def crl_dbdrop(argv):
    """dbdrop - drop a database table

    usage: crawl dbdrop [-f] <table-name>

    Drop database table <table-name>
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-f',
                 '--force',
                 action='store_true',
                 default=False,
                 dest='force',
                 help='proceed without confirmation')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    tbpfx = cfg.get('dbi', 'tbl_prefix')
    tname = a[0]
    answer = raw_input("About to drop db table %s_%s. Are you sure? > " %
                       (tbpfx, tname))
    if answer[0].lower() != "y":
        sys.exit()

    result = dbschem.drop_table(cfg, tname)
    print(result)
Example #8
def simplug(plugin, args):
    """
    Common plugin simulator. May be used by the interactive tools to simulate
    running the associated plugin.
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-i', '--iterations',
                 action='store', default=1, dest='iterations', type='int',
                 help='how many iterations to run')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log("starting %s simplug, just got config" % plugin)
    sys.path.append(cfg.get('crawler', 'plugin-dir'))
    modname = cfg.get(plugin, 'module')
    try:
        P = __import__(modname)
    except ImportError:
        H = __import__('hpssic.plugins.' + modname)
        P = getattr(H.plugins, modname)
    P.main(cfg)
    if 1 < o.iterations:
        for count in range(o.iterations-1):
            stime = cfg.get_time(plugin, 'frequency')
            time.sleep(stime)
            P.main(cfg)
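A sketch of simulating a plugin for several iterations (hypothetical call; the plugin name is illustrative and must match a section in the configuration):

# Hypothetical call: run the 'cv' plugin three times, sleeping the
# cfg-defined 'frequency' between iterations
simplug('cv', ['--iterations', '3'])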
Example #9
def tcc_report(bitfile, cosinfo=None, path=None, log=True, store=True):
    """
    The bitfile appears to not have the right number of copies. We're going to
    write its information out to a report for manual followup.
    """
    cosinfo = get_cos_info()
    fmt = "%7s %8s %8s %s"
    hdr = fmt % ("COS", "Ccopies", "Fcopies", "Filepath")

    # Compute the bitfile's path
    if path is None:
        bfp = get_bitfile_path(bitfile['BFID'])
    else:
        bfp = path
    rpt = fmt % (bitfile['BFATTR_COS_ID'],
                 str(cosinfo[bitfile['BFATTR_COS_ID']]),
                 str(bitfile['SC_COUNT']), bfp)
    if log:
        CrawlConfig.log(rpt)
    if store:
        try:
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
        except AttributeError:
            cfg = CrawlConfig.get_config()
            rptfname = cfg.get(sectname(), 'report_file')
            tcc_report._f = open(rptfname, 'a')
            tcc_report._f.write(hdr)
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
    return rpt
Example #10
def tcc_report(bitfile, cosinfo=None, path=None, log=True, store=True):
    """
    The bitfile appears to not have the right number of copies. We're going to
    write its information out to a report for manual followup.
    """
    cosinfo = get_cos_info()
    fmt = "%7s %8s %8s %s"
    hdr = fmt % ("COS", "Ccopies", "Fcopies", "Filepath")

    # Compute the bitfile's path
    if path is None:
        bfp = get_bitfile_path(bitfile['BFID'])
    else:
        bfp = path
    rpt = fmt % (bitfile['BFATTR_COS_ID'],
                 str(cosinfo[bitfile['BFATTR_COS_ID']]),
                 str(bitfile['SC_COUNT']),
                 bfp)
    if log:
        CrawlConfig.log(rpt)
    if store:
        try:
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
        except AttributeError:
            cfg = CrawlConfig.get_config()
            rptfname = cfg.get(sectname(), 'report_file')
            tcc_report._f = open(rptfname, 'a')
            tcc_report._f.write(hdr)
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
    return rpt
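A sketch of a direct call with a minimal bitfile record (hypothetical; the field values are illustrative, and passing path= avoids the get_bitfile_path() lookup):

# Hypothetical record carrying only the fields tcc_report() reads
bf = {'BFID': 'x123', 'BFATTR_COS_ID': 6001, 'SC_COUNT': 2}
line = tcc_report(bf, path='/hypothetical/path', log=False, store=False)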
Example #11
def crl_dbdrop(argv):
    """dbdrop - drop a database table

    usage: crawl dbdrop [-f] <table-name>

    Drop database table <table-name>
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-f', '--force',
                 action='store_true', default=False, dest='force',
                 help='proceed without confirmation')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    tbpfx = cfg.get('dbi', 'tbl_prefix')
    tname = a[0]
    answer = raw_input("About to drop db table %s_%s. Are you sure? > " %
                       (tbpfx, tname))
    if answer[0].lower() != "y":
        sys.exit()

    result = dbschem.drop_table(cfg, tname)
    print(result)
Example #12
def mprf_reset(args):
    """reset - drop the mpra table and remove mpra_report.txt

    usage: mpra reset

    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-f', '--force',
                 action='store_true', default=False, dest='force',
                 help='force the operation')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    if not o.force:
        answer = raw_input(MSG.all_mpra_data_lost)
        if answer[0].lower() != "y":
            raise SystemExit()

    cfg = CrawlConfig.get_config(o.config)

    dbschem.drop_table(cfg=cfg, table='mpra')

    filename = cfg.get('mpra', 'report_file')
    util.conditional_rm(filename)
Example #13
def drop_table(cfg=None, prefix=None, table=None):
    """
    This wraps the table dropping operation.
    """
    if table is None:
        return(MSG.nothing_to_drop)

    if cfg is None:
        cfg = CrawlConfig.get_config()

    if prefix is None:
        prefix = cfg.get('dbi-crawler', 'tbl_prefix')
    else:
        cfg.set('dbi-crawler', 'tbl_prefix', prefix)

    db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    if not db.table_exists(table=table):
        rval = ("Table '%s' does not exist" % (table))
    else:
        db.drop(table=table)
        if db.table_exists(table=table):
            rval = ("Attempt to drop table '%s' failed" % (table))
        else:
            rval = ("Attempt to drop table '%s' was successful" % (table))

    db.close()
    return rval
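A sketch of calling the wrapper directly (hypothetical; the table name is illustrative):

# Hypothetical call: drop a test table and show the outcome message
print(drop_table(table='test_checkables'))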
Example #14
    def __init__(self, connect=True, *args, **kwargs):
        """
        Initialize the object
        """
        self.prompt = "]:"
        self.verbose = False
        self.unavailable = False
        self.xobj = None
        self.timeout = 60

        cmdopts = " ".join(args)
        for key in kwargs:
            setattr(self, key, kwargs[key])

        cfg = CrawlConfig.get_config()
        if not hasattr(self, 'reset_atime'):
            self.reset_atime = cfg.getboolean('cv', 'reset_atime')

        if not hasattr(self, 'hash_algorithm'):
            self.hash_algorithm = cfg.get_d('cv', 'hash_algorithm', None)

        maybe_update_hsi()
        self.cmd = "hsi " + cmdopts
        if connect:
            self.connect()
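This looks like the constructor of the hsi wrapper built as hpss.HSI(...) in Example #41; assuming that class name, a sketch of deferring the connection:

# Hypothetical construction: build the wrapper without connecting yet
h = hpss.HSI(connect=False, verbose=True)
h.connect()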
Example #15
def crl_start(argv):
    """start - if the crawler is not already running as a daemon, start it

    usage: crawl start

    default config file: crawl.cfg, or
                         $CRAWL_CONF, or
                         -c <filename> on command line
    default log file:    /var/log/crawl.log, or
                         $CRAWL_LOG, or
                         -l <filename> on command line
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--log',
                 action='store', default='', dest='logfile',
                 help='specify the log file')
    p.add_option('-C', '--context',
                 action='store', default='', dest='context',
                 help="context of crawler ('TEST' or 'PROD')")
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)

    #
    # Initialize the configuration
    #
    if o.context != '':
        cfg.set('crawler', 'context', o.context)
    try:
        exitpath = cfg.get('crawler', 'exitpath')
    except CrawlConfig.NoOptionError as e:
        print("No exit path is specified in the configuration")
        sys.exit(1)

    vstr = "HPSS Integrity Crawler version %s" % version.__version__
    log = CrawlConfig.log(vstr, logpath=o.logfile, cfg=cfg)
    pfpath = make_pidfile(os.getpid(),
                          cfg.get('crawler', 'context'),
                          exitpath,
                          just_check=True)
    crawler = CrawlDaemon(pfpath,
                          stdout="crawler.stdout",
                          stderr="crawler.stderr",
                          logger=log,
                          workdir='.')
    CrawlConfig.log('crl_start: calling crawler.start()')
    crawler.start()
    pass
Example #16
def cvv_report(argv):
    """report - show the checksum verifier database status

    select count(*) from checkables where type = 'f';
    select count(*) from checkables where checksum <> 0;
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-p', '--prefix',
                 action='store', default='', dest='prefix',
                 help='table name prefix')
    p.add_option('-v', '--verbose',
                 action='store_true', default=False, dest='verbose',
                 help='pass verbose flag to HSI object')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config != '':
        cfg = CrawlConfig.get_config(o.config)
    else:
        cfg = CrawlConfig.get_config()

    if o.prefix != '':
        cfg.set('dbi', 'tbl_prefix', o.prefix)

    dim = {}
    dim['cos'] = Dimension.get_dim('cos')
    dim['ttypes'] = Dimension.get_dim('ttypes')

    print(dim['cos'].report())
    print(dim['ttypes'].report())
Example #17
    def fail_report(self, msg):
        """
        Report a failure
        """
        try:
            f = self.fail_report_fh
        except AttributeError:
            cfg = CrawlConfig.get_config()
            filename = cfg.get('checksum-verifier', 'fail_report')
            self.fail_report_fh = open(filename, 'a')
            f = self.fail_report_fh

        f.write("Failure retrieving file %s: '%s'\n" % (self.path, msg))
        self.set('reported', 1)
        f.flush()
Example #18
    def fail_report(self, msg):
        """
        Report a failure
        """
        try:
            f = self.fail_report_fh
        except AttributeError:
            cfg = CrawlConfig.get_config()
            filename = cfg.get('checksum-verifier', 'fail_report')
            self.fail_report_fh = open(filename, 'a')
            f = self.fail_report_fh

        f.write("Failure retrieving file %s: '%s'\n" % (self.path, msg))
        self.set('reported', 1)
        f.flush()
Example #19
def crl_fire(argv):
    """fire - run a plugin

    usage: crawl fire --cfg cfgname --logpath logfname --plugin plugname
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--logpath',
                 action='store',
                 default='',
                 dest='logpath',
                 help='specify where to send the output')
    p.add_option('-p',
                 '--plugin',
                 action='store',
                 default='',
                 dest='plugname',
                 help='which plugin to fire')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)
    CrawlConfig.log(logpath=o.logpath, cfg=cfg)

    if o.plugname == '':
        print("'-p <plugin-name>' is required")
    elif not cfg.has_section(o.plugname):
        print("No plugin named '%s' found in configuration" % o.plugname)
    else:
        plugdir = cfg.get('crawler', 'plugin-dir')
        sys.path.append(plugdir)
        __import__(o.plugname)
        CrawlConfig.log('firing %s', o.plugname)
        sys.modules[o.plugname].main(cfg)
Example #20
def crl_cfgdump(argv):
    """cfgdump - load a config file and dump its contents

    usage: crawl cfgdump -c <filename> [--to stdout|log] [--logpath <path>]
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-t',
                 '--to',
                 action='store',
                 default='',
                 dest='target',
                 help='specify where to send the output')
    p.add_option('-l',
                 '--logpath',
                 action='store',
                 default='',
                 dest='logpath',
                 help='specify where to send the output')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    if o.target == '':
        o.target = 'stdout'

    cfg = CrawlConfig.get_config(o.config)
    dumpstr = cfg.dump()

    if o.target == 'stdout':
        print(dumpstr)
    elif o.target == 'log':
        log = CrawlConfig.log(logpath=o.logpath, cfg=cfg)
        for line in dumpstr.split("\n"):
            CrawlConfig.log(line)
Example #21
def stop_wait(cfg=None):
    """
    Watch for the crawler's exit file to disappear. If it's still there after
    the timeout period, give up and throw an exception.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    context = cfg.get('crawler', 'context')
    exitpath = cfg.get('crawler', 'exitpath')
    timeout = cfg.get_time('crawler', 'stopwait_timeout', 5.0)
    sleep_time = cfg.get_time('crawler', 'sleep_time', 0.25)
    lapse = 0.0

    while is_running(context) and lapse < timeout:
        time.sleep(sleep_time)
        lapse += sleep_time

    if is_running(context) and timeout <= lapse:
        raise util.HpssicError("Stop wait timeout exceeded")
Example #22
def crl_log(argv):
    """log - write a message to the indicated log file

    usage: crawl log --log <filename> <message>
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--log',
                 action='store', default=None, dest='logfile',
                 help='specify the log file')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log(" ".join(a), logpath=o.logfile, cfg=cfg)
Example #23
def stop_wait(cfg=None):
    """
    Watch for the crawler's exit file to disappear. If it's still there after
    the timeout period, give up and throw an exception.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    context = cfg.get('crawler', 'context')
    exitpath = cfg.get('crawler', 'exitpath')
    timeout = cfg.get_time('crawler', 'stopwait_timeout', 5.0)
    sleep_time = cfg.get_time('crawler', 'sleep_time', 0.25)
    lapse = 0.0

    while is_running(context) and lapse < timeout:
        time.sleep(sleep_time)
        lapse += sleep_time

    if is_running(context) and timeout <= lapse:
        raise util.HpssicError("Stop wait timeout exceeded")
Example #24
def is_running(context=None):
    """
    Return True if the crawler is running (per ps(1)) or False otherwise.
    """
    running = False
    if context is None:
        cfg = CrawlConfig.get_config()
        try:
            context = cfg.get('crawler', 'context')
        except CrawlConfig.NoOptionError as e:
            emsg = ("No option 'context' in section 'crawler', file '%s'" %
                    cfg.filename)
            raise StandardError(emsg)

    rpi_l = running_pid(context=context)
    for rpi in rpi_l:
        if rpi[1] == context:
            running = True

    return running
Example #25
def is_running(context=None):
    """
    Return True if the crawler is running (per ps(1)) or False otherwise.
    """
    running = False
    if context is None:
        cfg = CrawlConfig.get_config()
        try:
            context = cfg.get('crawler', 'context')
        except CrawlConfig.NoOptionError as e:
            emsg = ("No option 'context' in section 'crawler', file '%s'" %
                    cfg.filename)
            raise StandardError(emsg)

    rpi_l = running_pid(context=context)
    for rpi in rpi_l:
        if rpi[1] == context:
            running = True

    return running
Example #26
def xplocks(output=None, mark=False):
    """
    Look for expired purge locks in bfpurgerec.
    """
    cfg = CrawlConfig.get_config()
    now = time.time()
    hits = 0

    opened = True
    if output is None:
        f = open(cfg.get('mpra', 'report_file'), 'a')
    elif type(output) == str:
        f = open(output, 'a')
    elif type(output) == file:
        f = output
        opened = False
    else:
        raise StandardError("output type must be 'str' or 'file' ")

    dbs = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    lock_min = cfg.getint('mpra', 'lock_duration')

    rows = dbs.select(table='bfpurgerec',
                      fields=['bfid', 'record_lock_time'],
                      where='record_lock_time <> 0')
    if 0 < len(rows):
        f.write("Expired Purge Locks\n")
        for r in rows:
            if (lock_min * 60) < (now - r['RECORD_LOCK_TIME']):
                hits += 1
                f.write("   %s  %s\n" % (CrawlDBI.DBIdb2.hexstr(r['BFID']),
                                         util.ymdhms(r['RECORD_LOCK_TIME'])))

    if mark:
        mpra_record_recent('purge', 0, 0, hits)

    if opened:
        f.close()

    return hits
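A sketch of a direct call (hypothetical; the output path is illustrative):

# Hypothetical call: append expired-lock hits to a file and record the run
hits = xplocks(output='/tmp/xplocks_report.txt', mark=True)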
Example #27
def mprf_reset(args):
    """reset - drop the mpra table and remove mpra_report.txt

    usage: mpra reset

    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-f',
                 '--force',
                 action='store_true',
                 default=False,
                 dest='force',
                 help='force the operation')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    if not o.force:
        answer = raw_input(MSG.all_mpra_data_lost)
        if answer[0].lower() != "y":
            raise SystemExit()

    cfg = CrawlConfig.get_config(o.config)

    dbschem.drop_table(cfg=cfg, table='mpra')

    filename = cfg.get('mpra', 'report_file')
    util.conditional_rm(filename)
Example #28
def drop_tables_matching(tablike):
    """
    Drop tables with names matching the *tablike* expression. At the time of
    writing, this is only used to drop test tables ('test_%').
    """
    tcfg = CrawlConfig.get_config()
    tcfg.set('dbi-crawler', 'tbl_prefix', '')
    db = CrawlDBI.DBI(cfg=tcfg, dbtype='crawler')
    if CrawlDBI.mysql_available and 'mysql' in str(db):

        # db = CrawlDBI.DBI(cfg=tcfg, dbtype='crawler')
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    "Can't read dir of .*")
            tlist = db.select(table="information_schema.tables",
                              fields=['table_name'],
                              where="table_name like '%s'" % tablike)
            for (tname,) in tlist:
                if db.table_exists(table=tname):
                    db.drop(table=tname)
    db.close()
Example #29
def simplug(plugin, args):
    """
    Common plugin simulator. May be used by the interactive tools to simulate
    running the associated plugin.
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-i',
                 '--iterations',
                 action='store',
                 default=1,
                 dest='iterations',
                 type='int',
                 help='how many iterations to run')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log("starting %s simplug, just got config" % plugin)
    sys.path.append(cfg.get('crawler', 'plugin-dir'))
    modname = cfg.get(plugin, 'module')
    try:
        P = __import__(modname)
    except ImportError:
        H = __import__('hpssic.plugins.' + modname)
        P = getattr(H.plugins, modname)
    P.main(cfg)
    if 1 < o.iterations:
        for count in range(o.iterations - 1):
            stime = cfg.get_time(plugin, 'frequency')
            time.sleep(stime)
            P.main(cfg)
Example #30
    def load_priority_list(cls):
        """
        If one or more priority list files are configured, read them and put
        their contents first in the list of Checkables to be processed
        """
        rval = []
        cfg = CrawlConfig.get_config()
        priglob = cfg.get_d('cv', 'priority', '')
        if priglob == '':
            return rval

        pricomp = cfg.get_d('cv', 'completed',
                            U.pathjoin(U.dirname(priglob), 'completed'))

        for pripath in U.foldsort(glob.glob(priglob)):
            with open(pripath, 'r') as f:
                for line in f.readlines():
                    path = line.strip()
                    rval.append(Checkable(path=path, type='f'))
            os.rename(pripath, U.pathjoin(pricomp, U.basename(pripath)))

        return rval
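A sketch of how a caller might drain the priority list (hypothetical; assuming this is a classmethod on Checkable, as the cls parameter suggests):

# Hypothetical usage: check priority items ahead of the normal population
for item in Checkable.load_priority_list():
    item.check()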
Example #31
def tccp_zreport(args):
    """zreport - show what tcc_report will do with a bitfile id

    usage: tcc zreport NSOBJECT-ID

    Note: This will only report bitfiles where the COS count and file count
    differ. Giving it any old object id won't necessarily generate any output.
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    try:
        nsobj_id = a[0]
    except:
        print("usage: tcc zreport OBJECT_ID")
        return

    cfg = CrawlConfig.get_config()
    outfile = cfg.get(tcc_lib.sectname(), 'report_file')

    cosinfo = tcc_lib.get_cos_info()
    try:
        bfl = tcc_lib.get_bitfile_set(int(nsobj_id), 1)
    except U.HpssicError as e:
        bfl = []
        pass
    print("Writing output to %s" % outfile)
    for bf in bfl:
        tcc_lib.tcc_report(bf, cosinfo)
Example #32
    def load_priority_list(cls):
        """
        If one or more priority list files are configured, read them and put
        their contents first in the list of Checkables to be processed
        """
        rval = []
        cfg = CrawlConfig.get_config()
        priglob = cfg.get_d('cv', 'priority', '')
        if priglob == '':
            return rval

        pricomp = cfg.get_d('cv',
                            'completed',
                            U.pathjoin(U.dirname(priglob), 'completed'))

        for pripath in U.foldsort(glob.glob(priglob)):
            with open(pripath, 'r') as f:
                for line in f.readlines():
                    path = line.strip()
                    rval.append(Checkable(path=path, type='f'))
            os.rename(pripath, U.pathjoin(pricomp, U.basename(pripath)))

        return rval
Example #33
def crl_fire(argv):
    """fire - run a plugin

    usage: crawl fire --cfg cfgname --logpath logfname --plugin plugname
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--logpath',
                 action='store', default='', dest='logpath',
                 help='specify where to send the output')
    p.add_option('-p', '--plugin',
                 action='store', default='', dest='plugname',
                 help='which plugin to fire')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)
    CrawlConfig.log(logpath=o.logpath, cfg=cfg)

    if o.plugname == '':
        print("'-p <plugin-name>' is required")
    elif not cfg.has_section(o.plugname):
        print("No plugin named '%s' found in configuration" % o.plugname)
    else:
        plugdir = cfg.get('crawler', 'plugin-dir')
        sys.path.append(plugdir)
        __import__(o.plugname)
        CrawlConfig.log('firing %s', o.plugname)
        sys.modules[o.plugname].main(cfg)
Example #34
def mprf_age(args):
    """age - list the records in table BFMIGRREC or BFPURGEREC older than age

    usage: mpra age -t [migr|purge] -a/--age N[S|M|H|d|m|Y] [-c/--count]

    Report migration records (or a count of them) older than the age indicated.

    --age N        -- report records older than N
    --before D     -- report records from before date D
    --start S      -- report records with timestamps larger than S
    --end E        -- report records with timestamps smaller than E
    """
    p = optparse.OptionParser()
    p.add_option('-a',
                 '--age',
                 action='store',
                 default='',
                 dest='age',
                 help='report records older than this')
    p.add_option('-b',
                 '--before',
                 action='store',
                 default='',
                 dest='before',
                 help='report records from before this epoch')
    p.add_option('-c',
                 '--count',
                 action='store_true',
                 default=False,
                 dest='count',
                 help='report record counts rather than records')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-e',
                 '--end',
                 action='store',
                 default='',
                 dest='end',
                 help='ending epoch time')
    p.add_option('-p',
                 '--path',
                 action='store_true',
                 default=False,
                 dest='path',
                 help='report paths as well as bitfile IDs')
    p.add_option('-s',
                 '--start',
                 action='store',
                 default='',
                 dest='start',
                 help='starting epoch time')
    p.add_option('-t',
                 '--table',
                 action='store',
                 default='',
                 dest='table',
                 help='which table to age')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    start = 0
    if o.age and o.before:
        raise StandardError("--age and --before are mutually exclusive")
    elif o.age and '' != o.end:
        raise StandardError("--age and --end are mutually exclusive")
    elif o.before and '' != o.end:
        raise StandardError("--before and --end are mutually exclusive")
    elif o.before:
        end = time.mktime(time.strptime(o.before, "%Y.%m%d"))
    elif o.age:
        end = time.time() - cfg.to_seconds(o.age)
    elif o.end:
        end = util.epoch(o.end)

    if o.start:
        start = util.epoch(o.start)
    if o.table == '':
        o.table = 'migr'

    print("%d, %d" % (start, end))
    mpra_lib.age(o.table, start, end, o.count, sys.stdout, path=o.path)
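A sketch of a direct call matching the usage line above (hypothetical; the age value is illustrative):

# Hypothetical call: count migration records older than 30 days
mprf_age(['-t', 'migr', '--age', '30d', '--count'])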
Example #35
    def run(self):
        """
        This routine runs in the background as a daemon. Here's where
        we fire off plug-ins as appropriate.
        """
        cfgname = ''
        self.cfg = CrawlConfig.get_config(cfgname)
        self.pidfile = "%s/%d" % (self.piddir, os.getpid())
        exit_file = self.cfg.get('crawler', 'exitpath')
        ctx = self.cfg.get('crawler', 'context')
        clean_defunct_pidfiles(ctx)
        make_pidfile(os.getpid(), ctx, exit_file)
        atexit.register(self.delpid)

        keep_going = True
        plugin_d = {}
        while keep_going:
            try:
                pluglstr = self.cfg.get('crawler', 'plugins')
                pluglist = [x.strip() for x in pluglstr.split(',')]
                for s in pluglist:
                    self.dlog('crawl: CONFIG: [%s]' % s)
                    for o in self.cfg.options(s):
                        self.dlog('crawl: CONFIG: %s: %s' %
                                  (o, self.cfg.get(s, o)))
                    if s == 'crawler':
                        continue
                    elif s in plugin_d.keys():
                        CrawlConfig.log("reloading plugin %s" % s)
                        plugin_d[s].reload(self.cfg)
                    else:
                        CrawlConfig.log("initial load of plugin %s" % s)
                        plugin_d[s] = CrawlPlugin.CrawlPlugin(name=s,
                                                              cfg=self.cfg)

                # remove any plugins that are not in the new configuration
                for p in plugin_d.keys():
                    if p not in self.cfg.sections():
                        CrawlConfig.log("unloading obsolete plugin %s" % p)
                        del plugin_d[p]

                heartbeat = self.cfg.get_time('crawler', 'heartbeat', 10)
                while keep_going:
                    #
                    # Fire any plugins that are due
                    #
                    if not self.cfg.quiet_time(time.time()):
                        hb_msg = "crawl: heartbeat..."
                        if self.fire_plugins(plugin_d):
                            keep_going = False
                    else:
                        hb_msg = "crawl: heartbeat... [quiet]"

                    # CrawlConfig.log("issue the heartbeat")
                    #
                    # Issue the heartbeat if it's time
                    #
                    if 0 == (int(time.time()) % heartbeat):
                        # self.dlog(hb_msg)
                        CrawlConfig.log(hb_msg)

                    # CrawlConfig.log("check for config changes")
                    #
                    # If the config file has changed, reload it by resetting the
                    # cached config object and breaking out of the inner loop.
                    #
                    if self.cfg.changed():
                        cfgname = self.cfg.get('crawler', 'filename')
                        self.cfg = CrawlConfig.get_config(reset=True)
                        break

                    # CrawlConfig.log("check for exit signal")
                    #
                    # Check for the exit signal
                    #
                    if util.conditional_rm(exit_file):
                        self.dlog('crawl: shutting down')
                        keep_going = False

                    # CrawlConfig.log("sleep")
                    #
                    # We cycle once per second so we can detect if the user
                    # asks us to stop or if the config file changes and needs
                    # to be reloaded
                    #
                    time.sleep(1.0)

            except:
                # if we get an exception, write the traceback to the log file
                tbstr = tb.format_exc()
                for line in tbstr.split('\n'):
                    self.dlog("crawl: '%s'" % line)
                keep_going = False
Example #36
    def run(self):
        """
        This routine runs in the background as a daemon. Here's where
        we fire off plug-ins as appropriate.
        """
        cfgname = ''
        self.cfg = CrawlConfig.get_config(cfgname)
        self.pidfile = "%s/%d" % (self.piddir, os.getpid())
        exit_file = self.cfg.get('crawler', 'exitpath')
        ctx = self.cfg.get('crawler', 'context')
        clean_defunct_pidfiles(ctx)
        make_pidfile(os.getpid(), ctx, exit_file)
        atexit.register(self.delpid)

        keep_going = True
        plugin_d = {}
        while keep_going:
            try:
                pluglstr = self.cfg.get('crawler', 'plugins')
                pluglist = [x.strip() for x in pluglstr.split(',')]
                for s in pluglist:
                    self.dlog('crawl: CONFIG: [%s]' % s)
                    for o in self.cfg.options(s):
                        self.dlog('crawl: CONFIG: %s: %s' %
                                  (o, self.cfg.get(s, o)))
                    if s == 'crawler':
                        continue
                    elif s in plugin_d.keys():
                        CrawlConfig.log("reloading plugin %s" % s)
                        plugin_d[s].reload(self.cfg)
                    else:
                        CrawlConfig.log("initial load of plugin %s" % s)
                        plugin_d[s] = CrawlPlugin.CrawlPlugin(name=s,
                                                              cfg=self.cfg)

                # remove any plugins that are not in the new configuration
                for p in plugin_d.keys():
                    if p not in self.cfg.sections():
                        CrawlConfig.log("unloading obsolete plugin %s" % p)
                        del plugin_d[p]

                heartbeat = self.cfg.get_time('crawler', 'heartbeat', 10)
                while keep_going:
                    #
                    # Fire any plugins that are due
                    #
                    if not self.cfg.quiet_time(time.time()):
                        hb_msg = "crawl: heartbeat..."
                        if self.fire_plugins(plugin_d):
                            keep_going = False
                    else:
                        hb_msg = "crawl: heartbeat... [quiet]"

                    # CrawlConfig.log("issue the heartbeat")
                    #
                    # Issue the heartbeat if it's time
                    #
                    if 0 == (int(time.time()) % heartbeat):
                        # self.dlog(hb_msg)
                        CrawlConfig.log(hb_msg)

                    # CrawlConfig.log("check for config changes")
                    #
                    # If the config file has changed, reload it by resetting the
                    # cached config object and breaking out of the inner loop.
                    #
                    if self.cfg.changed():
                        cfgname = self.cfg.get('crawler', 'filename')
                        self.cfg = CrawlConfig.get_config(reset=True)
                        break

                    # CrawlConfig.log("check for exit signal")
                    #
                    # Check for the exit signal
                    #
                    if util.conditional_rm(exit_file):
                        self.dlog('crawl: shutting down')
                        keep_going = False

                    # CrawlConfig.log("sleep")
                    #
                    # We cycle once per second so we can detect if the user
                    # asks us to stop or if the config file changes and needs
                    # to be reloaded
                    #
                    time.sleep(1.0)

            except:
                # if we get an exception, write the traceback to the log file
                tbstr = tb.format_exc()
                for line in tbstr.split('\n'):
                    self.dlog("crawl: '%s'" % line)
                keep_going = False
Example #37
def crl_start(argv):
    """start - if the crawler is not already running as a daemon, start it

    usage: crawl start

    default config file: crawl.cfg, or
                         $CRAWL_CONF, or
                         -c <filename> on command line
    default log file:    /var/log/crawl.log, or
                         $CRAWL_LOG, or
                         -l <filename> on command line
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--log',
                 action='store',
                 default='',
                 dest='logfile',
                 help='specify the log file')
    p.add_option('-C',
                 '--context',
                 action='store',
                 default='',
                 dest='context',
                 help="context of crawler ('TEST' or 'PROD')")
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)

    #
    # Initialize the configuration
    #
    if o.context != '':
        cfg.set('crawler', 'context', o.context)
    try:
        exitpath = cfg.get('crawler', 'exitpath')
    except CrawlConfig.NoOptionError as e:
        print("No exit path is specified in the configuration")
        sys.exit(1)

    vstr = "HPSS Integrity Crawler version %s" % version.__version__
    log = CrawlConfig.log(vstr, logpath=o.logfile, cfg=cfg)
    pfpath = make_pidfile(os.getpid(),
                          cfg.get('crawler', 'context'),
                          exitpath,
                          just_check=True)
    crawler = CrawlDaemon(pfpath,
                          stdout="crawler.stdout",
                          stderr="crawler.stderr",
                          logger=log,
                          workdir='.')
    CrawlConfig.log('crl_start: calling crawler.start()')
    crawler.start()
    pass
Example #38
def mprf_migr_recs(args):
    """migr_recs - list the records in table BFMIGRREC

    usage: mpra migr_recs [-l/--limit N]
                          [-b/--before DATE-TIME]
                          [-a/--after DATE-TIME]

    with -l N, only report the first N records

    with -b DATE-TIME, only report the records with create times before
    DATE-TIME.

    with -a DATE-TIME, only report the records with create times after
    DATE-TIME.
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--count',
                 action='store_true',
                 default=False,
                 dest='count',
                 help='report record counts rather than records')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--limit',
                 action='store',
                 default='',
                 dest='limit',
                 help='how many records to fetch')
    p.add_option('-b',
                 '--before',
                 action='store',
                 default='',
                 dest='before',
                 help='fetch records from before the date/time')
    p.add_option('-a',
                 '--after',
                 action='store',
                 default='',
                 dest='after',
                 help='fetch records from after the date/time')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()

    dbargs = {'table': 'bfmigrrec'}

    if o.limit == '' and o.before == '' and o.after == '':
        dbargs['limit'] = 30

    elif o.limit == '' and o.before == '' and o.after != '':
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after), )

    elif o.limit == '' and o.before != '' and o.after == '':
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before), )

    elif o.limit == '' and o.before != '' and o.after != '':
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))

    elif o.limit != '' and o.before == '' and o.after == '':
        dbargs['limit'] = int(o.limit)

    elif o.limit != '' and o.before == '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after), )

    elif o.limit != '' and o.before != '' and o.after == '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before), )

    elif o.limit != '' and o.before != '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))

    if o.count:
        dbargs['fields'] = ['count(*)']
    else:
        dbargs['fields'] = [
            'bfid', 'record_create_time', 'migration_failure_count'
        ]

    dbargs['orderby'] = 'record_create_time'

    rows = mpra_lib.lookup_migr_recs(**dbargs)
    for row in rows:
        if o.count:
            print("Records found: %d" % row['1'])
        else:
            print("%s %s %d" % (CrawlDBI.DBIdb2.hexstr(
                row['BFID']), util.ymdhms(row['RECORD_CREATE_TIME']),
                                row['MIGRATION_FAILURE_COUNT']))
Example #39
def mprf_migr_recs(args):
    """migr_recs - list the records in table BFMIGRREC

    usage: mpra migr_recs [-l/--limit N]
                          [-b/--before DATE-TIME]
                          [-a/--after DATE-TIME]

    with -l N, only report the first N records

    with -b DATE-TIME, only report the records with create times before
    DATE-TIME.

    with -a DATE-TIME, only report the records with create times after
    DATE-TIME.
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--count',
                 action='store_true', default=False, dest='count',
                 help='report record counts rather than records')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--limit',
                 action='store', default='', dest='limit',
                 help='how many records to fetch')
    p.add_option('-b', '--before',
                 action='store', default='', dest='before',
                 help='fetch records from before the date/time')
    p.add_option('-a', '--after',
                 action='store', default='', dest='after',
                 help='fetch records from after the date/time')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()

    dbargs = {'table': 'bfmigrrec'}

    if o.limit == '' and o.before == '' and o.after == '':
        dbargs['limit'] = 30

    elif o.limit == '' and o.before == '' and o.after != '':
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after),)

    elif o.limit == '' and o.before != '' and o.after == '':
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before),)

    elif o.limit == '' and o.before != '' and o.after != '':
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))

    elif o.limit != '' and o.before == '' and o.after == '':
        dbargs['limit'] = int(o.limit)

    elif o.limit != '' and o.before == '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time'
        dbargs['data'] = (util.epoch(o.after),)

    elif o.limit != '' and o.before != '' and o.after == '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = 'record_create_time < ?'
        dbargs['data'] = (util.epoch(o.before),)

    elif o.limit != '' and o.before != '' and o.after != '':
        dbargs['limit'] = int(o.limit)
        dbargs['where'] = '? < record_create_time and record_create_time < ?'
        dbargs['data'] = (util.epoch(o.after), util.epoch(o.before))

    if o.count:
        dbargs['fields'] = ['count(*)']
    else:
        dbargs['fields'] = ['bfid',
                            'record_create_time',
                            'migration_failure_count']

    dbargs['orderby'] = 'record_create_time'

    rows = mpra_lib.lookup_migr_recs(**dbargs)
    for row in rows:
        if o.count:
            print("Records found: %d" % row['1'])
        else:
            print("%s %s %d" % (CrawlDBI.DBIdb2.hexstr(row['BFID']),
                                util.ymdhms(row['RECORD_CREATE_TIME']),
                                row['MIGRATION_FAILURE_COUNT']))
Example #40
def mprf_age(args):
    """age - list the records in table BFMIGRREC or BFPURGEREC older than age

    usage: mpra age -t [migr|purge] -a/--age N[S|M|H|d|m|Y] [-c/--count]

    Report migration records (or a count of them) older than the age indicated.

    --age N        -- report records older than N
    --before D     -- report records from before date D
    --start S      -- report records with timestamps larger than S
    --end E        -- report records with timestamps smaller than E
    """
    p = optparse.OptionParser()
    p.add_option('-a', '--age',
                 action='store', default='', dest='age',
                 help='report records older than this')
    p.add_option('-b', '--before',
                 action='store', default='', dest='before',
                 help='report records from before this epoch')
    p.add_option('-c', '--count',
                 action='store_true', default=False, dest='count',
                 help='report record counts rather than records')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-e', '--end',
                 action='store', default='', dest='end',
                 help='ending epoch time')
    p.add_option('-p', '--path',
                 action='store_true', default=False, dest='path',
                 help='report paths as well as bitfile IDs')
    p.add_option('-s', '--start',
                 action='store', default='', dest='start',
                 help='starting epoch time')
    p.add_option('-t', '--table',
                 action='store', default='', dest='table',
                 help='which table to age')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    start = 0
    if o.age and o.before:
        raise StandardError("--age and --before are mutually exclusive")
    elif o.age and '' != o.end:
        raise StandardError("--age and --end are mutually exclusive")
    elif o.before and '' != o.end:
        raise StandardError("--before and --end are mutually exclusive")
    elif o.before:
        end = time.mktime(time.strptime(o.before, "%Y.%m%d"))
    elif o.age:
        end = time.time() - cfg.to_seconds(o.age)
    elif o.end:
        end = util.epoch(o.end)

    if o.start:
        start = util.epoch(o.start)
    if o.table == '':
        o.table = 'migr'

    print("%d, %d" % (start, end))
    mpra_lib.age(o.table, start, end, o.count, sys.stdout, path=o.path)
Example #41
    def check(self):
        """
        For a directory:
         - get a list of its contents if possible,
         - create a Checkable object for each item and persist it to the
           database
         - return the list of Checkables found in the directory
        For a file:
         - if it already has a hash, add it to the sample if not already
           and verify it
         - if it does not have a hash, decide whether to add it or not

        The value of probability [0.0 .. 1.0] indicates the likelihood with
        which we should check files.

        potential outcomes            return
         read a directory             list of Checkable objects
         file checksum fail           Alert
         invalid Checkable type       raise StandardError
         access denied                "access denied"
         verified file checksum       "matched"
         checksum a file              "checksummed"
         skipped a file               "skipped"
         hpss unavailable             "unavailable"

        Here we examine a population member, count it as a member of the
        population, decide whether to add it to the sample, and if so, count it
        as a sample member.

        First, we have to make all the decisions and update the object
        accordingly.

        Then, we persist the object to the database.
        """
        # fire up hsi
        # self.probability = probability
        rval = []
        cfg = CrawlConfig.get_config()
        # hsi_timeout = int(cfg.get_d('crawler', 'hsi_timeout', 300))
        try:
            # h = hpss.HSI(timeout=hsi_timeout, verbose=True)
            h = hpss.HSI(verbose=True)
            CrawlConfig.log("started hsi with pid %d" % h.pid())
        except hpss.HSIerror as e:
            return "unavailable"

        if self.type == 'd':
            rsp = h.lsP(self.path)
            if "Access denied" in rsp:
                rval = "access denied"
            else:
                for line in rsp.split("\n"):
                    new = Checkable.fdparse(line)
                    if new is not None:
                        rval.append(new)
                        new.load()
                        new.persist()
                        # returning list of items found in the directory
        elif self.type == 'f':
            if self.cart is None:
                self.populate_cart(h)
            if self.checksum == 0:
                if self.has_hash(h):
                    self.add_to_sample(h, already_hashed=True)
                    rval = self.verify(h)
                    # returning "matched", "checksummed", "skipped", or Alert()
                elif self.addable():
                    rval = self.add_to_sample(h)
                    # returning "access denied" or "checksummed"
                else:
                    rval = "skipped"
            else:
                rval = self.verify(h)
                # returning "matched", "checksummed", "skipped", or Alert()
        else:
            raise StandardError("Invalid Checkable type: %s" % self.type)

        if (3 < self.fails) and (0 == self.reported):
            self.fail_report(h.before())
            rval = "skipped"

        h.quit()

        self.set('last_check', time.time())
        CrawlConfig.log(
            "Persisting checkable '%s' with %s = %f, %s = %d" %
            (self.path, 'last_check', self.last_check, 'fails', self.fails))
        self.persist()
        return rval
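The outcomes table in the docstring implies that callers have to branch on what check() hands back. Below is a minimal, hypothetical dispatcher, not the crawler's actual plugin loop; it assumes that anything which is neither a list nor one of the documented status strings is the Alert raised on a checksum failure.

import CrawlConfig


def handle_check_result(item):
    # item is a Checkable; check() returns a list of new Checkables for a
    # directory, a status string for most file outcomes, or an Alert object
    # when a file checksum fails to verify
    result = item.check()
    if isinstance(result, list):
        return result            # new entries found under a directory
    elif result == "unavailable":
        return []                # hpss was down; try again on the next pass
    elif result in ("matched", "checksummed", "skipped", "access denied"):
        return []                # file handled; nothing new to queue
    else:
        # assumed: anything else is the Alert raised on a checksum mismatch
        CrawlConfig.log("alert raised while checking %s" % item.path)
        return []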
Example No. 42
0
def age(table,
        start=None,
        end=None,
        count=False,
        output=None,
        path=False,
        mark=False):
    """
    Retrieve and return (count of) records older than end and younger than
    start. The result is written to output. If path is True, age_report will
    compute the bitfile pathname and report it as well. If mark is True, we
    update the mpra table with the date/time of the newest record reported.

    Strict less-than comparison is the right thing to do. We record the last
    time reported in the mpra recent table, and we have already reported all
    the records carrying that timestamp. Since we are looking into the past,
    any records added later must carry larger timestamps, so the next report
    should start with the first record after the last one already reported.
    """
    cfg = CrawlConfig.get_config()
    opened = True
    if output is None:
        f = open(cfg.get('mpra', 'report_file'), 'a')
    elif type(output) == str:
        f = open(output, 'a')
    elif type(output) == file:
        f = output
        opened = False
    else:
        raise StandardError("output type must be 'str' or 'file' ")

    db = CrawlDBI.DBI(dbtype='hpss', dbname='sub')

    # Here we set selection constraints for the select to retrieve the records
    # of interest, and we also set the time delta into the past, stored in age.
    # Arguments *start* and *end* provide boundaries delimiting a time segment.
    # We store in *age* the distance from the current time back to *end*. If
    # *end* is not set, it is presumed to be the same as the present, so age is
    # 0. *age* is passed to age_report in the count branch below.
    if start is not None and end is not None:
        dbargs = {'where': '? < record_create_time and record_create_time < ?',
                  'data': (start, end)}
        age = int(time.time()) - end
    elif start is None and end is not None:
        dbargs = {'where': 'record_create_time < ?',
                  'data': (end, )}
        age = int(time.time()) - end
    elif start is not None and end is None:
        dbargs = {'where': '? < record_create_time',
                  'data': (start, )}
        age = 0
    else:
        # no time constraints at all: select everything in the table
        dbargs = {}
        age = 0

    if count:
        dbargs['fields'] = ['count(*)']
    else:
        dbargs['fields'] = ['bfid',
                            'record_create_time',
                            'migration_failure_count']
        dbargs['orderby'] = 'record_create_time'

    try:
        dbargs['table'] = {'migr': 'bfmigrrec',
                           'purge': 'bfpurgerec'}[table]
    except KeyError:
        dbargs['table'] = 'bfmigrrec'

    rows = db.select(**dbargs)
    recent = 0
    rval = len(rows)
    if count:
        age_report(table, age, count, rows, f, path)
    elif 0 < len(rows):
        for row in rows:
            if recent < row['RECORD_CREATE_TIME']:
                recent = row['RECORD_CREATE_TIME']

        age_report(table, int(time.time()) - recent, count, rows, f, path)

    if mark:
        mpra_record_recent(table,
                           start,
                           recent if 0 < recent else end,
                           len(rows))

    if opened:
        f.close()

    return rval
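A short usage sketch for age(), mirroring the call mprf_age makes after option parsing; the 24-hour window is illustrative only.

import sys
import time

import mpra_lib

# report migration records created in the last 24 hours, writing the age
# report to stdout instead of the configured report file
end = int(time.time())
start = end - 24 * 3600
nrec = mpra_lib.age('migr', start=start, end=end, count=False,
                    output=sys.stdout, path=False, mark=False)
print("%d record(s) reported" % nrec)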