Example No. 1
def get_html_report(cfg_file=None, cfg=None):
    """
    Format a report in HTML
    """
    rval = ""
    if cfg is not None:
        # use it
        pass
    elif cfg_file is not None:
        cfg = CrawlConfig.add_config(filename=cfg_file)
    else:
        cfg = CrawlConfig.add_config()

    db = CrawlDBI.DBI(dbtype="crawler")

    last_rpt_time = rpt_lib.get_last_rpt_time(db)
    rval += ('<head><meta http-equiv="refresh" content="60">\n')
    rval += ("<title>HPSSIC Dashboard</title></head>")
    rval += ("<body><center><h1>HPSS Integrity Crawler Dashboard</h1>" +
             "<br><h4>Version %s</h4>" % version.__version__ +
             "</center>\n")
    rval += ("Report generated at %s\n" % time.strftime("%Y.%m%d %H:%M:%S"))
    rval += ("<br>Based on data from %s\n" %
             time.strftime("%Y.%m%d %H:%M:%S", time.localtime(last_rpt_time)))
    rval += get_html_cv_report(db, last_rpt_time)
    rval += get_html_mpra_report(db, last_rpt_time)
    rval += get_html_tcc_report(db, last_rpt_time)
    rval += "</body>"
    db.close()

    return rval
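
The function above assembles the dashboard as one string: a <head> with a meta-refresh tag so browsers reload every 60 seconds, a version banner, then the three sub-reports appended in order. A minimal standalone sketch of the same assembly pattern, with a hypothetical fake_subreport() standing in for get_html_cv_report() and friends:

import time

def fake_subreport(title):
    # hypothetical stand-in for get_html_cv_report() and friends
    return "<h2>%s</h2><p>(no data)</p>\n" % title

def build_dashboard(version="0.0.1"):
    rval = '<head><meta http-equiv="refresh" content="60">\n'
    rval += "<title>Demo Dashboard</title></head>"
    rval += ("<body><center><h1>Demo Dashboard</h1>" +
             "<br><h4>Version %s</h4></center>\n" % version)
    rval += "Report generated at %s\n" % time.strftime("%Y.%m.%d %H:%M:%S")
    for title in ("cv", "mpra", "tcc"):
        rval += fake_subreport(title)
    rval += "</body>"
    return rval

print(build_dashboard())
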
Example No. 2
def cvv_ttype_missing(argv):
    """ttype_missing - Report records missing ttype information

    usage: cv ttype_missing [-d]
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-c', '--config',
                 action='store', default='', dest='config',
                 help='configuration to use')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    CrawlConfig.get_config(o.config)
    rec_l = cv_lib.ttype_missing()
    for rec in rec_l:
        print("%-40s %-10s %s %s" % (rec[1],
                                     rec[4],
                                     rec[5],
                                     U.ymdhms(int(rec[7]))))
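
Nearly every entry point in this collection repeats the same optparse skeleton: declare -d/--debug, parse, and drop into pdb on request. Wrapping parse_args in try/except SystemExit keeps a bad option or --help from killing the calling process, since optparse exits rather than raising. A minimal runnable sketch of that skeleton:

import optparse
import pdb

def tool_main(argv):
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        # optparse calls sys.exit() on bad options and on --help
        return
    if o.debug:
        pdb.set_trace()
    print("positional args: %s" % a)

tool_main(['one', 'two'])
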
Example No. 3
def simplug(plugin, args):
    """
    Common plugin simulator. May be used by the interactive tools to simulate
    running the associated plugin.
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-i', '--iterations',
                 action='store', default=1, dest='iterations', type='int',
                 help='how many iterations to run')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log("starting %s simplug, just got config" % plugin)
    sys.path.append(cfg.get('crawler', 'plugin-dir'))
    modname = cfg.get(plugin, 'module')
    try:
        P = __import__(modname)
    except ImportError:
        H = __import__('hpssic.plugins.' + modname)
        P = getattr(H.plugins, modname)
    P.main(cfg)
    if 1 < o.iterations:
        for count in range(o.iterations-1):
            stime = cfg.get_time(plugin, 'frequency')
            time.sleep(stime)
            P.main(cfg)
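
The import fallback above first tries the plugin as a top-level module, then retries inside the hpssic.plugins package; __import__ on a dotted name returns the top-level package, so the submodule has to be fetched with getattr. A generic, runnable sketch of the same fallback using a standard-library package instead of hpssic:

def load_module(modname, pkgname='xml.etree'):
    """Try a top-level import, then fall back to <pkgname>.<modname>."""
    try:
        return __import__(modname)
    except ImportError:
        top = __import__(pkgname + '.' + modname)   # returns the 'xml' package
        pkg = getattr(top, pkgname.split('.')[1])   # xml.etree
        return getattr(pkg, modname)                # xml.etree.ElementTree

print(load_module('ElementTree').__name__)
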
Example No. 4
def cvv_ttype_missing(argv):
    """ttype_missing - Report records missing ttype information

    usage: cv ttype_missing [-d]
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-c',
                 '--config',
                 action='store',
                 default='',
                 dest='config',
                 help='configuration to use')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    CrawlConfig.get_config(o.config)
    rec_l = cv_lib.ttype_missing()
    for rec in rec_l:
        print("%-40s %-10s %s %s" %
              (rec[1], rec[4], rec[5], U.ymdhms(int(rec[7]))))
Example No. 5
    def verify(self, h):
        """
        Attempt to verify the current file.
        """
        CrawlConfig.log("hsi(%d) attempting to verify %s" %
                        (h.pid(), self.path))
        rsp = h.hashverify(self.path)

        if "TIMEOUT" in rsp or "ERROR" in rsp:
            rval = "skipped"
            self.set('fails', self.fails + 1)
            CrawlConfig.log(
                "hashverify transfer incomplete on %s -- skipping" % self.path)
            h.quit()
        elif "%s: (md5) OK" % self.path in rsp:
            rval = "matched"
            CrawlConfig.log("hashverify matched on %s" % self.path)
        elif "no valid checksum found" in rsp:
            if self.addable(self.cos):
                rval = self.add_to_sample(h)
            else:
                self.set('checksum', 0)
                rval = "skipped"
                CrawlConfig.log("hashverify skipped %s" % self.path)
        else:
            rval = Alert.Alert("Checksum mismatch: %s" % rsp)
            CrawlConfig.log("hashverify generated 'Checksum mismatch' " +
                            "alert on %s" % self.path)
        return rval
Example No. 6
def crl_log(argv):
    """log - write a message to the indicated log file

    usage: crawl log --log <filename> <message>
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--log',
                 action='store',
                 default=None,
                 dest='logfile',
                 help='specify the log file')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log(" ".join(a), logpath=o.logfile, cfg=cfg)
Example No. 7
def tcc_report(bitfile, cosinfo=None, path=None, log=True, store=True):
    """
    The bitfile appears to not have the right number of copies. We're going to
    write its information out to a report for manual followup.
    """
    if cosinfo is None:
        cosinfo = get_cos_info()
    fmt = "%7s %8s %8s %s"
    hdr = fmt % ("COS", "Ccopies", "Fcopies", "Filepath")

    # Compute the bitfile's path
    if path is None:
        bfp = get_bitfile_path(bitfile['BFID'])
    else:
        bfp = path
    rpt = fmt % (bitfile['BFATTR_COS_ID'],
                 str(cosinfo[bitfile['BFATTR_COS_ID']]),
                 str(bitfile['SC_COUNT']), bfp)
    if log:
        CrawlConfig.log(rpt)
    if store:
        try:
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
        except AttributeError:
            cfg = CrawlConfig.get_config()
            rptfname = cfg.get(sectname(), 'report_file')
            tcc_report._f = open(rptfname, 'a')
            tcc_report._f.write(hdr)
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
    return rpt
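
tcc_report caches its output file handle as an attribute on the function object itself: the first write raises AttributeError, which opens the file and writes the header exactly once. A self-contained sketch of that idiom (the filename below is an arbitrary stand-in for the configured report_file):

import os
import tempfile

def report(line):
    try:
        report._f.write(line + "\n")
    except AttributeError:
        # first call: open the file and emit the header exactly once
        report._f = open(os.path.join(tempfile.gettempdir(),
                                      "demo_report.txt"), 'a')
        report._f.write("HEADER\n")
        report._f.write(line + "\n")
    report._f.flush()

report("first")
report("second")    # no second header
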
Example No. 8
def running_pid(proc_required=True, context=None):
    """
    Return a list of pids if the crawler is running (per ps(1)) or [] otherwise
    """
    cfg = CrawlConfig.add_config()

    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' in line:
                pid = int(line.split()[0])
                pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
                if os.path.exists(pfpath):
                    (ctx, xpath) = util.contents(pfpath).strip().split()
                    rval.append((pid, ctx, xpath))
                elif not os.path.exists(pfpath + '.DEFUNCT'):
                    # crawler is running but the pid file has been lost
                    ctx = context or cfg.get('crawler', 'context')
                    xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                    make_pidfile(pid, ctx, xpath)
                    rval.append((pid, ctx, xpath))
                # if pfpath + '.DEFUNCT' exists, the crawler is shutting down
                # so we don't want to recreate the pid file.
    else:
        pid_l = glob.glob("%s/*" % CrawlConfig.pid_dir())
        for pid_n in pid_l:
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))

    return rval
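
Each pid file is named for the pid and holds one line, "<context> <exitpath>", so recovery is a strip-and-split. A small runnable sketch of writing and scanning such a directory; the temp directory stands in for CrawlConfig.pid_dir():

import glob
import os
import tempfile

piddir = tempfile.mkdtemp()     # stand-in for CrawlConfig.pid_dir()
with open(os.path.join(piddir, "12345"), 'w') as f:
    f.write("TEST /tmp/TEST.exit\n")

for pf in glob.glob("%s/*" % piddir):
    pid = int(os.path.basename(pf))
    with open(pf) as f:
        (ctx, xpath) = f.read().strip().split()
    print((pid, ctx, xpath))
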
Example No. 9
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    If we check a range and get no hits (i.e., no NSOBJECT ids exist in the
    range), we'll store

       (<time>, <low-id>, <high-id>, 0, 0)

    If we get a hit with the right copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 1, 0)

    If we get a hit with the wrong copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    tabname = cfg.get(sectname(), 'table_name')

    result = dbschem.make_table(tabname)
    ts = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, ts))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table=tabname,
              fields=[
                  'check_time', 'low_nsobj_id', 'high_nsobj_id', 'correct',
                  'error'
              ],
              data=[(ts, low, high, correct, error)])
    db.close()
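
Each call stores one row describing a checked id range, with the correct/error flags encoded as the docstring describes. A sqlite3 sketch of the same parameterized insert; sqlite3 stands in for HPSSIC's CrawlDBI wrapper:

import sqlite3
import time

db = sqlite3.connect(":memory:")
db.execute("create table checked (check_time int, low_nsobj_id int, "
           "high_nsobj_id int, correct int, error int)")
ts = int(time.time())
db.execute("insert into checked values (?, ?, ?, ?, ?)",
           (ts, 100, 200, 0, 0))    # empty range: no hits
db.execute("insert into checked values (?, ?, ?, ?, ?)",
           (ts, 150, 150, 1, 0))    # one hit with the right copy count
print(db.execute("select * from checked").fetchall())
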
Example No. 10
def check_path(path, verbose=False, plugin=True, xof=True):
    """
    If plugin is True, we want to log and store, which tcc_report does by
    default so we leave those flags alone.

    If plugin is False, we're interactive and we want to write any report to
    stdout. However, we only make a report if 1) verbose is True, or 2) the
    counts don't match.
    """
    cosinfo = get_cos_info()
    nsobj = path_nsobject(path)
    try:
        bfl = get_bitfile_set(int(nsobj), 1)
    except U.HpssicError as e:
        if plugin:
            CrawlConfig.log(e.value)
            return
        elif xof:
            raise SystemExit(e.value)
        else:
            raise U.HpssicError(e.value)

    bf = U.pop0(bfl)
    sc_count = int(bf['SC_COUNT'])
    cos_count = int(cosinfo[bf['BFATTR_COS_ID']])

    if plugin and sc_count != cos_count:
        tcc_report(bf, path=path)
    elif not plugin and (verbose or sc_count != cos_count):
        print(tcc_report(bf, path=path, log=False, store=False))
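
The except clause above shows three error dispositions selected by flags: log and return in plugin mode, exit with the message interactively, or re-raise so the caller can decide. A tiny generic sketch of that dispatch (names are hypothetical):

def handle(msg, plugin=True, xof=True):
    if plugin:
        print("log: %s" % msg)    # plugin mode: log it and keep going
        return
    elif xof:
        raise SystemExit(msg)     # interactive: exit with the message
    else:
        raise ValueError(msg)     # let the caller decide

handle("copy count mismatch")     # logs
try:
    handle("copy count mismatch", plugin=False, xof=False)
except ValueError as e:
    print("caller saw: %s" % e)
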
Example No. 11
def crl_cfgdump(argv):
    """cfgdump - load a config file and dump its contents

    usage: crawl cfgdump -c <filename> [--to stdout|log] [--logpath <path>]
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-t', '--to',
                 action='store', default='', dest='target',
                 help='specify where to send the output')
    p.add_option('-l', '--logpath',
                 action='store', default='', dest='logpath',
                 help='specify where to send the output')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    if o.target == '':
        o.target = 'stdout'

    cfg = CrawlConfig.get_config(o.config)
    dumpstr = cfg.dump()

    if o.target == 'stdout':
        print(dumpstr)
    elif o.target == 'log':
        log = CrawlConfig.log(logpath=o.logpath, cfg=cfg)
        for line in dumpstr.split("\n"):
            CrawlConfig.log(line)
Example No. 12
def record_checked_ids(cfg, low, high, correct, error):
    """
    Save checked NSOBJECT ids in the HPSSIC database.

    If we check a range and get no hits (i.e., no NSOBJECT ids exist in the
    range), we'll store

       (<time>, <low-id>, <high-id>, 0, 0)

    If we get a hit with the right copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 1, 0)

    If we get a hit with the wrong copy count, we store it by itself as

       (<time>, <hit-id>, <hit-id>, 0, 1)
    """
    tabname = cfg.get(sectname(), 'table_name')

    result = dbschem.make_table(tabname)
    ts = int(time.time())
    CrawlConfig.log("recording checked ids %d to %d at %d" % (low, high, ts))
    db = CrawlDBI.DBI(dbtype="crawler")
    db.insert(table=tabname,
              fields=['check_time',
                      'low_nsobj_id',
                      'high_nsobj_id',
                      'correct',
                      'error'],
              data=[(ts, low, high, correct, error)])
    db.close()
Example No. 13
def check_path(path, verbose=False, plugin=True, xof=True):
    """
    If plugin is True, we want to log and store, which tcc_report does by
    default so we leave those flags alone.

    If plugin is False, we're interactive and we want to write any report to
    stdout. However, we only make a report if 1) verbose is True, or 2) the
    counts don't match.
    """
    cosinfo = get_cos_info()
    nsobj = path_nsobject(path)
    try:
        bfl = get_bitfile_set(int(nsobj), 1)
    except U.HpssicError as e:
        if plugin:
            CrawlConfig.log(e.value)
            return
        elif xof:
            raise SystemExit(e.value)
        else:
            raise U.HpssicError(e.value)

    bf = U.pop0(bfl)
    sc_count = int(bf['SC_COUNT'])
    cos_count = int(cosinfo[bf['BFATTR_COS_ID']])

    if plugin and sc_count != cos_count:
        tcc_report(bf, path=path)
    elif not plugin and (verbose or sc_count != cos_count):
        print(tcc_report(bf, path=path, log=False, store=False))
Example No. 14
def running_pid(proc_required=True, context=None):
    """
    Return a list of pids if the crawler is running (per ps(1)) or [] otherwise
    """
    cfg = CrawlConfig.add_config()

    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' in line:
                pid = int(line.split()[0])
                pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
                if os.path.exists(pfpath):
                    (ctx, xpath) = util.contents(pfpath).strip().split()
                    rval.append((pid, ctx, xpath))
                elif not os.path.exists(pfpath + '.DEFUNCT'):
                    # crawler is running but the pid file has been lost
                    ctx = context or cfg.get('crawler', 'context')
                    xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                    make_pidfile(pid, ctx, xpath)
                    rval.append((pid, ctx, xpath))
                # if pfpath + '.DEFUNCT' exists, the crawler is shutting down
                # so we don't want to recreate the pid file.
    else:
        pid_l = glob.glob("%s/*" % CrawlConfig.pid_dir())
        for pid_n in pid_l:
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))

    return rval
Example No. 15
    def verify(self, h):
        """
        Attempt to verify the current file.
        """
        CrawlConfig.log("hsi(%d) attempting to verify %s" % (h.pid(),
                                                             self.path))
        rsp = h.hashverify(self.path)

        if "TIMEOUT" in rsp or "ERROR" in rsp:
            rval = "skipped"
            self.set('fails', self.fails + 1)
            CrawlConfig.log("hashverify transfer incomplete on %s -- skipping"
                            % self.path)
            h.quit()
        elif "%s: (md5) OK" % self.path in rsp:
            rval = "matched"
            CrawlConfig.log("hashverify matched on %s" % self.path)
        elif "no valid checksum found" in rsp:
            if self.addable(self.cos):
                rval = self.add_to_sample(h)
            else:
                self.set('checksum', 0)
                rval = "skipped"
                CrawlConfig.log("hashverify skipped %s" % self.path)
        else:
            rval = Alert.Alert("Checksum mismatch: %s" % rsp)
            CrawlConfig.log("hashverify generated 'Checksum mismatch' " +
                            "alert on %s" % self.path)
        return rval
Example No. 16
def tcc_report(bitfile, cosinfo=None, path=None, log=True, store=True):
    """
    The bitfile appears to not have the right number of copies. We're going to
    write its information out to a report for manual followup.
    """
    if cosinfo is None:
        cosinfo = get_cos_info()
    fmt = "%7s %8s %8s %s"
    hdr = fmt % ("COS", "Ccopies", "Fcopies", "Filepath")

    # Compute the bitfile's path
    if path is None:
        bfp = get_bitfile_path(bitfile['BFID'])
    else:
        bfp = path
    rpt = fmt % (bitfile['BFATTR_COS_ID'],
                 str(cosinfo[bitfile['BFATTR_COS_ID']]),
                 str(bitfile['SC_COUNT']),
                 bfp)
    if log:
        CrawlConfig.log(rpt)
    if store:
        try:
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
        except AttributeError:
            cfg = CrawlConfig.get_config()
            rptfname = cfg.get(sectname(), 'report_file')
            tcc_report._f = open(rptfname, 'a')
            tcc_report._f.write(hdr)
            tcc_report._f.write(rpt + "\n")
            tcc_report._f.flush()
    return rpt
Example No. 17
def crl_start(argv):
    """start - if the crawler is not already running as a daemon, start it

    usage: crawl start

    default config file: crawl.cfg, or
                         $CRAWL_CONF, or
                         -c <filename> on command line
    default log file:    /var/log/crawl.log, or
                         $CRAWL_LOG, or
                         -l <filename> on command line
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--log',
                 action='store', default='', dest='logfile',
                 help='specify the log file')
    p.add_option('-C', '--context',
                 action='store', default='', dest='context',
                 help="context of crawler ('TEST' or 'PROD')")
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)

    #
    # Initialize the configuration
    #
    if o.context != '':
        cfg.set('crawler', 'context', o.context)
    try:
        exitpath = cfg.get('crawler', 'exitpath')
    except CrawlConfig.NoOptionError as e:
        print("No exit path is specified in the configuration")
        sys.exit(1)

    vstr = "HPSS Integrity Crawler version %s" % version.__version__
    log = CrawlConfig.log(vstr, logpath=o.logfile, cfg=cfg)
    pfpath = make_pidfile(os.getpid(),
                          cfg.get('crawler', 'context'),
                          exitpath,
                          just_check=True)
    crawler = CrawlDaemon(pfpath,
                          stdout="crawler.stdout",
                          stderr="crawler.stderr",
                          logger=log,
                          workdir='.')
    CrawlConfig.log('crl_start: calling crawler.start()')
    crawler.start()
Example No. 18
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*
    """
    cfg = CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        c = util.contents(path)
        if context in c:
            os.unlink(path)
Example No. 19
def cvv_report(argv):
    """report - show the checksum verifier database status

    select count(*) from checkables where type = 'f';
    select count(*) from checkables where checksum <> 0;
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-p',
                 '--prefix',
                 action='store',
                 default='',
                 dest='prefix',
                 help='table name prefix')
    p.add_option('-v',
                 '--verbose',
                 action='store_true',
                 default=False,
                 dest='verbose',
                 help='pass verbose flag to HSI object')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config != '':
        cfg = CrawlConfig.get_config(o.config)
    else:
        cfg = CrawlConfig.get_config()

    if o.prefix != '':
        cfg.set('dbi', 'tbl_prefix', o.prefix)

    dim = {}
    dim['cos'] = Dimension.get_dim('cos')
    dim['ttypes'] = Dimension.get_dim('ttypes')

    print(dim['cos'].report())
    print(dim['ttypes'].report())
Example No. 20
    def populate_cart(self, h):
        """
        Fill in the cart field
        """
        rsp = h.lsP(self.path)
        tmp = Checkable.fdparse(rsp.split("\n")[1])
        try:
            self.cart = tmp.cart
        except AttributeError:
            self.cart = ''
            CrawlConfig.log("%s <- Checkable.fdparse('%s')" %
                            (tmp, rsp.split("\n")[1]))
Example No. 21
    def load_recheck_list(cls, how_many):
        """
        Look to see whether any of the already checksummed items in the
        database have a last check time over the threshold for rechecking. If
        so, we'll shove some of them to the front of the list based on the
        configuration.
        """
        cfg = CrawlConfig.add_config()
        r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
        r_age = cfg.get_time('cv', 'recheck_age', 365 * 24 * 3600)
        threshold = int(time.time() - r_age)
        CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
        if r_fraction == 0.0:
            return []

        limit = round(r_fraction * how_many)

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {'table': 'checkables',
              'fields': ['rowid', 'path', 'type', 'cos', 'cart', 'ttypes',
                         'checksum', 'last_check', 'fails', 'reported'],
              'where': 'checksum <> 0 and last_check < %d' % threshold,
              'orderby': 'last_check',
              'limit': limit}

        rows = db.select(**kw)
        db.close()

        rval = []
        for row in rows:
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            in_db=True,
                            dirty=False)
            rval.append(new)
        return rval
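
The recheck query selects rows that already have a checksum (checksum <> 0) and whose last check is older than the threshold, oldest first, capped at recheck_fraction of the batch size. A sqlite3 sketch of the same select with the schema cut down to the fields the query touches:

import sqlite3
import time

db = sqlite3.connect(":memory:")
db.execute("create table checkables "
           "(path text, checksum int, last_check int)")
now = int(time.time())
db.executemany("insert into checkables values (?, ?, ?)",
               [("/old", 1, now - 400 * 24 * 3600),
                ("/new", 1, now - 3600),
                ("/unchecked", 0, 0)])

threshold = now - 365 * 24 * 3600
limit = int(round(0.5 * 10))      # recheck_fraction * how_many
for row in db.execute("select path from checkables where checksum <> 0 "
                      "and last_check < ? order by last_check limit ?",
                      (threshold, limit)):
    print(row[0])                 # only /old qualifies
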
Example No. 22
    def load_recheck_list(cls, how_many):
        """
        Look to see whether any of the already checksummed items in the
        database have a last check time over the threshold for rechecking. If
        so, we'll shove some of them to the front of the list based on the
        configuration.
        """
        cfg = CrawlConfig.add_config()
        r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
        r_age = cfg.get_time('cv', 'recheck_age', 365*24*3600)
        threshold = int(time.time() - r_age)
        CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
        if r_fraction == 0.0:
            return []

        limit = round(r_fraction * how_many)

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {'table': 'checkables',
              'fields': ['rowid',
                         'path',
                         'type',
                         'cos',
                         'cart',
                         'ttypes',
                         'checksum',
                         'last_check',
                         'fails',
                         'reported'],
              'where': 'checksum <> 0 and last_check < %d' % threshold,
              'orderby': 'last_check',
              'limit': limit}

        rows = db.select(**kw)
        db.close()

        rval = []
        for row in rows:
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            in_db=True,
                            dirty=False)
            rval.append(new)
        return rval
Example No. 23
def cvv_show_next(argv):
    """show_next - Report the Checkables in the order they will be checked

    usage: cvtool show_next
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--config',
                 action='store', default='', dest='config',
                 help='alternate configuration')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-i', '--id',
                 action='store', default='', dest='id',
                 help='id of entry to be checked')
    p.add_option('-l', '--limit',
                 action='store', default=-1, dest='limit', type=int,
                 help='max records to get')
    p.add_option('-p', '--path',
                 action='store', default='', dest='path',
                 help='name of path to be checked')
    p.add_option('-v', '--verbose',
                 action='store_true', default=False, dest='verbose',
                 help='more information')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config:
        cfg = CrawlConfig.add_config(close=True, filename=o.config)
    else:
        cfg = CrawlConfig.add_config()

    if o.limit < 0:
        limit = int(cfg.get_d('cv', 'operations', '10'))
    else:
        limit = o.limit

    clist = Checkable.Checkable.get_list(limit)
    for c in clist:
        if c.last_check == 0:
            print("%18d %s %s" % (c.last_check,
                                  c.type,
                                  c.path))
        else:
            print("%s %s %s" % (U.ymdhms(c.last_check),
                                c.type,
                                c.path))
Example No. 24
    def fire(self):
        """
        Run the plugin.
        """
        if self.firable:
            CrawlConfig.log("%s: firing" % self.name)
            # sys.modules[self.modname].main(self.cfg)
            errors = self.plugin.main(self.cfg)
            self.last_fired = time.time()
            crawl_sublib.record_history(self.name, self.last_fired, errors)
        elif self.cfg.getboolean('crawler', 'verbose'):
            CrawlConfig.log("%s: not firable" % self.name)
            self.last_fired = time.time()
Example No. 25
    def add_to_sample(self, hsi, already_hashed=False):
        """
        Add the current Checkable to the sample. If already_hashed is True,
        this is a file for which a checksum has already been computed. We just
        need to record that fact by setting its checksum member to 1 and
        updating the sample count.

        If already_hashed is False, we need to carry out the following steps:

         1) run hashcreate on the file
         2) set checksum to non-zero to record that we have a checksum
         3) update the sample count in the Dimension object
        """
        if not already_hashed:
            CrawlConfig.log("starting hashcreate on %s", self.path)
            rsp = hsi.hashcreate(self.path)
            if "TIMEOUT" in rsp or "ERROR" in rsp:
                CrawlConfig.log("hashcreate transfer failed on %s", self.path)
                hsi.quit()
                self.set('fails', self.fails + 1)
                return "skipped"
            elif "Access denied" in rsp:
                CrawlConfig.log("hashcreate failed with 'access denied' on %s",
                                self.path)
                hsi.quit()
                return "access denied"
            else:
                CrawlConfig.log("completed hashcreate on %s", self.path)

        if self.checksum == 0:
            for dn in self.dim:
                cat = getattr(self, dn)
                self.dim[dn].addone(cat)
            self.set('checksum', 1)
            return "checksummed"
Example No. 26
def crl_fire(argv):
    """fire - run a plugin

    usage: crawl fire --cfg cfgname --logpath logfname --plugin plugname
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-l',
                 '--logpath',
                 action='store',
                 default='',
                 dest='logpath',
                 help='specify where to send the output')
    p.add_option('-p',
                 '--plugin',
                 action='store',
                 default='',
                 dest='plugname',
                 help='which plugin to fire')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)
    CrawlConfig.log(logpath=o.logpath, cfg=cfg)

    if o.plugname == '':
        print("'-p <plugin-name>' is required")
    elif not cfg.has_section(o.plugname):
        print("No plugin named '%s' found in configuration" % o.plugname)
    else:
        plugdir = cfg.get('crawler', 'plugin-dir')
        sys.path.append(plugdir)
        __import__(o.plugname)
        CrawlConfig.log('firing %s', o.plugname)
        sys.modules[o.plugname].main(cfg)
Example No. 27
def make_pidfile(pid, context, exitpath, just_check=False):
    """
    Generate a pid file in the pid directory (defined in CrawlDaemon), creating
    the directory if necessary.
    """
    ok = False
    piddir = CrawlConfig.pid_dir()
    if not os.path.exists(piddir):
        os.mkdir(piddir)
        ok = True

    if not ok:
        pf_l = [x for x in glob.glob("%s/*" % piddir)
                if not x.endswith('.DEFUNCT')]
        for pf_n in pf_l:
            data = util.contents(pf_n)
            if 0 == len(data):
                continue
            (ctx, xp) = data.strip().split()
            if ctx == context:
                raise StandardError("The pidfile for context %s exists" %
                                    context)

    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname

    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))

    return pfname
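
make_pidfile only scans for a conflicting context when the pid directory already existed, and just_check=True returns the would-be path without writing anything. A condensed standalone sketch under those assumptions (a temp dir instead of the configured pid directory, RuntimeError instead of Python 2's StandardError, and no .DEFUNCT handling):

import os
import tempfile

def demo_make_pidfile(piddir, pid, context, exitpath, just_check=False):
    if not os.path.exists(piddir):
        os.mkdir(piddir)
    else:
        for name in os.listdir(piddir):
            data = open(os.path.join(piddir, name)).read().strip()
            if data and data.split()[0] == context:
                raise RuntimeError("The pidfile for context %s exists"
                                   % context)
    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname
    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))
    return pfname

d = tempfile.mkdtemp()
print(demo_make_pidfile(d, 4242, "TEST", "/tmp/TEST.exit"))
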
Example No. 28
    def __init__(self, *args, **kwargs):
        """
        Set piddir for the object from the configuration, then call the
        parent's constructor.
        """
        self.piddir = CrawlConfig.pid_dir()
        super(CrawlDaemon, self).__init__(*args, **kwargs)
Example No. 29
def crl_dbdrop(argv):
    """dbdrop - drop a database table

    usage: crawl dbdrop [-f] <table-name>

    Drop database table <table-name>
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-f', '--force',
                 action='store_true', default=False, dest='force',
                 help='proceed without confirmation')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    tbpfx = cfg.get('dbi', 'tbl_prefix')
    tname = a[0]
    if not o.force:
        answer = raw_input("About to drop db table %s_%s. Are you sure? > "
                           % (tbpfx, tname))
        if answer[0].lower() != "y":
            sys.exit()

    result = dbschem.drop_table(cfg, tname)
    print(result)
Example No. 30
def crl_dbdrop(argv):
    """dbdrop - drop a database table

    usage: crawl dbdrop [-f] <table-name>

    Drop database table <table-name>
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-f',
                 '--force',
                 action='store_true',
                 default=False,
                 dest='force',
                 help='proceed without confirmation')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    tbpfx = cfg.get('dbi', 'tbl_prefix')
    tname = a[0]
    if not o.force:
        answer = raw_input("About to drop db table %s_%s. Are you sure? > "
                           % (tbpfx, tname))
        if answer[0].lower() != "y":
            sys.exit()

    result = dbschem.drop_table(cfg, tname)
    print(result)
Example No. 31
def make_pidfile(pid, context, exitpath, just_check=False):
    """
    Generate a pid file in the pid directory (defined in CrawlDaemon), creating
    the directory if necessary.
    """
    ok = False
    piddir = CrawlConfig.pid_dir()
    if not os.path.exists(piddir):
        os.mkdir(piddir)
        ok = True

    if not ok:
        pf_l = [
            x for x in glob.glob("%s/*" % piddir) if not x.endswith('.DEFUNCT')
        ]
        for pf_n in pf_l:
            data = util.contents(pf_n)
            if 0 == len(data):
                continue
            (ctx, xp) = data.strip().split()
            if ctx == context:
                raise StandardError("The pidfile for context %s exists" %
                                    context)

    pfname = "%s/%d" % (piddir, pid)
    if just_check:
        return pfname

    with open(pfname, 'w') as f:
        f.write("%s %s\n" % (context, exitpath))

    return pfname
Example No. 32
def drop_table(cfg=None, prefix=None, table=None):
    """
    This wraps the table dropping operation.
    """
    if table is None:
        return MSG.nothing_to_drop

    if cfg is None:
        cfg = CrawlConfig.get_config()

    if prefix is None:
        prefix = cfg.get('dbi-crawler', 'tbl_prefix')
    else:
        cfg.set('dbi-crawler', 'tbl_prefix', prefix)

    db = CrawlDBI.DBI(dbtype="crawler", cfg=cfg)
    if not db.table_exists(table=table):
        rval = ("Table '%s' does not exist" % (table))
    else:
        db.drop(table=table)
        if db.table_exists(table=table):
            rval = ("Attempt to drop table '%s' failed" % (table))
        else:
            rval = ("Attempt to drop table '%s' was successful" % (table))

    db.close()
    return rval
Example No. 33
def tccp_zreport(args):
    """zreport - show what tcc_report will do with a bitfile id

    usage: tcc zreport NSOBJECT-ID

    Note: This will only report bitfiles where the COS count and file count
    differ. Giving it any old object id won't necessarily generate any output.
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    try:
        nsobj_id = a[0]
    except IndexError:
        print("usage: tcc zreport OBJECT_ID")
        return

    cfg = CrawlConfig.get_config()
    outfile = cfg.get(tcc_lib.sectname(), 'report_file')

    cosinfo = tcc_lib.get_cos_info()
    try:
        bfl = tcc_lib.get_bitfile_set(int(nsobj_id), 1)
    except U.HpssicError:
        bfl = []
    print("Writing output to %s" % outfile)
    for bf in bfl:
        tcc_lib.tcc_report(bf, cosinfo)
Example No. 34
    def __init__(self, connect=True, *args, **kwargs):
        """
        Initialize the object
        """
        self.prompt = "]:"
        self.verbose = False
        self.unavailable = False
        self.xobj = None
        self.timeout = 60

        cmdopts = " ".join(args)
        for key in kwargs:
            setattr(self, key, kwargs[key])

        cfg = CrawlConfig.get_config()
        if not hasattr(self, 'reset_atime'):
            self.reset_atime = cfg.getboolean('cv', 'reset_atime')

        if not hasattr(self, 'hash_algorithm'):
            self.hash_algorithm = cfg.get_d('cv', 'hash_algorithm', None)

        maybe_update_hsi()
        self.cmd = "hsi " + cmdopts
        if connect:
            self.connect()
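
The constructor lets keyword arguments override defaults by looping setattr over kwargs, then backfills anything the caller left unset from configuration, using hasattr as the "was it supplied?" test. A generic sketch of that pattern with a plain dict standing in for CrawlConfig:

class Session(object):
    def __init__(self, **kwargs):
        self.timeout = 60                      # hard default
        for key in kwargs:                     # caller overrides
            setattr(self, key, kwargs[key])
        cfg = {'reset_atime': True}            # stand-in for CrawlConfig
        if not hasattr(self, 'reset_atime'):   # backfill what wasn't supplied
            self.reset_atime = cfg['reset_atime']

s = Session(timeout=5)
print("%s %s" % (s.timeout, s.reset_atime))    # 5 True
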
Example No. 35
def mprf_reset(args):
    """reset - drop the mpra table and remove mpra_report.txt

    usage: mpra reset

    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-f', '--force',
                 action='store_true', default=False, dest='force',
                 help='force the operation')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    if not o.force:
        answer = raw_input(MSG.all_mpra_data_lost)
        if answer[0].lower() != "y":
            raise SystemExit()

    cfg = CrawlConfig.get_config(o.config)

    dbschem.drop_table(cfg=cfg, table='mpra')

    filename = cfg.get('mpra', 'report_file')
    util.conditional_rm(filename)
Example No. 36
def crl_cfgdump(argv):
    """cfgdump - load a config file and dump its contents

    usage: crawl cfgdump -c <filename> [--to stdout|log] [--logpath <path>]
    """
    p = optparse.OptionParser()
    p.add_option('-c',
                 '--cfg',
                 action='store',
                 default='',
                 dest='config',
                 help='config file name')
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-t',
                 '--to',
                 action='store',
                 default='',
                 dest='target',
                 help='specify where to send the output')
    p.add_option('-l',
                 '--logpath',
                 action='store',
                 default='',
                 dest='logpath',
                 help='specify where to send the output')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    if o.target == '':
        o.target = 'stdout'

    cfg = CrawlConfig.get_config(o.config)
    dumpstr = cfg.dump()

    if o.target == 'stdout':
        print(dumpstr)
    elif o.target == 'log':
        log = CrawlConfig.log(logpath=o.logpath, cfg=cfg)
        for line in dumpstr.split("\n"):
            CrawlConfig.log(line)
Example No. 37
def get_last_rpt_time(db):
    """
    Retrieve the last report time from the report table. If the table did not
    exist before make_table ran ('Created' appears in the result), it is
    empty, so we just return 0 to indicate there is no last report time.
    """
    result = dbschem.make_table("report")
    if "Created" in result:
        rval = 0
    else:
        rows = db.select(table='report', fields=['max(report_time)'])
        rval = rows[0][0]
        if rval is None:
            rval = 0

    CrawlConfig.log("time of last report: %d" % rval)
    return rval
Example No. 38
def highest_nsobject_id():
    """
    Cache and return the largest NSOBJECT id in the DB2 database. The variables
    highest_nsobject_id._max_obj_id and highest_nsobject_id._when are local to
    this function but do not lose their values between invocations.
    """
    if (not hasattr(highest_nsobject_id, '_max_obj_id')
            or not hasattr(highest_nsobject_id, '_when')
            or 60 < time.time() - highest_nsobject_id._when):
        highest_nsobject_id._max_obj_id = max_nsobj_id()
        highest_nsobject_id._when = time.time()
        CrawlConfig.log("max object id = %d at %s" %
                        (highest_nsobject_id._max_obj_id,
                         util.ymdhms(highest_nsobject_id._when)))

    rval = highest_nsobject_id._max_obj_id
    return rval
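
Function attributes give a cheap memoization-with-expiry: the cached value and its timestamp live on the function object and persist between calls, and the value is recomputed when missing or older than 60 seconds. A generic runnable sketch:

import time

def expensive():
    return 42    # stand-in for max_nsobj_id()

def cached_max(ttl=60):
    if (not hasattr(cached_max, '_value') or
            ttl < time.time() - cached_max._when):
        cached_max._value = expensive()
        cached_max._when = time.time()
    return cached_max._value

print(cached_max())    # computes
print(cached_max())    # served from the function attributes
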
Example No. 39
def highest_nsobject_id():
    """
    Cache and return the largest NSOBJECT id in the DB2 database. The variables
    highest_nsobject_id._max_obj_id and highest_nsobject_id._when are local to
    this function but do not lose their values between invocations.
    """
    if (not hasattr(highest_nsobject_id, '_max_obj_id') or
            not hasattr(highest_nsobject_id, '_when') or
            60 < time.time() - highest_nsobject_id._when):
        highest_nsobject_id._max_obj_id = max_nsobj_id()
        highest_nsobject_id._when = time.time()
        CrawlConfig.log("max object id = %d at %s" %
                        (highest_nsobject_id._max_obj_id,
                         util.ymdhms(highest_nsobject_id._when)))

    rval = highest_nsobject_id._max_obj_id
    return rval
Example No. 40
def get_last_rpt_time(db):
    """
    Retrieve the last report time from the report table. If the table did not
    exist before make_table ran ('Created' appears in the result), it is
    empty, so we just return 0 to indicate there is no last report time.
    """
    result = dbschem.make_table("report")
    if "Created" in result:
        rval = 0
    else:
        rows = db.select(table='report',
                         fields=['max(report_time)'])
        rval = rows[0][0]
        if rval is None:
            rval = 0

    CrawlConfig.log("time of last report: %d" % rval)
    return rval
Example No. 41
    def hashcreate(self, pathnames):
        """
        Argument pathnames should reference one or more files. It may be a
        string containing one or more space separated file paths, or a list of
        one or more file paths. If it has type unicode, it will be encoded to
        'ascii' before being treated as a string.
        """
        if type(pathnames) == str:
            pathlist = pathnames.split()
        elif type(pathnames) == list:
            pathlist = pathnames
        elif type(pathnames) == unicode:
            pathlist = pathnames.encode('ascii', 'ignore').split()
        else:
            raise HSIerror("%s: Invalid argument (%s: '%s')" %
                           (util.my_name(), type(pathnames), pathnames))
        rval = ""
        for path in pathlist:
            if self.reset_atime:
                prev_time = self.access_time(path)

            if self.hash_algorithm is None:
                cmd = "hashcreate %s" % path
            else:
                cmd = "hashcreate -H %s %s" % (self.hash_algorithm, path)
            self.xobj.sendline(cmd)
            which = self.xobj.expect([self.prompt, pexpect.TIMEOUT] +
                                     self.hsierrs)
            while which == 1 and 1 < len(self.xobj.before):
                CrawlConfig.log("got a timeout, continuing because before " +
                                "is not empty and does not contain an error")
                rval += self.xobj.before
                which = self.xobj.expect([self.prompt, pexpect.TIMEOUT] +
                                         self.hsierrs)
            rval += self.xobj.before
            if 1 == which:
                rval += " TIMEOUT"
            elif 0 != which:
                rval += " ERROR"

            if self.reset_atime:
                self.touch(path, when=prev_time)

        return rval
Ejemplo n.º 46
0
def crl_log(argv):
    """log - write a message to the indicated log file

    usage: crawl log --log <filename> <message>
    """
    p = optparse.OptionParser()
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--log',
                 action='store', default=None, dest='logfile',
                 help='specify the log file')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log(" ".join(a), logpath=o.logfile, cfg=cfg)
Example No. 43
    def __init__(self, name=None, cfg=None):
        """
        Configuration data is read and copied into the object by method
        init_cfg_data(), called by both the constructor and reload().
        init_cfg_data() reverses the order of cfg and name in its argument list
        from the constructor so name can have a default and reload() doesn't
        have to pass it.

        last_fired is initialized by the constructor but not by reload(). So if
        the plugin is updated by a reconfigure, it won't lose its last fire
        time but will stay on the same schedule.
        """
        assert(name is not None)
        assert(cfg is not None)
        self.cfg = cfg
        l = CrawlConfig.log(cfg=cfg, close=True)
        CrawlConfig.log("%s: Initializing plugin data" % name)
        self.init_cfg_data(name, cfg)
        self.last_fired = time.time() - self.frequency - 1
        super(CrawlPlugin, self).__init__()
Example No. 44
    def addable(self):
        """
        Determine which Dimensions want this item added. Note that we want
        this routine to be general across dimensions, so we don't want it to
        assume anything about the dimension it's checking (like that it's
        named 'cos', for example). That's why calls to this pass in cos
        rather than looking at the value in the object.
        """
        for dn in self.dim:
            cval = getattr(self, dn)
            if self.dim[dn].vote(cval) is False:
                CrawlConfig.log("%s votes against %s -- skipping" %
                                (dn, self.path))
                return False
        randval = random.random()
        if self.probability < randval:
            CrawlConfig.log("random votes against %s -- skipping (%g < %g)" %
                            (self.path, self.probability, randval))
            return False
        return True
Example No. 45
def cvv_report(argv):
    """report - show the checksum verifier database status

    select count(*) from checkables where type = 'f';
    select count(*) from checkables where checksum <> 0;
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-p', '--prefix',
                 action='store', default='', dest='prefix',
                 help='table name prefix')
    p.add_option('-v', '--verbose',
                 action='store_true', default=False, dest='verbose',
                 help='pass verbose flag to HSI object')
    try:
        (o, a) = p.parse_args(argv)
    except SystemExit:
        return

    if o.debug:
        pdb.set_trace()

    if o.config != '':
        cfg = CrawlConfig.get_config(o.config)
    else:
        cfg = CrawlConfig.get_config()

    if o.prefix != '':
        cfg.set('dbi', 'tbl_prefix', o.prefix)

    dim = {}
    dim['cos'] = Dimension.get_dim('cos')
    dim['ttypes'] = Dimension.get_dim('ttypes')

    print(dim['cos'].report())
    print(dim['ttypes'].report())
Example No. 46
def mpra_fetch_recent(type):
    """
    Retrieve and return the most recent record reported so we don't report the
    same record repeatedly
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    if not db.table_exists(table='mpra'):
        CrawlConfig.log("Fetch from not existent mpra table -- return 0")
        return 0

    rows = db.select(table='mpra',
                     fields=['scan_time', 'end_time'],
                     where='type = ?',
                     data=(type,))
    last_end_time = -1
    max_scan_time = 0
    for r in rows:
        if max_scan_time < r[0]:
            max_scan_time = r[0]
            last_end_time = r[1]

    if last_end_time < 0:
        CrawlConfig.log("No '%s' value in mpra -- returning 0" % type)
        return 0
    else:
        CrawlConfig.log("Fetch '%s' from mpra table -- return %d" %
                        (type, last_end_time))
        return last_end_time
Example No. 47
def simplug(plugin, args):
    """
    Common plugin simulator. May be used by the interactive tools to simulate
    running the associated plugin.
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-i',
                 '--iterations',
                 action='store',
                 default=1,
                 dest='iterations',
                 type='int',
                 help='how many iterations to run')
    (o, a) = p.parse_args(args)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config()
    CrawlConfig.log("starting %s simplug, just got config" % plugin)
    sys.path.append(cfg.get('crawler', 'plugin-dir'))
    modname = cfg.get(plugin, 'module')
    try:
        P = __import__(modname)
    except ImportError:
        H = __import__('hpssic.plugins.' + modname)
        P = getattr(H.plugins, modname)
    P.main(cfg)
    if 1 < o.iterations:
        for count in range(o.iterations - 1):
            stime = cfg.get_time(plugin, 'frequency')
            time.sleep(stime)
            P.main(cfg)
Example No. 48
def maybe_update_hsi():
    """
    If the hsi wrapper script has changed, grab and edit a fresh copy
    """
    l = util.which_all('hsi')
    trg = l[0]
    tc = util.contents(trg).split("\n")
    tv = util.grep('^BINARYVERSION=', tc)

    s = [x for x in l if 'sources/hpss' in x]
    src = s[0]
    sc = util.contents(src).split("\n")
    sv = util.grep('^BINARYVERSION=', sc)

    if tv[0] != sv[0]:
        z = util.grep("${EXECUTABLE}", sc, regex=False, index=True)
        sc[z[0]] = "exec " + sc[z[0]]
        try:
            f = open(trg, 'w')
            f.writelines("\n".join(sc) + "\n")
            f.close()
        except IOError as e:
            CrawlConfig.log(MSG.hsi_wrap_ood)
Example No. 49
def crl_fire(argv):
    """fire - run a plugin

    usage: crawl fire --cfg cfgname --logpath logfname --plugin plugname
    """
    p = optparse.OptionParser()
    p.add_option('-c', '--cfg',
                 action='store', default='', dest='config',
                 help='config file name')
    p.add_option('-d', '--debug',
                 action='store_true', default=False, dest='debug',
                 help='run the debugger')
    p.add_option('-l', '--logpath',
                 action='store', default='', dest='logpath',
                 help='specify where to send the output')
    p.add_option('-p', '--plugin',
                 action='store', default='', dest='plugname',
                 help='which plugin to fire')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    cfg = CrawlConfig.get_config(o.config)
    CrawlConfig.log(logpath=o.logpath, cfg=cfg)

    if o.plugname == '':
        print("'-p <plugin-name>' is required")
    elif not cfg.has_section(o.plugname):
        print("No plugin named '%s' found in configuration" % o.plugname)
    else:
        plugdir = cfg.get('crawler', 'plugin-dir')
        sys.path.append(plugdir)
        __import__(o.plugname)
        CrawlConfig.log('firing %s', o.plugname)
        sys.modules[o.plugname].main(cfg)
Example No. 50
    def fail_report(self, msg):
        """
        Report a failure
        """
        try:
            f = self.fail_report_fh
        except AttributeError:
            cfg = CrawlConfig.get_config()
            filename = cfg.get('checksum-verifier', 'fail_report')
            self.fail_report_fh = open(filename, 'a')
            f = self.fail_report_fh

        f.write("Failure retrieving file %s: '%s'\n" % (self.path, msg))
        self.set('reported', 1)
        f.flush()
Example No. 51
def stop_wait(cfg=None):
    """
    Watch for the crawler's exit file to disappear. If it's still there after
    the timeout period, give up and throw an exception.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    context = cfg.get('crawler', 'context')
    exitpath = cfg.get('crawler', 'exitpath')
    timeout = cfg.get_time('crawler', 'stopwait_timeout', 5.0)
    sleep_time = cfg.get_time('crawler', 'sleep_time', 0.25)
    lapse = 0.0

    while is_running(context) and lapse < timeout:
        time.sleep(sleep_time)
        lapse += sleep_time

    if is_running(context) and timeout <= lapse:
        raise util.HpssicError("Stop wait timeout exceeded")
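
Shutdown is a poll loop: sleep in small increments until the running check goes false or the accumulated lapse reaches the timeout, and raise if the crawler is still up. A generic sketch of the same wait, with a file's existence standing in for is_running():

import os
import time

def demo_stop_wait(flagfile, timeout=5.0, sleep_time=0.25):
    lapse = 0.0
    while os.path.exists(flagfile) and lapse < timeout:
        time.sleep(sleep_time)
        lapse += sleep_time
    if os.path.exists(flagfile) and timeout <= lapse:
        raise RuntimeError("Stop wait timeout exceeded")

demo_stop_wait("/tmp/no-such-flag-file")    # returns immediately
print("stopped")
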
Example No. 52
def is_running(context=None):
    """
    Return True if the crawler is running (per ps(1)) or False otherwise.
    """
    running = False
    if context is None:
        cfg = CrawlConfig.get_config()
        try:
            context = cfg.get('crawler', 'context')
        except CrawlConfig.NoOptionError as e:
            emsg = ("No option 'context' in section 'crawler', file '%s'" %
                    cfg.filename)
            raise StandardError(emsg)

    rpi_l = running_pid(context=context)
    for rpi in rpi_l:
        if rpi[1] == context:
            running = True

    return running