Beispiel #1
0
def get_html_report(cfg_file=None, cfg=None):
    """
    Format the HPSSIC dashboard report as an HTML page and return it.

    Arguments:
        cfg_file - optional path of a configuration file to load
        cfg      - optional already-loaded configuration object

    If *cfg* is provided, it is used as-is; otherwise a configuration is
    loaded (from *cfg_file* when given, else from the default location).
    The config load is kept for its side effects even though the object
    is not referenced below.

    Returns the complete HTML document as a string.
    """
    if cfg is None:
        if cfg_file is not None:
            cfg = CrawlConfig.add_config(filename=cfg_file)
        else:
            cfg = CrawlConfig.add_config()

    db = CrawlDBI.DBI(dbtype="crawler")
    last_rpt_time = rpt_lib.get_last_rpt_time(db)

    rval = ""
    # refresh the dashboard page once a minute
    rval += '<head><meta http-equiv="refresh" content="60">\n'
    rval += "<title>HPSSIC Dashboard</title></head>"
    rval += ("<body><center><h1>HPSS Integrity Crawler Dashboard</h1>" +
             "<br><h4>Version %s</h4>" % version.__version__ +
             "</center>\n")
    # Bug fix: the timestamp format was "%Y.%m%d ..." which runs month and
    # day together (e.g. "2014.0115"); "%Y.%m.%d" is the intended form.
    rval += "Report generated at %s\n" % time.strftime("%Y.%m.%d %H:%M:%S")
    rval += ("<br>Based on data from %s\n" %
             time.strftime("%Y.%m.%d %H:%M:%S", time.localtime(last_rpt_time)))
    rval += get_html_cv_report(db, last_rpt_time)
    rval += get_html_mpra_report(db, last_rpt_time)
    rval += get_html_tcc_report(db, last_rpt_time)
    rval += "</body>"
    db.close()

    return rval
Beispiel #2
0
def cvv_show_next(argv):
    """show_next - Report the Checkables in the order they will be checked

    usage: cvtool show_next
    """
    prs = optparse.OptionParser()
    # (short, long, kwargs) for each supported option
    optdefs = [('-c', '--config',
                dict(action='store', default='', dest='config',
                     help='alternate configuration')),
               ('-d', '--debug',
                dict(action='store_true', default=False, dest='debug',
                     help='run the debugger')),
               ('-i', '--id',
                dict(action='store', default='', dest='id',
                     help='id of entry to be checked')),
               ('-l', '--limit',
                dict(action='store', default=-1, dest='limit', type=int,
                     help='max records to get')),
               ('-p', '--path',
                dict(action='store', default='', dest='path',
                     help='name of path to be checked')),
               ('-v', '--verbose',
                dict(action='store_true', default=False, dest='verbose',
                     help='more information'))]
    for (short, longopt, kw) in optdefs:
        prs.add_option(short, longopt, **kw)
    try:
        (opts, _) = prs.parse_args(argv)
    except SystemExit:
        return

    if opts.debug:
        pdb.set_trace()

    if opts.config:
        cfg = CrawlConfig.add_config(close=True, filename=opts.config)
    else:
        cfg = CrawlConfig.add_config()

    # without an explicit limit, fall back on the cv/operations config value
    if opts.limit < 0:
        count = int(cfg.get_d('cv', 'operations', '10'))
    else:
        count = opts.limit

    for item in Checkable.Checkable.get_list(count):
        if item.last_check == 0:
            print("%18d %s %s" % (item.last_check, item.type, item.path))
        else:
            print("%s %s %s" % (U.ymdhms(item.last_check), item.type,
                                item.path))
Beispiel #3
0
def running_pid(proc_required=True, context=None):
    """
    Return a list of pids if the crawler is running (per ps(1)) or [] otherwise

    Each element of the result is a (pid, context, exitpath) tuple.
    """
    cfg = CrawlConfig.add_config()

    pidlist = []
    if not proc_required:
        # trust the pid directory: the filename is the pid, the contents
        # are "<context> <exitpath>"
        for pfile in glob.glob("%s/*" % CrawlConfig.pid_dir()):
            (ctx, xpath) = util.contents(pfile).strip().split()
            pidlist.append((int(os.path.basename(pfile)), ctx, xpath))
        return pidlist

    # scan ps output for live 'crawl start' processes
    for line in pidcmd().split("\n"):
        if 'crawl start' not in line:
            continue
        pid = int(line.split()[0])
        pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
        if os.path.exists(pfpath):
            (ctx, xpath) = util.contents(pfpath).strip().split()
            pidlist.append((pid, ctx, xpath))
        elif not os.path.exists(pfpath + '.DEFUNCT'):
            # the crawler is running but its pid file has been lost --
            # rebuild it from the configuration
            ctx = context or cfg.get('crawler', 'context')
            xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
            make_pidfile(pid, ctx, xpath)
            pidlist.append((pid, ctx, xpath))
        # a '.DEFUNCT' file means the crawler is shutting down, so we
        # deliberately do not recreate the pid file in that case

    return pidlist
Beispiel #4
0
def running_pid(proc_required=True, context=None):
    """
    Return a list of pids if the crawler is running (per ps(1)) or [] otherwise

    Each element of the returned list is a (pid, context, exitpath) tuple.

    With *proc_required* True, ps output (via pidcmd()) is scanned for
    'crawl start' processes, and a lost pid file is recreated for any live
    process that lacks one. With *proc_required* False, the pid directory
    alone is trusted and every pid file found there is reported.
    """
    cfg = CrawlConfig.add_config()

    rval = []
    if proc_required:
        result = pidcmd()
        for line in result.split("\n"):
            if 'crawl start' in line:
                # first whitespace-delimited field of the ps line is the pid
                pid = int(line.split()[0])
                pfpath = "%s/%d" % (CrawlConfig.pid_dir(), pid)
                if os.path.exists(pfpath):
                    # the pid file holds "<context> <exitpath>"
                    (ctx, xpath) = util.contents(pfpath).strip().split()
                    rval.append((pid, ctx, xpath))
                elif not os.path.exists(pfpath + '.DEFUNCT'):
                    # crawler is running but the pid file has been lost
                    ctx = context or cfg.get('crawler', 'context')
                    xpath = cfg.get_d('crawler', 'exitpath', '%s.exit' % ctx)
                    make_pidfile(pid, ctx, xpath)
                    rval.append((pid, ctx, xpath))
                # if pfpath + '.DEFUNCT' exists, the crawler is shutting down
                # so we don't want to recreate the pid file.
    else:
        pid_l = glob.glob("%s/*" % CrawlConfig.pid_dir())
        for pid_n in pid_l:
            # the filename is the pid; contents are "<context> <exitpath>"
            pid = int(os.path.basename(pid_n))
            (ctx, xpath) = util.contents(pid_n).strip().split()
            rval.append((pid, ctx, xpath))

    return rval
Beispiel #5
0
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*

    A '.DEFUNCT' suffix marks the pid file of a crawler that is shutting
    down; any such file whose contents mention *context* is deleted.
    """
    # add_config() is called for its side effect of ensuring the
    # configuration is loaded; the previously-bound (and unused) local
    # 'cfg' has been dropped.
    CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        if context in util.contents(path):
            os.unlink(path)
Beispiel #6
0
def clean_defunct_pidfiles(context):
    """
    Remove .DEFUNCT pid files for *context*

    A '.DEFUNCT' suffix marks the pid file of a crawler that is shutting
    down; any such file whose contents mention *context* is deleted.
    """
    # NOTE(review): cfg is unused; add_config() is presumably called only
    # for its side effect of loading the configuration -- confirm.
    cfg = CrawlConfig.add_config()
    pdir = CrawlConfig.pid_dir()
    for path in glob.glob(os.path.join(pdir, '*.DEFUNCT')):
        c = util.contents(path)
        if context in c:
            os.unlink(path)
Beispiel #7
0
    def load_recheck_list(cls, how_many):
        """
        Build the recheck list: items that already have a checksum and whose
        last check is older than the configured recheck age.

        Up to recheck_fraction * how_many such items are pulled from the
        database, oldest first, and returned as Checkable objects. An empty
        list is returned when recheck_fraction is 0.0 (the default).
        """
        cfg = CrawlConfig.add_config()
        r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
        r_age = cfg.get_time('cv', 'recheck_age', 365*24*3600)
        threshold = int(time.time() - r_age)
        CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
        if r_fraction == 0.0:
            # rechecking is disabled
            return []

        db = CrawlDBI.DBI(dbtype='crawler')
        rows = db.select(table='checkables',
                         fields=['rowid',
                                 'path',
                                 'type',
                                 'cos',
                                 'cart',
                                 'ttypes',
                                 'checksum',
                                 'last_check',
                                 'fails',
                                 'reported'],
                         where='checksum <> 0 and last_check < %d' % threshold,
                         orderby='last_check',
                         limit=round(r_fraction * how_many))
        db.close()

        rechecks = []
        for row in rows:
            # field order matches the select list above
            (rid, pth, typ, cos, cart, ttyp, cksum, lchk, fls, rptd) = row
            rechecks.append(Checkable(rowid=rid,
                                      path=pth,
                                      type=typ,
                                      cos=cos,
                                      cart=cart,
                                      ttypes=ttyp,
                                      checksum=cksum,
                                      last_check=lchk,
                                      fails=fls,
                                      reported=rptd,
                                      in_db=True,
                                      dirty=False))
        return rechecks
Beispiel #8
0
    def load_recheck_list(cls, how_many):
        """
        Look to see whether any of the already checksummed items in the
        database have a last check time over the threshold for rechecking. If
        so, we'll shove some of them to the front of the list based on the
        configuration.

        *how_many* scales the number of items fetched: at most
        recheck_fraction * how_many rows are selected, oldest first.
        Presumably a @classmethod (decorator not visible here) -- *cls* is
        unused in the body.
        """
        cfg = CrawlConfig.add_config()
        # recheck_fraction defaults to 0.0, which disables rechecking
        r_fraction = float(cfg.get_d('cv', 'recheck_fraction', '0.0'))
        # recheck_age defaults to one year
        r_age = cfg.get_time('cv', 'recheck_age', 365 * 24 * 3600)
        threshold = int(time.time() - r_age)
        CrawlConfig.log("threshold = %s (%d)", U.ymdhms(threshold), threshold)
        if r_fraction == 0.0:
            return []

        limit = round(r_fraction * how_many)

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {
            'table':
            'checkables',
            'fields': [
                'rowid', 'path', 'type', 'cos', 'cart', 'ttypes', 'checksum',
                'last_check', 'fails', 'reported'
            ],
            'where':
            'checksum <> 0 and last_check < %d' % threshold,
            'orderby':
            'last_check',
            'limit':
            limit
        }

        rows = db.select(**kw)
        db.close()

        rval = []
        for row in rows:
            # pop fields in the same order as the select list above
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            in_db=True,
                            dirty=False)
            rval.append(new)
        return rval
Beispiel #9
0
def history_load(loadlist, filename):
    """
    Load history data for the plugins named in *loadlist*.

    Each plugin's sublib has a load_history() routine that knows how to load
    its data to the history file.

    Unfortunately, we do have to know here something special about plugin 'cv'
    to warn the user when a filename was specified without 'cv' in the load
    list or vice versa and when to pass filename to the plugin's load_history()
    method.

    Arguments:
        loadlist - comma separated plugin names; '' or 'all' means every
                   configured plugin
        filename - log file from which 'cv' history is read (may be None)
    """
    cfg = CrawlConfig.add_config()
    pluglist = U.csv_list(cfg.get_d('crawler', 'plugins', U.default_plugins()))
    ll = U.csv_list(loadlist)
    if 'all' in ll or ll == []:
        ll = copy.deepcopy(pluglist)

    if filename is None and 'cv' in ll:
        # cv history can only be loaded from a log file
        print(MSG.history_cv_not_loaded)
        ll.remove('cv')
    elif filename is not None and 'cv' not in ll:
        print(MSG.history_filename_ignored)

    # drop any plugin names that are not configured
    unk_plugs = [x for x in ll if x not in pluglist]
    if unk_plugs:
        print(MSG.unrecognized_plugin_S % ', '.join(unk_plugs))
        # Bug fix: this used map(ll.remove, unk_plugs), which is a lazy
        # no-op on Python 3; an explicit loop works on both 2 and 3.
        for plug in unk_plugs:
            ll.remove(plug)

    if ll == []:
        return

    dbschem.make_table('history')
    for plug in [x for x in ll if x in pluglist]:
        print("loading %s..." % plug)
        if plug == 'cv' and filename is not None:
            args = [filename]
        else:
            args = []
        p = CrawlPlugin.CrawlPlugin(name=plug, cfg=cfg)
        p.load_history(*args)
Beispiel #10
0
def history_load(loadlist, filename):
    """
    Each plugin's sublib has a load_history() routine that knows how to load
    its data to the history file.

    Unfortunately, we do have to know here something special about plugin 'cv'
    to warn the user when a filename was specified without 'cv' in the load
    list or vice versa and when to pass filename to the plugin's load_history()
    method.

    Arguments:
        loadlist - comma separated plugin names; '' or 'all' means every
                   configured plugin
        filename - log file from which 'cv' history is read (may be None)
    """
    cfg = CrawlConfig.add_config()
    pluglist = U.csv_list(cfg.get_d('crawler', 'plugins', U.default_plugins()))
    ll = U.csv_list(loadlist)
    if 'all' in ll or ll == []:
        # an empty list or 'all' means every configured plugin
        ll = copy.deepcopy(pluglist)

    if filename is None and 'cv' in ll:
        # cv history can only be loaded from a log file
        print(MSG.history_cv_not_loaded)
        ll.remove('cv')
    elif filename is not None and 'cv' not in ll:
        print(MSG.history_filename_ignored)

    unk_plugs = [x for x in ll if x not in pluglist]
    if 0 < len(unk_plugs):
        print(MSG.unrecognized_plugin_S % ', '.join(unk_plugs))
        # NOTE(review): map() is lazy on Python 3, so this line removes
        # nothing there; it relies on Python 2's eager map -- confirm the
        # target interpreter (StandardError elsewhere suggests Python 2).
        map(ll.remove, unk_plugs)

    if ll == []:
        return

    dbschem.make_table('history')
    for plug in [x for x in ll if x in pluglist]:
        print("loading %s..." % plug)
        # only 'cv' takes the log file as an argument
        if plug == 'cv' and filename is not None:
            args = [filename]
        else:
            args = []
        p = CrawlPlugin.CrawlPlugin(name=plug, cfg=cfg)
        p.load_history(*args)
Beispiel #11
0
    def __init__(self, *args, **kwargs):
        """
        Initialize a Checkable object -- set the path, type, checksum, cos, and
        last_check to default values, then update them based on the arguments.

        Keyword arguments are validated against a whitelist; an unknown
        keyword raises StandardError (this is Python 2 code -- StandardError
        does not exist on Python 3).
        """
        # where the item is in HPSS
        self.path = '---'
        # file ('f') or directory ('d')
        self.type = '-'
        # which COS the file is in (empty for directories)
        self.cos = ''
        # which tape cartridge(s) the file is stored on
        self.cart = None
        # the type of tape cartridge(s)
        self.ttypes = None
        # 1 if we have a checksum stored, else 0
        self.checksum = 0
        # how many times we've tried and failed to retrieve the file content
        self.fails = 0
        # whether we've reported that retrievals are failing for this file
        self.reported = 0
        # when was the last check of this file (epoch time)
        self.last_check = 0
        # this item's row id in the database
        self.rowid = None
        # how likely are we to add an item to the sample?
        self.probability = 0.1
        # whether this object is in the database
        self.in_db = False
        # whether this object has been changed
        self.dirty = False
        # non keyword arguments
        self.args = args

        for k in kwargs:
            if k not in ['rowid',
                         'path',
                         'type',
                         'checksum',
                         'cos',
                         'cart',
                         'ttypes',
                         'dim',
                         'fails',
                         'reported',
                         'last_check',
                         'probability',
                         'in_db',
                         'dirty']:
                raise StandardError("Attribute %s is invalid for Checkable" %
                                    k)
            setattr(self, k, kwargs[k])
        # counters loaded as NULL from the database are normalized to 0
        for attr in ['checksum', 'fails', 'reported']:
            if getattr(self, attr) is None:
                setattr(self, attr, 0)

        # Set up dimensions based on configuration. If no dimensions option is
        # set in the configuration, we just leave the dimensions dict emtpy.
        # Since this class is only used by the cv_plugin, it makes no sense for
        # this code to be running if there is no cv section in the
        # configuration, so we'll let that exception get thrown up the stack.
        # NOTE(review): a 'dim' keyword argument accepted above is clobbered
        # by the unconditional assignment below -- confirm this is intended.
        cfg = CrawlConfig.add_config()
        self.dim = {}
        try:
            dim_l = util.csv_list(cfg.get('cv', 'dimensions'))
            for dname in dim_l:
                self.dim[dname] = Dimension.get_dim(dname)
        except CrawlConfig.NoOptionError:
            pass

        super(Checkable, self).__init__()
Beispiel #12
0
    def get_list(cls, how_many=-1, prob=0.1, rootlist=None):
        """
        Return the current list of Checkables from the database.

        Arguments:
            how_many - maximum number of items to return; -1 (the default)
                       means to use the cv/operations config value
            prob     - probability assigned to each Checkable built here
            rootlist - paths that must exist in the checkables table; any
                       that are missing are added before the final select

        Priority-list and recheck-list entries are placed at the front of
        the result; the remainder is filled from the checkables table in
        last_check order.
        """
        # Bug fix: rootlist previously defaulted to a mutable list ([]),
        # which Python shares across calls; a None sentinel avoids that
        # pitfall while keeping the same behavior for all callers.
        if rootlist is None:
            rootlist = []
        if how_many < 0:
            cfg = CrawlConfig.add_config()
            how_many = int(cfg.get_d('cv', 'operations', '30'))

        rval = Checkable.load_priority_list()
        if how_many <= len(rval):
            return rval

        rval.extend(Checkable.load_recheck_list(how_many))
        if how_many <= len(rval):
            return rval

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {'table': 'checkables',
              'fields': ['rowid',
                         'path',
                         'type',
                         'cos',
                         'cart',
                         'ttypes',
                         'checksum',
                         'last_check',
                         'fails',
                         'reported'],
              'orderby': 'last_check'}
        if 0 < how_many:
            kw['limit'] = how_many

        rows = db.select(**kw)

        # check whether any roots from rootlist are missing and if so, add
        # them to the table, then re-run the select to pick them up
        reselect = False
        pathlist = [x[1] for x in rows]
        for root in rootlist:
            if root not in pathlist:
                nr = Checkable(path=root, type='d')
                nr.load()
                nr.persist()
                reselect = True

        if reselect:
            rows = db.select(**kw)

        for row in rows:
            # pop fields in the same order as the select list above
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            probability=prob,
                            in_db=True,
                            dirty=False)
            if new not in rval:
                rval.append(new)
            if how_many <= len(rval):
                break

        db.close()
        CrawlConfig.log("returning %d items" % len(rval))
        return rval
Beispiel #13
0
    def __init__(self, *args, **kwargs):
        """
        Build a Checkable: give every attribute its default value, then
        overlay any recognized keyword arguments. Unknown keywords raise
        StandardError.
        """
        # (attribute, default) pairs: item location and kind, tape info,
        # check bookkeeping, database state
        for (attr, dflt) in [('path', '---'),        # location in HPSS
                             ('type', '-'),          # 'f' file / 'd' dir
                             ('cos', ''),            # COS (empty for dirs)
                             ('cart', None),         # tape cartridge(s)
                             ('ttypes', None),       # cartridge type(s)
                             ('checksum', 0),        # 1 if checksum stored
                             ('fails', 0),           # failed retrievals
                             ('reported', 0),        # failure reported?
                             ('last_check', 0),      # epoch of last check
                             ('rowid', None),        # database row id
                             ('probability', 0.1),   # sampling likelihood
                             ('in_db', False),       # present in database?
                             ('dirty', False)]:      # changed since load?
            setattr(self, attr, dflt)
        # non keyword arguments
        self.args = args

        valid = ('rowid', 'path', 'type', 'checksum', 'cos', 'cart',
                 'ttypes', 'dim', 'fails', 'reported', 'last_check',
                 'probability', 'in_db', 'dirty')
        for key, val in kwargs.items():
            if key not in valid:
                raise StandardError("Attribute %s is invalid for Checkable" %
                                    key)
            setattr(self, key, val)
        # counters that came back NULL from the database become 0
        for attr in ('checksum', 'fails', 'reported'):
            if getattr(self, attr) is None:
                setattr(self, attr, 0)

        # Set up dimensions based on configuration. If no dimensions option
        # is set in the configuration, we just leave the dimensions dict
        # empty. Since this class is only used by the cv_plugin, it makes no
        # sense for this code to be running if there is no cv section in the
        # configuration, so we let that exception get thrown up the stack.
        cfg = CrawlConfig.add_config()
        self.dim = {}
        try:
            for dname in util.csv_list(cfg.get('cv', 'dimensions')):
                self.dim[dname] = Dimension.get_dim(dname)
        except CrawlConfig.NoOptionError:
            pass

        super(Checkable, self).__init__()
Beispiel #14
0
    def get_list(cls, how_many=-1, prob=0.1, rootlist=[]):
        """
        Return the current list of Checkables from the database.

        Arguments:
            how_many - maximum number of items to return; -1 (the default)
                       means to use the cv/operations config value
            prob     - probability assigned to each Checkable built here
            rootlist - paths that must exist in the checkables table; any
                       that are missing are added before the final select

        NOTE(review): rootlist has a mutable default ([]); it is only read
        here, so no sharing bug occurs, but a None sentinel would be safer.
        """
        if how_many < 0:
            cfg = CrawlConfig.add_config()
            how_many = int(cfg.get_d('cv', 'operations', '30'))

        # priority and recheck items go at the front of the result
        rval = Checkable.load_priority_list()
        if how_many <= len(rval):
            return rval

        rval.extend(Checkable.load_recheck_list(how_many))
        if how_many <= len(rval):
            return rval

        db = CrawlDBI.DBI(dbtype='crawler')
        kw = {
            'table':
            'checkables',
            'fields': [
                'rowid', 'path', 'type', 'cos', 'cart', 'ttypes', 'checksum',
                'last_check', 'fails', 'reported'
            ],
            'orderby':
            'last_check'
        }
        if 0 < how_many:
            kw['limit'] = how_many

        rows = db.select(**kw)

        # check whether any roots from rootlist are missing and if so, add them
        # to the table
        reselect = False
        pathlist = [x[1] for x in rows]
        for root in rootlist:
            if root not in pathlist:
                nr = Checkable(path=root, type='d')
                nr.load()
                nr.persist()
                reselect = True

        if reselect:
            rows = db.select(**kw)

        for row in rows:
            # pop fields in the same order as the select list above
            tmp = list(row)
            new = Checkable(rowid=tmp.pop(0),
                            path=tmp.pop(0),
                            type=tmp.pop(0),
                            cos=tmp.pop(0),
                            cart=tmp.pop(0),
                            ttypes=tmp.pop(0),
                            checksum=tmp.pop(0),
                            last_check=tmp.pop(0),
                            fails=tmp.pop(0),
                            reported=tmp.pop(0),
                            probability=prob,
                            in_db=True,
                            dirty=False)
            if new not in rval:
                rval.append(new)
            if how_many <= len(rval):
                break

        db.close()
        CrawlConfig.log("returning %d items" % len(rval))
        return rval
Beispiel #15
0
    def dispatch(self):
        """
        Figure out where we're supposed to send this alert and send it.
        Possible destinations are the log file, one or more e-mail addresses,
        and/or a shell program.

        It's also possible for a 'use' option to show up in the alerts section.
        In this case, we're being redirected to another section, also 'use' can
        also point to the current alerts section. There's no reason to ever do
        this, but it could happen so we want to handle it in a reasonable way.

        That's why we sort the config options in the while statement below --
        to make 'use' get handled last, so any other options in the section
        will get handled. Once we process 'use', anything not yet processed in
        the current section is ignored.

        Reads self.cfg, self.caller and self.msg; performs I/O side effects
        (logging, shell command, e-mail) but returns nothing.
        """
        # prefer the configuration handed to this object; fall back on the
        # process-wide one
        if self.cfg is not None:
            cfg = self.cfg
        else:
            cfg = CrawlConfig.add_config()
        # a caller (e.g. a plugin section) names its own alerts section
        if self.caller != '':
            section = cfg.get(self.caller, 'alerts')
        else:
            section = 'alerts'

        done = False
        while not done:
            # sorted() puts 'use' after 'email'/'log'/'shell' alphabetically,
            # which is what makes the redirect-last behavior work
            for opt in sorted(cfg.options(section)):
                if opt == 'log':
                    # write to log
                    fmt = cfg.get(section, 'log')
                    CrawlConfig.log(fmt, self.msg)
                    done = True

                elif opt == 'shell':
                    # run the program
                    cmd = cfg.get(section, 'shell')
                    # substitute the alert message if the command wants it
                    if '%s' in cmd:
                        cmdline = cmd % (self.msg)
                    else:
                        cmdline = cmd
                    os.system(cmdline)
                    CrawlConfig.log("ran: '%s'" % (cmdline))
                    done = True

                elif opt == 'email':
                    CrawlMail.send(cfg=cfg,
                                   to="%s.email" % section,
                                   subj="HPSS Integrity Crawler ALERT",
                                   msg=self.msg)
                    done = True

                elif opt == 'use':
                    # delegate to another section
                    done = True
                    new_section = cfg.get(section, 'use')

                    # if it's the same section, ignore the 'use', but we don't
                    # want to break the rule that all options after a 'use' are
                    # ignored. So we set done to True to terminate the while
                    # loop and break unconditionally at the end of this clause
                    # to get out of the for loop
                    if new_section != section:
                        section = new_section
                        done = False
                    break
Beispiel #16
0
def crl_history(argv):
    """history - access to the plugin history

    usage: crawl history [--load|--show|--reset]

    --load {all,cv,mpra,tcc,rpt}
        Load the history table from listed plugin tables, log file

    --show
        Read the history table and report its contents.

    --reset
        Drop the history table.

    --read-log FILENAME
        If --load is specified and includes 'cv', read FILENAME and load cv
        history from it.

    To load just cv data, --load cv --read-log FILENAME
    To load just mpra data, --load mpra
    To load all plugins, --load all (or "") --read-log FILENAME

    If --load contains 'cv' but --read-log is not specified, an error message
    will be issued.

    If --load contains 'all' or is empty and --read-log is not specified, a
    warning will be issued to notify the user that cv data is not being loaded.

    If --load does not contain 'cv' or 'all' and is not empty and --read-log is
    specified, a warning will be issued that the log file is not being read and
    cv data is not being loaded.
    """
    p = optparse.OptionParser()
    # (short, long, kwargs) for each supported option
    for (sopt, lopt, kw) in [
            ('-d', '--debug',
             dict(action='store_true', default=False, dest='debug',
                  help='run the debugger')),
            ('-n', '--dry-run',
             dict(action='store_true', default=False, dest='dryrun',
                  help='just report')),
            ('-l', '--load',
             dict(action='store', default=None, dest='loadlist',
                  help='plugins to load')),
            ('-r', '--read-log',
             dict(action='store', default=None, dest='filename',
                  help='log file for cv history')),
            ('-R', '--reset',
             dict(action='store_true', default=False, dest='reset',
                  help='drop the history table')),
            ('-s', '--show',
             dict(action='store', default='unset', dest='show',
                  help='Report the contents of the history table'))]:
        p.add_option(sopt, lopt, **kw)
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    if o.show == 'unset':
        o.show = None
    # --load, --reset and --show are mutually exclusive: complain when more
    # than one of them appeared on the command line
    exclusive = (o.loadlist is not None, o.reset, bool(o.show))
    if 1 < sum(1 for flag in exclusive if flag):
        raise SystemExit(MSG.history_options)

    if o.dryrun:
        # gather the names reported by the dry-run messages below
        cfg = CrawlConfig.add_config()
        table = cfg.get('dbi-crawler', 'tbl_prefix') + '_history'
        dbname = cfg.get('dbi-crawler', 'dbname')
        hostname = cfg.get('dbi-crawler', 'hostname')

    if o.show:
        # This option is non-destructive, so we ignore --dry-run for it.
        history_show(o.show)
    elif o.reset:
        if o.dryrun:
            print(MSG.history_reset_dryrun_SSS % (table, dbname, hostname))
        else:
            print(dbschem.drop_table(table='history'))
    elif o.loadlist is not None:
        if o.dryrun:
            print(MSG.history_load_dryrun_SSSS %
                  (table, dbname, hostname, o.filename))
        else:
            history_load(o.loadlist, o.filename)
Beispiel #17
0
def crl_history(argv):
    """history - access to the plugin history

    usage: crawl history [--load|--show|--reset]

    --load {all,cv,mpra,tcc,rpt}
        Load the history table from listed plugin tables, log file

    --show
        Read the history table and report its contents.

    --reset
        Drop the history table.

    --read-log FILENAME
        If --load is specified and includes 'cv', read FILENAME and load cv
        history from it.

    To load just cv data, --load cv --read-log FILENAME
    To load just mpra data, --load mpra
    To load all plugins, --load all (or "") --read-log FILENAME

    If --load contains 'cv' but --read-log is not specified, an error message
    will be issued.

    If --load contains 'all' or is empty and --read-log is not specified, a
    warning will be issued to notify the user that cv data is not being loaded.

    If --load does not contain 'cv' or 'all' and is not empty and --read-log is
    specified, a warning will be issued that the log file is not being read and
    cv data is not being loaded.
    """
    p = optparse.OptionParser()
    p.add_option('-d',
                 '--debug',
                 action='store_true',
                 default=False,
                 dest='debug',
                 help='run the debugger')
    p.add_option('-n',
                 '--dry-run',
                 action='store_true',
                 default=False,
                 dest='dryrun',
                 help='just report')
    p.add_option('-l',
                 '--load',
                 action='store',
                 default=None,
                 dest='loadlist',
                 help='plugins to load')
    p.add_option('-r',
                 '--read-log',
                 action='store',
                 default=None,
                 dest='filename',
                 help='log file for cv history')
    p.add_option('-R',
                 '--reset',
                 action='store_true',
                 default=False,
                 dest='reset',
                 help='drop the history table')
    p.add_option('-s',
                 '--show',
                 action='store',
                 default='unset',
                 dest='show',
                 help='Report the contents of the history table')
    (o, a) = p.parse_args(argv)

    if o.debug:
        pdb.set_trace()

    # This is saying, if any two of our primary command line options are set,
    # we have a problem since they are all mutually exclusive.
    # 'unset' is the sentinel default for --show; normalize it to None so the
    # truthiness tests below work
    if o.show == 'unset':
        o.show = None
    if any([all([o.loadlist is not None, o.reset]),
            all([o.loadlist is not None, o.show]),
            all([o.reset, o.show])]):
        raise SystemExit(MSG.history_options)

    if o.dryrun:
        # gather the names reported by the dry-run messages below
        cfg = CrawlConfig.add_config()
        table = cfg.get('dbi-crawler', 'tbl_prefix') + '_history'
        dbname = cfg.get('dbi-crawler', 'dbname')
        hostname = cfg.get('dbi-crawler', 'hostname')

    if o.show:
        # This option is non-destructive, so we ignore --dry-run for it.
        history_show(o.show)
    elif o.reset:
        if o.dryrun:
            print(MSG.history_reset_dryrun_SSS % (table, dbname, hostname))
        else:
            print(dbschem.drop_table(table='history'))
    elif o.loadlist is not None:
        if o.dryrun:
            print(MSG.history_load_dryrun_SSSS %
                  (table, dbname, hostname, o.filename))
        else:
            history_load(o.loadlist, o.filename)
Beispiel #18
0
def cvv_show_next(argv):
    """show_next - Report the Checkables in the order they will be checked

    usage: cvtool show_next
    """
    prs = optparse.OptionParser()
    # (flag strings, keyword arguments) for every supported option; registered
    # in one pass below so the option table reads as data
    option_specs = [
        (('-c', '--config'),
         dict(action='store', default='', dest='config',
              help='alternate configuration')),
        (('-d', '--debug'),
         dict(action='store_true', default=False, dest='debug',
              help='run the debugger')),
        (('-i', '--id'),
         dict(action='store', default='', dest='id',
              help='id of entry to be checked')),
        (('-l', '--limit'),
         dict(action='store', default=-1, dest='limit', type=int,
              help='max records to get')),
        (('-p', '--path'),
         dict(action='store', default='', dest='path',
              help='name of path to be checked')),
        (('-v', '--verbose'),
         dict(action='store_true', default=False, dest='verbose',
              help='more information')),
    ]
    for flags, kwargs in option_specs:
        prs.add_option(*flags, **kwargs)

    # optparse exits (SystemExit) on a parse error or --help; treat either
    # as "nothing to do" rather than letting it terminate the process here
    try:
        (opts, _) = prs.parse_args(argv)
    except SystemExit:
        return

    if opts.debug:
        pdb.set_trace()

    # Load the named alternate configuration, or fall back to the default
    if opts.config:
        cfg = CrawlConfig.add_config(close=True, filename=opts.config)
    else:
        cfg = CrawlConfig.add_config()

    # A negative limit means "not given on the command line" -- use the
    # cv/operations value from the config (defaulting to 10)
    if opts.limit < 0:
        limit = int(cfg.get_d('cv', 'operations', '10'))
    else:
        limit = opts.limit

    # Report each pending Checkable; entries never checked (last_check == 0)
    # show the raw zero timestamp, the rest a formatted date
    for item in Checkable.Checkable.get_list(limit):
        if item.last_check == 0:
            print("%18d %s %s" % (item.last_check, item.type, item.path))
        else:
            print("%s %s %s" % (U.ymdhms(item.last_check),
                                item.type, item.path))
Beispiel #19
0
    def dispatch(self):
        """
        Figure out where we're supposed to send this alert and send it.
        Possible destinations are the log file, one or more e-mail addresses,
        and/or a shell program.

        It's also possible for a 'use' option to show up in the alerts section.
        In this case, we're being redirected to another section, also 'use' can
        also point to the current alerts section. There's no reason to ever do
        this, but it could happen so we want to handle it in a reasonable way.

        That's why we sort the config options in the while statement below --
        to make 'use' get handled last, so any other options in the section
        will get handled. Once we process 'use', anything not yet processed in
        the current section is ignored.
        """
        # Prefer a config object supplied on the instance; otherwise fall
        # back to the shared/default configuration
        if self.cfg is not None:
            cfg = self.cfg
        else:
            cfg = CrawlConfig.add_config()
        # A non-empty caller names a config section whose 'alerts' option
        # points at the section to process; with no caller we use the
        # literal 'alerts' section
        if self.caller != '':
            section = cfg.get(self.caller, 'alerts')
        else:
            section = 'alerts'

        # Loop so that a 'use' redirection can restart option processing in
        # the new section.  sorted() guarantees 'use' is visited after
        # 'email', 'log', and 'shell' (alphabetical order), as promised in
        # the docstring above.
        done = False
        while not done:
            for opt in sorted(cfg.options(section)):
                if opt == 'log':
                    # write to log
                    fmt = cfg.get(section, 'log')
                    CrawlConfig.log(fmt, self.msg)
                    done = True

                elif opt == 'shell':
                    # run the program; if the command contains '%s', the
                    # alert message is interpolated into it first
                    cmd = cfg.get(section, 'shell')
                    if '%s' in cmd:
                        cmdline = cmd % (self.msg)
                    else:
                        cmdline = cmd
                    # NOTE(review): os.system runs cmdline through the shell;
                    # interpolating self.msg into it is a shell-injection
                    # risk if the message can carry untrusted text -- confirm
                    # where the message originates
                    os.system(cmdline)
                    CrawlConfig.log("ran: '%s'" % (cmdline))
                    done = True

                elif opt == 'email':
                    # mail the alert to the address list configured under
                    # '<section>.email'
                    CrawlMail.send(cfg=cfg,
                                   to="%s.email" % section,
                                   subj="HPSS Integrity Crawler ALERT",
                                   msg=self.msg)
                    done = True

                elif opt == 'use':
                    # delegate to another section
                    done = True
                    new_section = cfg.get(section, 'use')

                    # if it's the same section, ignore the 'use', but we don't
                    # want to break the rule that all options after a 'use' are
                    # ignored. So we set done to True to terminate the while
                    # loop and break unconditionally at the end of this clause
                    # to get out of the for loop
                    if new_section != section:
                        section = new_section
                        done = False
                    break