Example #1
0
def main(cfg):
    """
    Migration Purge Record Ager (mpra) reads the database tables BFMIGRREC and
    BFPURGEREC and reports migration and purge records that are older than the
    age specified in the configuration.

    Returns the number of aged migration records found plus the number of
    expired purge locks found.
    """
    # Fall back to the default configuration if the caller passed None
    if cfg is None:
        cfg = CrawlConfig.get_config()
    age = cfg.get_time('mpra', 'age')

    # Records with timestamps before 'end' are old enough to report
    end = time.time() - age

    start = mpra_lib.mpra_fetch_recent("migr")
    #
    # If the configured age has been moved back in time, so that end is before
    # start, we need to reset and start scanning from the beginning of time.
    #
    if end < start:
        start = 0
    CrawlConfig.log("migr recs after %d (%s) before %d (%s)" %
                    (start, util.ymdhms(start), end, util.ymdhms(end)))
    result = mpra_lib.age("migr", start=start, end=end, mark=True)
    CrawlConfig.log("found %d migration records in the range" % result)
    rval = result

    # NOTE(review): 'start' is reassigned here but never used afterward --
    # xplocks() below takes no range argument.  Confirm whether
    # mpra_fetch_recent("purge") has side effects before removing this call.
    start = mpra_lib.mpra_fetch_recent("purge")
    CrawlConfig.log("Looking for expired purge locks")
    result = mpra_lib.xplocks(mark=True)
    CrawlConfig.log("found %d expired purge locks" % result)
    rval += result

    return rval
Example #2
0
    def test_ctor_reset_atime_cfg_true(self):
        """
        If reset_atime is specified in the config as True, it should be True
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # write out the default test config unchanged; it carries the
        # cv.reset_atime setting (sibling tests override or delete it)
        self.write_cfg_file(cf_name, self.cfg_d)

        # make the test config the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False)
        self.expected(True, h.reset_atime)

        # restore the default configuration so later tests are unaffected
        CrawlConfig.get_config(reset=True, soft=True)
Example #3
0
    def test_ctor_reset_atime_call_true(self):
        """
        If reset_atime is specified in the call as True, it should be True,
        even if it's specified as False in the config
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # build and write a config that explicitly turns reset_atime off
        cfg_copy = copy.deepcopy(self.cfg_d)
        cfg_copy['cv']['reset_atime'] = 'no'
        self.write_cfg_file(cf_name, cfg_copy)

        # install the test config as the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # the keyword argument must win over the config setting
        hsi_obj = hpss.HSI(connect=False, reset_atime=True)
        self.expected(True, hsi_obj.reset_atime)

        # put the default configuration back for subsequent tests
        CrawlConfig.get_config(reset=True, soft=True)
Example #4
0
    def test_ctor_reset_atime_default(self):
        """
        If reset_atime is not specified in the config or argument list, it
        should default to False
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # build a config with the reset_atime entry removed entirely
        conf = copy.deepcopy(self.cfg_d)
        del conf['cv']['reset_atime']
        self.write_cfg_file(cf_name, conf)

        # install the test config as the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # with no spec in config or call, the attribute defaults to False
        hsi_obj = hpss.HSI(connect=False)
        self.expected(False, hsi_obj.reset_atime)

        # put the default configuration back for subsequent tests
        CrawlConfig.get_config(reset=True, soft=True)
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.

    Each matching file is checked via tcc_lib.check_file() and then moved into
    the configured 'completed' directory (default '/tmp').
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    # destination directory for files that have been handled
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # Bug fix: rval was never incremented, so the function always
        # returned 0 despite its documented contract
        rval += 1

    return rval
Example #6
0
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.

    Each matching file is checked via tcc_lib.check_file() and then moved into
    the configured 'completed' directory (default '/tmp').
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    # destination directory for files that have been handled
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # Bug fix: rval was never incremented, so the function always
        # returned 0 despite its documented contract
        rval += 1

    return rval
Example #7
0
    def check_hash_algorithm(self, cf_stem, alg, checkfor=None):
        """
        With hash_algorithm = *alg* in config, verify that hashcreate()
        produces a hash of the expected type.

        cf_stem: basename stem for the generated config file
        alg: algorithm name to configure, or '(none)' to leave it unset
        checkfor: string expected in the hashlist output (defaults to alg)
        """
        if checkfor is None:
            checkfor = alg

        # generate a config file and make it the default config
        cf_name = self.tmpdir(cf_stem + ".cfg")
        cd = copy.deepcopy(self.cfg_d)
        if alg == '(none)':
            del cd['cv']['hash_algorithm']
        else:
            cd['cv']['hash_algorithm'] = alg
        self.write_cfg_file(cf_name, cd)
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # Get an hsi object
        testfile = self.plist[1]
        try:
            h = hpss.HSI()
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))
            # Bug fix: any other HSIerror was silently swallowed, leaving
            # 'h' unbound and causing a NameError below; re-raise instead
            raise

        # if necessary, delete any hash on the test file
        result = h.hashlist(testfile)
        if "(none)" not in result:
            h.hashdelete(testfile)

        # generate a hash on the test file
        h.hashcreate(testfile)

        # verify that the hash created is of the proper type
        result = h.hashlist(testfile)
        self.expected_in(checkfor, result)
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 on success, 1 on failure (the failure is logged).
    """
    rval = 0
    try:
        cfg = CrawlConfig.get_config() if cfg is None else cfg

        # subject line carries a timestamp so reports are distinguishable
        stamp = time.strftime("%Y.%m%d %H:%M:%S", time.localtime())
        subject = "%s %s" % (cfg.get('rpt', 'subject'), stamp)

        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))

    return rval
Example #9
0
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).

    Returns 0 when the report was sent, 1 when any step failed.
    """
    rval = 0
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()

        # build the subject from the configured prefix plus a timestamp
        timestamp = time.strftime("%Y.%m%d %H:%M:%S", time.localtime())
        subject = "%s %s" % (cfg.get('rpt', 'subject'), timestamp)

        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))

    return rval
Example #10
0
                                       last_obj_id,
                                       last_obj_id,
                                       correct,
                                       error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount


# -----------------------------------------------------------------------------
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.

    Each matching file is checked via tcc_lib.check_file() and then moved into
    the configured 'completed' directory (default '/tmp').
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    # destination directory for files that have been handled
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # Bug fix: rval was never incremented, so the function always
        # returned 0 despite its documented contract
        rval += 1

    return rval


# -----------------------------------------------------------------------------
# Script entry point: run the plugin against the default configuration
if __name__ == '__main__':
    cfg = CrawlConfig.get_config()
    main(cfg)
Example #11
0
                    "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())
    return failures


# -----------------------------------------------------------------------------
def get_stats():
    """
    Return a tuple (checksums, matches, failures).

    Matches and failures are stored in the cvstats table but total checksum
    count is retrieved from the checkables table by counting records with
    checksum = 1. This avoids discrepancies where the checksum count in cvstats
    might get out of synch with the records in checkables.
    """
    checksums = cv_lib.get_checksum_count()
    matches, failures = cv_lib.get_match_fail_count()
    return (checksums, matches, failures)


# -----------------------------------------------------------------------------
# Script entry point: run the plugin against the default configuration
if __name__ == '__main__':
    main(CrawlConfig.get_config())
Example #12
0
                    "matches: %d; " % t_matches + "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())
    return failures


# -----------------------------------------------------------------------------
def get_stats():
    """
    Return the checksum statistics as a (checksums, matches, failures) tuple.

    Matches and failures are stored in the cvstats table but total checksum
    count is retrieved from the checkables table by counting records with
    checksum = 1. This avoids discrepancies where the checksum count in cvstats
    might get out of synch with the records in checkables.
    """
    checksums = cv_lib.get_checksum_count()
    matches, failures = cv_lib.get_match_fail_count()
    return (checksums, matches, failures)


# -----------------------------------------------------------------------------
# Script entry point: run the plugin against the default configuration
if __name__ == '__main__':
    main(CrawlConfig.get_config())
Example #13
0
            last_obj_id = int(bf['OBJECT_ID'])
            tcc_lib.record_checked_ids(cfg, last_obj_id, last_obj_id, correct,
                                       error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount


# -----------------------------------------------------------------------------
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.

    Each matching file is checked via tcc_lib.check_file() and then moved into
    the configured 'completed' directory (default '/tmp').
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    # destination directory for files that have been handled
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)
        # Bug fix: rval was never incremented, so the function always
        # returned 0 despite its documented contract
        rval += 1

    return rval


# -----------------------------------------------------------------------------
# Script entry point: run the plugin against the default configuration
if __name__ == '__main__':
    cfg = CrawlConfig.get_config()
    main(cfg)