def test_ctor_attrs(self):
     """
     Verify that a newly created Dimension object has the following
     attributes:
      - name (string)
      - sampsize (small float value, e.g., 0.005)
      - p_sum (empty dict)
      - s_sum (empty dict)
      - methods
         > sum_total
         > load
     """
     dimname = 'cos'
     CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
     Checkable.Checkable.ex_nihilo()
     a = Dimension(name=dimname, sampsize=0.005)
     for attr in [
             'name',
             'sampsize',
             'p_sum',
             's_sum',
             'sum_total',
             'load',
     ]:
         self.assertTrue(
             hasattr(a, attr),
             "Object %s does not have expected attribute %s" % (a, attr))
 def test_sum_total(self):
     """
     Return the sum of all the 'count' values in either the p_sum or s_sum
     dictionary.
     """
     CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
     Checkable.Checkable.ex_nihilo()
     a = Dimension(name='sum_total')
     a.p_sum = {
         '6001': {
             'count': 2,
             'pct': 50.0
         },
         '5081': {
             'count': 2,
             'pct': 50.0
         }
     }
     a.s_sum = {
         '6001': {
             'count': 2,
             'pct': 40.0
         },
         '5081': {
             'count': 3,
             'pct': 60.0
         }
     }
     self.expected(4, a.sum_total())
     self.expected(4, a.sum_total(dict=a.p_sum))
     self.expected(5, a.sum_total(which='s'))
     self.expected(5, a.sum_total(dict=a.s_sum))
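The test above pins down sum_total()'s calling conventions: with no arguments it sums the 'count' fields of p_sum, dict= selects an explicit summary dictionary, and which='s' switches to s_sum. The real Dimension implementation is not part of this listing; the sketch below is only an illustration with the same observable behavior (the attribute layout is taken from the tests, everything else is invented):

# Hypothetical sketch -- not the actual hpssic Dimension implementation.
class DimensionSketch(object):
    def __init__(self):
        self.p_sum = {}    # population summary: {cos: {'count': n, 'pct': p}}
        self.s_sum = {}    # sample summary, same shape

    def sum_total(self, dict=None, which='p'):
        # 'dict' shadows the builtin; the name is kept to match the tests above
        if dict is None:
            dict = self.s_sum if which == 's' else self.p_sum
        # add up the 'count' field of every entry in the chosen summary
        return sum(entry['count'] for entry in dict.values())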
Example #3
    def test_alert_email_mtcaller(self):
        """
        Generate an e-mail alert and verify that it was sent (this is where we
        use 'monkey patching'). For this case, caller is ''.
        """
        self.dbgfunc()
        fakesmtp.inbox = []
        logfile = self.tmpdir('alert_email.log')
        targets = "[email protected], [email protected], [email protected]"
        payload = 'this is an e-mail alert'
        sender = 'hpssic@' + util.hostname(long=True)

        cfg = CrawlConfig.CrawlConfig()
        cfg.add_section('crawler')
        cfg.add_section('alerts')
        cfg.set('crawler', 'logpath', logfile)
        cfg.set('alerts', 'email', targets)
        CrawlConfig.log(logpath=logfile, close=True)

        x = Alert.Alert(caller='', msg=payload, cfg=cfg)
        m = fakesmtp.inbox[0]
        self.expected(targets, ', '.join(m.to_address))
        self.expected(m.from_address, sender)
        self.expected_in('sent mail to', util.contents(logfile))
        self.expected_in(payload, m.fullmessage)
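The assertions above read from fakesmtp.inbox, a module-level list that a monkey-patched SMTP stand-in fills instead of sending real mail. The actual fakesmtp test helper is not shown in this listing; the following is only a minimal sketch of that kind of stub, using the standard library and hypothetical class names (the message attribute names come from the test above):

# Hypothetical sketch of a fakesmtp-style monkey patch -- not hpssic's code.
import smtplib

inbox = []                            # messages "sent" during the test


class FakeMessage(object):
    def __init__(self, from_address, to_address, fullmessage):
        self.from_address = from_address
        self.to_address = to_address          # list of recipient addresses
        self.fullmessage = fullmessage


class FakeSMTP(object):
    """Stand-in for smtplib.SMTP that records mail instead of sending it."""
    def __init__(self, host='localhost', port=25):
        pass

    def sendmail(self, from_addr, to_addrs, msg):
        inbox.append(FakeMessage(from_addr, to_addrs, msg))

    def quit(self):
        pass


# the monkey patch: code that calls smtplib.SMTP(...).sendmail(...) now
# appends to 'inbox' instead of talking to a real mail server
smtplib.SMTP = FakeSMTP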
Example #5
 def test_alert_shell_nospec(self):
     """
     Generate a shell alert and verify that it ran. With no '%s' in the
     shell alert string, no message should be offered for formatting.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_shell.log')
     outfile = self.tmpdir('alert_shell.out')
     runfile = self.tmpdir('runme')
     f = open(runfile, 'w')
     f.write("#!/bin/bash\n")
     f.write("echo \"ALERT: $*\" > %s\n" % outfile)
     f.close()
     os.chmod(runfile,
              stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
              stat.S_IRGRP | stat.S_IWGRP | stat.S_IXGRP |
              stat.S_IROTH | stat.S_IXOTH)
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'shell', runfile)
     CrawlConfig.log(logpath=logfile, close=True)
     x = Alert.Alert(caller='AlertTest',
                     msg='this is a test message',
                     cfg=cfg)
     expected = "ran: '%s'" % runfile
     self.expected_in(expected, util.contents(logfile))
     self.assertPathPresent(outfile)
    def test_load_new(self):
        """
        With the database and checkables table in place, create a new Dimension
        that is not in the table. Calling load() on it should be a no-op -- the
        object should not be stored to the database and its contents should not
        be changed.
        """
        self.dbgfunc()
        # reboot the database and call persist() to create the table without
        # adding any data
        U.conditional_rm(self.dbname())
        CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
        Checkable.Checkable.ex_nihilo()

        ignore = Dimension(name='foobar')

        # get a Dimension object that is not in the table
        test = Dimension(name='notindb')
        # make a copy of the object for reference (not just a handle to the
        # same object)
        ref = copy.deepcopy(test)

        # call load(), which should be a no op
        test.load()

        # verify that the object didn't change
        self.expected(ref.name, test.name)
        self.expected(ref.sampsize, test.sampsize)
        self.expected(ref.p_sum, test.p_sum)
        self.expected(ref.s_sum, test.s_sum)
    def test_repr(self):
        """
        Method __repr__ should return Dimension(name='foo').
        """

        CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
        exp = "Dimension(name='foo')"
        a = eval(exp)
        self.expected(exp, a.__repr__())
    def test_load_already(self):
        """
        With the database and a checkables table in place and records in the
        table, calling load() on a Dimension should load the information from
        the table into the object. However, it should only count records where
        last_check <> 0.
        """
        self.dbgfunc()
        U.conditional_rm(self.dbname())
        CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
        Checkable.Checkable.ex_nihilo()
        chk = Checkable.Checkable
        testdata = [
            chk(rowid=1, path="/abc/001", type='f', cos='6001', checksum=0,
                last_check=0),
            chk(rowid=2, path="/abc/002", type='f', cos='6002', checksum=0,
                last_check=5),
            chk(rowid=3, path="/abc/003", type='f', cos='6003', checksum=1,
                last_check=0),
            chk(rowid=4, path="/abc/004", type='f', cos='6001', checksum=1,
                last_check=17),
            chk(rowid=5, path="/abc/005", type='f', cos='6002', checksum=0,
                last_check=0),
            chk(rowid=6, path="/abc/006", type='f', cos='6003', checksum=0,
                last_check=8),
            chk(rowid=7, path="/abc/007", type='f', cos='6001', checksum=0,
                last_check=0),
            chk(rowid=8, path="/abc/008", type='f', cos='6002', checksum=0,
                last_check=19),
            chk(rowid=9, path="/abc/009", type='f', cos='6003', checksum=0,
                last_check=0),
            ]

        # insert some test data into the table
        for t in testdata:
            t.persist()

        # get a default Dimension with the same name as the data in the table
        q = Dimension(name='cos')
        # this should load the data from the table into the object
        q.load()

        # verify the loaded data in the object
        self.expected('cos', q.name)
        self.assertTrue('6001' in q.p_sum.keys(),
                        "Expected '6001' in p_sum.keys()")
        self.assertTrue('6002' in q.p_sum.keys(),
                        "Expected '6001' in p_sum.keys()")
        self.assertTrue('6003' in q.p_sum.keys(),
                        "Expected '6003' in p_sum.keys()")
        self.assertTrue('6001' in q.s_sum.keys(),
                        "Expected '6001' in s_sum.keys()")
        self.assertTrue('6002' in q.s_sum.keys(),
                        "Expected '6002' in s_sum.keys()")
        self.assertTrue('6003' in q.s_sum.keys(),
                        "Expected '6003' in s_sum.keys()")
Example #13
def main(cfg):
    """
    Plugin example
    """
    try:
        msg = cfg.get('example', 'message')
    except ConfigParser.NoOptionError:
        msg = 'No message in configuration'

    CrawlConfig.log('EXAMPLE: This is plugin EXAMPLE saying "%s"' % msg)
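main() only needs a config object that answers cfg.get('example', 'message'), falling back to a default message when the option is missing. A minimal way to drive the non-default path outside the crawler, assuming a plain ConfigParser object is acceptable where the crawler would pass its own CrawlConfig (the message text here is invented):

# Hypothetical driver for the example plugin; the section and option names
# are taken from main() above, the message value is made up.
import ConfigParser                   # Python 2, as in the plugin code

cfg = ConfigParser.RawConfigParser()
cfg.add_section('example')
cfg.set('example', 'message', 'hello from the crawler config')
# handed to main(), this config makes the plugin log:
#   EXAMPLE: This is plugin EXAMPLE saying "hello from the crawler config"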
Example #14
def main(cfg):
    """
    Migration Purge Record Ager (mpra) reads the database tables BFMIGRREC and
    BFPURGEREC and reports migration and purge records that are older than the
    age specified in the configuration.
    """
    if cfg is None:
        cfg = CrawlConfig.get_config()
    age = cfg.get_time('mpra', 'age')

    end = time.time() - age

    start = mpra_lib.mpra_fetch_recent("migr")
    #
    # If the configured age has been moved back in time, so that end is before
    # start, we need to reset and start scanning from the beginning of time.
    #
    if end < start:
        start = 0
    CrawlConfig.log("migr recs after %d (%s) before %d (%s)" %
                    (start, util.ymdhms(start), end, util.ymdhms(end)))
    result = mpra_lib.age("migr", start=start, end=end, mark=True)
    CrawlConfig.log("found %d migration records in the range" % result)
    rval = result

    start = mpra_lib.mpra_fetch_recent("purge")
    CrawlConfig.log("Looking for expired purge locks")
    result = mpra_lib.xplocks(mark=True)
    CrawlConfig.log("found %d expired purge locks" % result)
    rval += result

    return rval
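The start/end arithmetic above defines the scan window: everything newer than the last record already seen but older than the configured age. A standalone illustration of just that windowing logic, with invented numbers and plain epoch seconds, follows:

import time

age = 30 * 24 * 3600         # pretend the configured age is 30 days
end = time.time() - age      # newest timestamp still considered "old enough"
start = 1400000000           # pretend value returned by mpra_fetch_recent()

# if the configured age moved back far enough to invert the window,
# start over from the beginning of time
if end < start:
    start = 0

print("scanning records with %d <= timestamp <= %d" % (start, end))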
 def test_ctor_defaults(self):
     """
     A new Dimension with only the name specified should have the right
     defaults.
     """
     dimname = 'cos'
     CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
     a = Dimension(name=dimname)
     self.expected(dimname, a.name)
     self.expected(0.01, a.sampsize)
     self.expected({}, a.p_sum)
     self.expected({}, a.s_sum)
    def test_ctor_bad_attr(self):
        """
        Attempting to create a Dimension with attrs that are not in the
        settable list should get an exception.
        """
        dimname = 'bad_attr'
        CrawlConfig.add_config(close=True, dct=self.cfg_dict(U.my_name()))
        got_exception = False
        self.assertRaisesMsg(StandardError,
                             "Attribute 'catl' is not valid",
                             Dimension, name=dimname, catl=[1, 2, 3])

        self.assertRaisesMsg(StandardError,
                             "Attribute 'aardvark' is not valid",
                             Dimension, name=dimname, aardvark='Fanny Brice')
Example #18
 def test_html_report(self):
     """
     Run 'html report --config <cfgfile>' and verify that 1) no traceback
     occurs and 2) the command produces plausible report output.
     """
     self.dbgfunc()
     cfpath = self.tmpdir("crawl.cfg")
     cfg = CrawlConfig.add_config()
     cfg.crawl_write(open(cfpath, 'w'))
     cmd = "html report --config %s" % cfpath
     CrawlConfig.log(cmd, close=True)
     result = pexpect.run(cmd)
     if "HPSS Unavailable" in result:
         pytest.skip("HPSS Unavailable")
     self.validate_report(result)
Example #20
    def test_env_set_pre(self):
        """
        TEST: set predefined environment variable from [env] entry
        unconditionally

        EXP: the old value gets overwritten
        """
        self.dbgfunc()
        sname = 'env'
        evname = 'UTIL_TEST'
        pre_val = "one:two:three"
        add = "four:five:six"
        exp = add

        # make sure the target env variable is set to a known value
        with util.tmpenv(evname, pre_val):
            # create a config object with an 'env' section and a non-'+' option
            cfg = CrawlConfig.CrawlConfig()
            cfg.add_section(sname)
            cfg.set(sname, evname, add)

            # pass the config object to util.env_update()
            util.env_update(cfg)

            # verify that the target env variable now contains the new value
            # and the old value is gone
            self.expected(exp, os.environ[evname])
            self.assertTrue(
                pre_val not in os.environ[evname],
                "The old value should be gone but still seems " +
                "to be hanging around")
Example #21
    def test_env_add_folded_pre(self):
        """
        TEST: add to a preset environment variable from a folded [env]
        entry

        EXP: the payload (whitespace squeezed out) is appended to the old value
        """
        self.dbgfunc()
        sname = 'env'
        evname = 'UTIL_TEST'
        pre_val = "one:two:three"
        add = "four:\n   five:\n   six"
        exp = ":".join([pre_val, re.sub("\n\s*", "", add)])

        # make sure the target env variable has the expected value
        with util.tmpenv(evname, pre_val):
            # create a config object with an 'env' section and a folded '+'
            # option
            cfg = CrawlConfig.CrawlConfig()
            cfg.add_section(sname)
            cfg.set(sname, evname, '+' + add)

            # pass the config object to util.env_update()
            util.env_update(cfg)

            # verify that the variable was set to the expected value
            self.expected(exp, os.environ[evname])
Example #22
    def test_env_add_pre(self):
        """
        TEST: add to a predefined environment variable from [env] entry

        EXP: payload is appended to the old value
        """
        self.dbgfunc()
        sname = 'env'
        evname = 'UTIL_TEST'
        pre_val = "one:two:three"
        add = "four:five:six"
        exp = ":".join([pre_val, add])

        # make sure the target env variable is set to a known value
        with util.tmpenv(evname, pre_val):
            # create a config object with an 'env' section and a '+' option
            cfg = CrawlConfig.CrawlConfig()
            cfg.add_section(sname)
            cfg.set(sname, evname, "+" + add)

            # pass the config object to util.env_update()
            util.env_update(cfg)

            # verify that the target env variable now contains both old and
            # added values
            self.expected(exp, os.environ[evname])
Example #23
    def test_env_add_folded_none(self):
        """
        TEST: add to an undefined environment variable from a folded [env]
        entry

        EXP: the value gets set to the payload with the whitespace squeezed out
        """
        self.dbgfunc()
        sname = 'env'
        evname = 'UTIL_TEST'
        add = "four:\n   five:\n   six"
        exp = re.sub("\n\s*", "", add)

        # make sure the target env variable is not defined
        with util.tmpenv(evname, None):
            # create a config object with an 'env' section and a '+' option
            cfg = CrawlConfig.CrawlConfig()
            cfg.add_section(sname)
            cfg.set(sname, evname, '+' + add)

            # pass the config object to util.env_update()
            util.env_update(cfg)

            # verify that the variable was set to the expected value
            self.expected(exp, os.environ[evname])
Example #24
    def test_env_set_folded_none(self):
        """
        TEST: set undefined environment variable from a folded [env] entry
        unconditionally

        EXP: the value gets set
        """
        self.dbgfunc()
        sname = 'env'
        evname = 'UTIL_TEST'
        newval = "one:\n   two:\n   three"
        exp = re.sub("\n\s*", "", newval)

        # make sure the target env variable is not defined
        with util.tmpenv(evname, None):
            # create a config object with an 'env' section and a non-'+' option
            cfg = CrawlConfig.CrawlConfig()
            cfg.add_section(sname)
            cfg.set(sname, evname, newval)

            # pass the config object to util.env_update()
            util.env_update(cfg)

            # verify that the variable was set to the expected value
            self.expected(exp, os.environ[evname])
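Taken together, these env tests pin down the behavior expected of util.env_update(): each option in the [env] section names an environment variable, a leading '+' on the value means append (colon-separated) rather than overwrite, and folded continuation lines have their newline-plus-indentation squeezed out. The hpssic implementation is not reproduced in this listing; the sketch below is only an illustration of that observable behavior:

# Hypothetical sketch of env_update-like behavior -- not hpssic's util.env_update.
import os
import re


def env_update_sketch(cfg, section='env'):
    # NB: a stock ConfigParser lower-cases option names unless optionxform
    # is overridden; that detail is glossed over here
    for evname in cfg.options(section):
        value = cfg.get(section, evname)
        # folded (multi-line) values: squeeze out newline plus indentation
        value = re.sub(r"\n\s*", "", value)
        if value.startswith('+'):
            addend = value[1:]
            current = os.environ.get(evname)
            # append to an existing value, or just set it if undefined
            os.environ[evname] = (current + ':' + addend) if current else addend
        else:
            os.environ[evname] = value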
Example #25
    def test_ctor_reset_atime_cfg_true(self):
        """
        If reset_atime is specified in the config as True, it should be True
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # write out the default test config, in which reset_atime is specified
        # as true
        self.write_cfg_file(cf_name, self.cfg_d)

        # make the test config the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False)
        self.expected(True, h.reset_atime)

        CrawlConfig.get_config(reset=True, soft=True)
Example #26
def main(cfg):
    """
    Generate an html-formatted report and store it at the designated location
    """
    CrawlConfig.log("html_plugin starting")
    fpath = cfg.get('html', 'output_path')
    rpt = html_lib.get_html_report(cfg=cfg)

    npath = fpath + '.new'
    opath = fpath + '.old'
    with open(npath, 'w') as out:
        out.write(rpt)

    if os.path.exists(fpath):
        os.rename(fpath, opath)
    os.rename(npath, fpath)
    CrawlConfig.log("html_plugin finished")
Example #27
def main(cfg):
    """
    HSI demo
    """
    CrawlConfig.log("hsi-demo: sending output to hsi.out")
    hsi_prompt = "]:"

    S = pexpect.spawn("/opt/public/bin/hsi")
    S.logfile = f = open("hsi.out", 'a')
    S.expect(hsi_prompt)
    S.sendline("ls")

    S.expect(hsi_prompt)
    S.sendline("quit")

    S.expect(pexpect.EOF)
    S.logfile.close()
    S.close()
Example #28
 def test_alert_log(self):
     """
     Generate a log alert and verify that the message was written to the
     correct log file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_log.log')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'log', "%s")
     CrawlConfig.log(logpath=logfile, close=True)
     x = Alert.Alert(caller='AlertTest', msg='this is a test message',
                     cfg=cfg)
     self.expected_in('this is a test message', util.contents(logfile))
Example #30
def test_maybe_update_hsi_cant(muh_prep, tmpdir):
    """
    If we don't have write permission on the target, then even if we should
    update, we can't. In this case, should log a message.
    """
    pytest.dbgfunc()
    lp = tmpdir.join('crawl.test.log')
    rf = test_maybe_update_hsi_cant
    path = ":".join([rf.bin.strpath, rf.hsihome])
    with U.tmpenv('PATH', path):
        CrawlConfig.log(logpath=lp.strpath, close=True)
        hpss.maybe_update_hsi()
    c = rf.file.read()
    assert 'not changed' in c
    assert os.path.exists(lp.strpath)
    c = lp.read()
    assert MSG.hsi_wrap_ood in c
    CrawlConfig.log(close=True)
Example #33
    def test_ctor_reset_atime_call_true(self):
        """
        If reset_atime is specified in the call as True, it should be True,
        even if it's specified as False in the config
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # write out a config file with reset_atime explicitly set to 'no'
        cfg = copy.deepcopy(self.cfg_d)
        cfg['cv']['reset_atime'] = 'no'
        self.write_cfg_file(cf_name, cfg)

        # make the test config the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False, reset_atime=True)
        self.expected(True, h.reset_atime)

        CrawlConfig.get_config(reset=True, soft=True)
Example #34
    def test_ctor_reset_atime_default(self):
        """
        If reset_atime is not specified in the config or argument list, it
        should default to False
        """
        cf_name = self.tmpdir(util.my_name() + ".cfg")

        # write out a config file with no reset_atime spec
        cd = copy.deepcopy(self.cfg_d)
        del cd['cv']['reset_atime']
        self.write_cfg_file(cf_name, cd)

        # make the test config the default
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # get an hpss.HSI object and check its reset_atime attribute
        h = hpss.HSI(connect=False)
        self.expected(False, h.reset_atime)

        CrawlConfig.get_config(reset=True, soft=True)
Example #35
 def test_dlog(self):
     """
     Test method dlog on daemon object
     """
     lfname = self.tmpdir('daemon.dlog.log')
     lf = CrawlConfig.log(logpath=lfname)
     a = daemon.Daemon(self.tmpdir("daemon_pid"), logger=lf)
     logmsg = "testing the dlog method of %s" % a
     a.dlog(logmsg)
     self.assertTrue(
         logmsg in util.contents(lfname), "Expected '%s' in '%s'" %
         (logmsg, util.line_quote(util.contents(lfname))))
Example #36
 def test_alert_use_other(self):
     """
     A use directive sends us to another config section where we generate a
     log alert and verify that the message was written to the correct log
     file.
     """
     self.dbgfunc()
     logfile = self.tmpdir('alert_use.log')
     cfg = CrawlConfig.CrawlConfig()
     cfg.add_section('crawler')
     cfg.add_section('AlertTest')
     cfg.add_section('alert_section')
     cfg.add_section('other_section')
     cfg.set('crawler', 'logpath', logfile)
     cfg.set('AlertTest', 'alerts', 'alert_section')
     cfg.set('alert_section', 'use', "other_section")
     cfg.set('other_section', 'log', "%s")
     CrawlConfig.log(logpath=logfile, close=True)
     payload = 'this is a test message from %s' % util.my_name()
     x = Alert.Alert(caller='AlertTest', msg=payload, cfg=cfg)
     self.expected_in(payload, util.contents(logfile))
Example #37
 def test_ctor_no_cv_section(self):
     """
     If there is no cv section in the config, reset_atime and hash_algorithm
     should take on their default values.
     """
     self.dbgfunc()
     cfg = copy.deepcopy(self.cfg_d)
     del cfg['cv']
     zcfg = CrawlConfig.add_config(close=True, dct=cfg)
     self.assertFalse(zcfg.has_section('cv'))
     h = hpss.HSI(connect=False)
     self.expected(False, h.reset_atime)
     self.expected(None, h.hash_algorithm)
Example #39
    def test_alert_email_defcfg(self):
        """
        Generate an e-mail alert using the default config and verify that it
        was sent (this is where we use 'monkey patching').
        """
        self.dbgfunc()
        fakesmtp.inbox = []
        CrawlConfig.add_config(close=True)
        # with U.tmpenv('CRAWL_CONF', 'hpssic_test.cfg'):
        with U.tmpenv('CRAWL_CONF', None):
            logfile = self.tmpdir('alert_email.log')
            targets = "[email protected], [email protected]"
            payload = 'this is an e-mail alert'
            sender = 'hpssic@' + util.hostname(long=True)
            CrawlConfig.log(logpath=logfile, close=True)

            x = Alert.Alert(caller='cv', msg=payload)
            m = fakesmtp.inbox[0]
            self.expected(', '.join(m.to_address), targets)
            self.expected(m.from_address, sender)
            self.expected_in('sent mail to', util.contents(logfile))
            self.expected_in(payload, m.fullmessage)
Example #40
def tcc_priority(globspec, cosinfo):
    """
    Handle any files matching globspec. Return the number of files processed.
    """
    rval = 0
    cfg = CrawlConfig.get_config()
    pri_compdir = cfg.get_d(tcc_lib.sectname(), 'completed', '/tmp')
    for filepath in glob.glob(globspec):
        tcc_lib.check_file(filepath, verbose=False, plugin=True)
        cpath = U.pathjoin(pri_compdir, U.basename(filepath))
        os.rename(filepath, cpath)

    return rval
Example #44
def main(cfg):
    """
    This plugin will generate a report and send it to the designated e-mail
    address(es).
    """
    rval = 0
    try:
        if cfg is None:
            cfg = CrawlConfig.get_config()

        subject = "%s %s" % (cfg.get(
            'rpt',
            'subject'), time.strftime("%Y.%m%d %H:%M:%S", time.localtime()))

        CrawlMail.send(sender=cfg.get('rpt', 'sender'),
                       to='rpt.recipients',
                       subj=subject,
                       msg=rpt_lib.get_report())
    except Exception as e:
        rval = 1
        CrawlConfig.log("Failure in rpt_lib: '%s'" % str(e))

    return rval
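Everything this plugin needs comes from an [rpt] config section: subject and sender are read directly, and 'rpt.recipients' is handed to CrawlMail.send(), which presumably resolves that section.option string against the config. A fragment consistent with those lookups, expressed in the nested {section: {option: value}} dict form that CrawlConfig.add_config(dct=...) accepts elsewhere in these examples (option names inferred, values invented):

# Hypothetical [rpt] settings -- option names inferred from main() above,
# addresses invented for illustration.
rpt_cfg = {
    'rpt': {
        'subject': 'HPSS integrity report',
        'sender': 'hpssic@example.com',
        'recipients': 'admin@example.com, ops@example.com',
    },
}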
Example #46
    def check_hash_algorithm(self, cf_stem, alg, checkfor=None):
        """
        With hash_algorithm = *alg* in the config, verify that hashcreate
        produces a hash of the expected type (*checkfor*).
        """
        if checkfor is None:
            checkfor = alg

        # generate a config file and make it the default config
        cf_name = self.tmpdir(cf_stem + ".cfg")
        cd = copy.deepcopy(self.cfg_d)
        if alg == '(none)':
            del cd['cv']['hash_algorithm']
        else:
            cd['cv']['hash_algorithm'] = alg
        self.write_cfg_file(cf_name, cd)
        CrawlConfig.get_config(cfname=cf_name, reset=True)

        # Get an hsi object
        testfile = self.plist[1]
        try:
            h = hpss.HSI()
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))

        # if necessary, delete any hash on the test file
        result = h.hashlist(testfile)
        if "(none)" not in result:
            h.hashdelete(testfile)

        # generate a hash on the test file
        h.hashcreate(testfile)

        # verify that the hash created is of the proper type
        result = h.hashlist(testfile)
        self.expected_in(checkfor, result)
Example #47
    def test_get_html_report(self):
        """
        Call html_lib.get_html_report() directly
        """
        self.dbgfunc()

        c = CrawlConfig.add_config()

        db = CrawlDBI.DBI(dbtype="crawler")
        dbschem.drop_table(table="lscos")
        self.expected(False, db.table_exists(table="lscos"))

        try:
            result = html_lib.get_html_report('')
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))

        self.expected(True, db.table_exists(table="lscos"))
        db.close()
        self.validate_report(result)
Example #48
def main(cfg):
    """
    Main entry point for the cv plugin
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    plugdir = cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (checksums, matches, failures) = (0, 0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        if any([util.rgxin(msg, str(e))
                for msg in ["no such table: checkables",
                            "Table '.*' doesn't exist"]]):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise
    except StandardError as e:
        if 'Please call .ex_nihilo()' in str(e):
            CrawlConfig.log("calling ex_nihilo")
            Checkable.Checkable.ex_nihilo(dataroot=dataroot)
            clist = Checkable.Checkable.get_list(prob=odds)
        else:
            raise

    # We're going to process n_ops things in the HPSS namespace
    for op in range(n_ops):
        # if the list from the database is empty, there's nothing to do
        if 0 < len(clist):
            # but it's not, so grab the first item and check it
            item = clist.pop(0)
            CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
            ilist = item.check()

            # Expected outcomes that check can return:
            #  list of Checkables: read dir or checksummed files (may be empty)
            #  Alert:              checksum verify failed
            #  'access denied':    inaccessible directory
            #  'matched':          a checksum was verified
            #  'checksummed':      file was checksummed
            #  'skipped':          file was skipped
            #  'unavailable':      HPSS is temporarily unavailable
            #  StandardError:      invalid Checkable type (not 'f' or 'd')
            #
            if type(ilist) == str:
                if ilist == "access denied":
                    CrawlConfig.log("dir %s not accessible" % item.path)
                    # clist.remove(item)
                elif ilist == "matched":
                    matches += 1
                    CrawlConfig.log("%s checksums matched" % item.path)
                elif ilist == "checksummed":
                    # checksums += 1
                    CrawlConfig.log("%s checksummed" % item.path)
                elif ilist == "skipped":
                    CrawlConfig.log("%s skipped" % item.path)
                elif ilist == "unavailable":
                    CrawlConfig.log("HPSS is not available")
                    break
                else:
                    CrawlConfig.log("unexpected string returned " +
                                    "from Checkable: '%s'" % ilist)
            elif type(ilist) == list:
                CrawlConfig.log("in %s, found:" % item)
                for n in ilist:
                    CrawlConfig.log(">>> %s" % str(n))
                    if 'f' == n.type and n.checksum != 0:
                        CrawlConfig.log(".. previously checksummed")
                        # checksums += 1
            elif isinstance(ilist, Checkable.Checkable):
                CrawlConfig.log("Checkable returned - file checksummed" +
                                " - %s, %s" % (ilist.path, ilist.checksum))
                # checksums += 1
            elif isinstance(ilist, Alert.Alert):
                CrawlConfig.log("Alert generated: '%s'" %
                                ilist.msg())
                failures += 1
            else:
                CrawlConfig.log("unexpected return val from " +
                                "Checkable.check: %s: %r" %
                                (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log("files checksummed: %d; " % (t_checksums - p_checksums) +
                    "checksums matched: %d; " % matches +
                    "failures: %d" % failures)
    CrawlConfig.log("totals checksummed: %d; " % t_checksums +
                    "matches: %d; " % t_matches +
                    "failures: %d" % t_failures)

    # Report the dimension data in the log
    d = Dimension.Dimension(name='cos')
    t = Dimension.Dimension(name='cart')
    CrawlConfig.log(d.report())
    CrawlConfig.log(t.report())
    return failures
Example #49
# -----------------------------------------------------------------------------
if __name__ == '__main__':
    main(CrawlConfig.get_config())
Example #50
def main(cfg):
    """
    Tape Copy Checker retrieves the necessary information from the DB2 database
    to find files where the number of copies stored may not match the number
    called for by the COS.
    """
    # retrieve configuration items as needed
    how_many = int(cfg.get_d(tcc_lib.sectname(), 'operations', 10))
    CrawlConfig.log("tape-copy-checker: firing up for %d items" % how_many)

    # retrieve COS info
    cosinfo = tcc_lib.get_cos_info()

    # check for priority file(s)
    pri_glob = cfg.get_d(tcc_lib.sectname(), 'priority', '')
    if pri_glob != '':
        if 0 < tcc_priority(pri_glob, cosinfo):
            return

    # get the nsobject_id of the next bitfile to process from mysql
    next_nsobj_id = tcc_lib.get_next_nsobj_id(cfg)
    CrawlConfig.log("next nsobject id = %d" % next_nsobj_id)

    # fetch the next N bitfiles from DB2
    CrawlConfig.log("looking for nsobject ids between %d and %d"
                    % (next_nsobj_id, next_nsobj_id+how_many-1))
    try:
        bfl = tcc_lib.get_bitfile_set(int(next_nsobj_id),
                                      how_many)
    except U.HpssicError as e:
        bfl = []
        pass

    CrawlConfig.log("got %d bitfiles" % len(bfl))

    errcount = 0
    if len(bfl) == 0:
        for oid in range(next_nsobj_id, next_nsobj_id+how_many):
            tcc_lib.record_checked_ids(cfg, oid, oid, 1, 0)
            if cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("Object %d is not complete" % oid)
                errcount += 1
    else:
        # for each bitfile, if it does not have the right number of copies,
        # report it
        for bf in bfl:
            correct = 1
            error = 0
            if bf['SC_COUNT'] != cosinfo[bf['BFATTR_COS_ID']]:
                tcc_lib.tcc_report(bf, cosinfo)
                correct = 0
                error = 1
                CrawlConfig.log("%s %s %d != %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))
            elif cfg.getboolean(tcc_lib.sectname(), 'verbose'):
                CrawlConfig.log("%s %s %d == %d" %
                                (bf['OBJECT_ID'],
                                 tcc_lib.hexstr(bf['BFID']),
                                 bf['SC_COUNT'],
                                 cosinfo[bf['BFATTR_COS_ID']]))

            last_obj_id = int(bf['OBJECT_ID'])
            tcc_lib.record_checked_ids(cfg,
                                       last_obj_id,
                                       last_obj_id,
                                       correct,
                                       error)
            errcount += error

        CrawlConfig.log("last nsobject in range: %d" % last_obj_id)

    return errcount
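The tape copy checker's tunables all live in one config section whose name comes from tcc_lib.sectname() (shown as 'tcc' below purely as an assumption): 'operations' bounds the batch size, 'priority' is an optional glob of files to handle first, 'completed' is where priority files are moved by tcc_priority() above, and 'verbose' turns on per-object logging. A made-up fragment touching those options:

# Hypothetical config fragment for the tape copy checker. The section name
# 'tcc' is an assumption (the code asks tcc_lib.sectname()); option names
# come from the cfg.get_d()/getboolean() calls above, values are invented.
import ConfigParser

cfg = ConfigParser.RawConfigParser()
cfg.add_section('tcc')
cfg.set('tcc', 'operations', '10')
cfg.set('tcc', 'priority', '/var/hpssic/tcc/priority/*')
cfg.set('tcc', 'completed', '/var/hpssic/tcc/completed')
cfg.set('tcc', 'verbose', 'no')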