예제 #1
0
    def test_hashdelete_ok_str(self):
        """
        If hashdelete gets a string argument, it should work

        Every path in self.plist is given a stored checksum if hashlist
        reports it as having none. hashdelete is then run on self.paths plus
        '<hdir>/hashnot' as one whitespace-separated string. Each path in
        self.paths must be reported as 'hash deleted', and no such report may
        appear for 'hashnot'. The test is skipped if HPSS is unavailable; any
        other HSIerror is re-raised so it shows up as a test error.
        """
        try:
            paths = self.paths + " %s/hashnot" % self.hdir
            h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))
            # make sure the hashables all have a checksum stored
            x = h.hashlist(self.plist)
            for path in self.plist:
                if util.rgxin(r"\(?none\)?  %s" % path, x):
                    h.hashcreate(path)

            # run hashdelete on the string
            result = h.hashdelete(paths)
            h.quit()

            # verify the results
            self.expected_in("hashdelete", result)
            for path in self.paths.split():
                exp = r"hash deleted: \(?md5\)? %s" % path
                self.expected_in(exp, result)
            exp = r"hash deleted: \(?md5\)? %s/hashnot" % self.hdir
            self.assertFalse(util.rgxin(exp, result),
                             "'%s' not expected in %s" %
                             (exp, util.line_quote(result)))
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))
            # pytest.skip() raises, so this is only reached for other HSI
            # failures -- those must not be reported as a pass
            raise
예제 #2
0
    def test_hashlist_ok_str(self):
        """
        If hashlist gets a string argument, it should work

        Every path in self.plist is given a stored checksum if hashlist
        reports it as having none. hashlist is then run on self.paths plus
        '<hdir>/hashnot' as one whitespace-separated string. Each path in
        self.paths must be listed with an md5 hash and 'hashnot' must be
        listed with none. The test is skipped if HPSS is unavailable; any
        other HSIerror is re-raised so it shows up as a test error.
        """
        try:
            paths = self.paths + " %s/hashnot" % self.hdir
            h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))
            # make sure the hashables all have a checksum stored
            x = h.hashlist(self.plist)
            for path in self.plist:
                if util.rgxin(r"\(?none\)?  %s" % path, x):
                    h.hashcreate(path)

            # run the test payload
            result = h.hashlist(paths)
            h.quit()

            # verify the results
            self.expected_in("hashlist", result)
            for path in self.paths.split():
                exp = r"\(?md5\)? %s" % path
                self.expected_in(exp, result)
            exp = r"\(?none\)?  %s/hashnot" % self.hdir
            self.expected_in(exp, result)
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))
            # pytest.skip() raises, so this is only reached for other HSI
            # failures -- those must not be reported as a pass
            raise
예제 #3
0
    def test_hashdelete_ok_list(self):
        """
        If hashdelete gets a list argument, it should work

        Every path in self.plist is given a stored checksum if hashlist
        reports it as having none. hashdelete is then run on self.plist plus
        '<hdir>/hashnot' as a list. Each path in self.plist must be reported
        as 'hash deleted', and no such report may appear for 'hashnot'. The
        test is skipped if HPSS is unavailable; any other HSIerror is
        re-raised so it shows up as a test error.
        """
        try:
            plist = self.plist + [self.hdir + "/hashnot"]
            h = hpss.HSI(verbose=("verbose" in testhelp.testargs()))
            # make sure the hashables all have a checksum stored
            x = h.hashlist(self.plist)
            for path in self.plist:
                if util.rgxin(r"\(?none\)?  %s" % path, x):
                    h.hashcreate(path)

            # run hashdelete on the list
            result = h.hashdelete(plist)
            h.quit()

            # verify the results
            self.expected_in("hashdelete", result)
            for path in self.plist:
                # tolerate optional parentheses around the hash type, as the
                # string-argument variant of this test does
                exp = r"hash deleted: \(?md5\)? %s" % path
                self.expected_in(exp, result)
            # exp is a regexp, so it has to be checked with rgxin; a plain
            # substring test on the pattern text could never fail
            exp = r"hash deleted: \(?md5\)? %s/hashnot" % self.hdir
            self.assertFalse(util.rgxin(exp, result),
                             "'%s' not expected in %s" %
                             (exp, util.line_quote(result)))
        except hpss.HSIerror as e:
            if MSG.hpss_unavailable in str(e):
                pytest.skip(str(e))
            # pytest.skip() raises, so this is only reached for other HSI
            # failures -- those must not be reported as a pass
            raise
예제 #4
0
 def test_rgxin(self):
     """
     Exercise rgxin(needle, haystack), the regexp analogue of the Python
     expression "needle in haystack": it must be truthy on a hit, falsy on
     a miss, and is expected to hand back the matched text.
     """
     self.dbgfunc()
     optional_parens = r"a\(?b\)?c"
     alternation = "(dog|fox|over)"
     pangram = "The quick brown fox jumps over the lazy dog"
     bare_hit = "Now we know our abc's"
     paren_hit = "With parens: a(b)c"

     # the pattern must hit both with and without the literal parentheses
     for haystack in (bare_hit, paren_hit):
         self.assertTrue(U.rgxin(optional_parens, haystack),
                         "'%s' should match '%s'" %
                         (optional_parens, haystack))

     # ... and must miss in text that has no 'abc' at all
     self.assertFalse(U.rgxin(optional_parens, pangram),
                      "'%s' should NOT match '%s'" %
                      (optional_parens, pangram))

     # on a hit, the return value is compared with the matched text
     hits = (('abc', optional_parens, bare_hit),
             ('a(b)c', optional_parens, paren_hit),
             ('fox', alternation, pangram))
     for wanted, needle, haystack in hits:
         self.expected(wanted, U.rgxin(needle, haystack))
예제 #5
0
 def test_rgxin(self):
     """
     Exercise rgxin(needle, haystack), the regexp analogue of the Python
     expression "needle in haystack": it must be truthy on a hit, falsy on
     a miss, and is expected to hand back the matched text.
     """
     self.dbgfunc()
     optional_parens = r"a\(?b\)?c"
     alternation = "(dog|fox|over)"
     pangram = "The quick brown fox jumps over the lazy dog"
     bare_hit = "Now we know our abc's"
     paren_hit = "With parens: a(b)c"

     # the pattern must hit both with and without the literal parentheses
     for haystack in (bare_hit, paren_hit):
         self.assertTrue(U.rgxin(optional_parens, haystack),
                         "'%s' should match '%s'" %
                         (optional_parens, haystack))

     # ... and must miss in text that has no 'abc' at all
     self.assertFalse(U.rgxin(optional_parens, pangram),
                      "'%s' should NOT match '%s'" %
                      (optional_parens, pangram))

     # on a hit, the return value is compared with the matched text
     hits = (('abc', optional_parens, bare_hit),
             ('a(b)c', optional_parens, paren_hit),
             ('fox', alternation, pangram))
     for wanted, needle, haystack in hits:
         self.expected(wanted, U.rgxin(needle, haystack))
예제 #6
0
def _rebuild_checkables(odds, dataroot):
    """
    Run Checkable.ex_nihilo() for *dataroot*, then fetch and return the list
    of Checkables again (recovery path shared by the handlers in main()).
    """
    CrawlConfig.log("calling ex_nihilo")
    Checkable.Checkable.ex_nihilo(dataroot=dataroot)
    # NOTE(review): unlike the first attempt in main(), rootlist is not
    # passed on this retry -- kept as it was; confirm that is intended.
    return Checkable.Checkable.get_list(prob=odds)


def main(cfg):
    """
    Main entry point for the cv plugin

    Reads 'dataroot', 'odds' and 'operations' for this plugin from *cfg*,
    fetches the list of Checkables, checks up to 'operations' of them while
    logging what each check returned, then updates and logs the statistics
    and the 'cos' and 'cart' dimension reports.

    cfg: configuration object providing get() and getfloat()

    Returns the number of Alerts (checksum verification failures) seen during
    this run.

    Re-raises the exception from Checkable.get_list() when it is not one of
    the recognised "table missing" / "Please call .ex_nihilo()" conditions.
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    # The value is not used below; the lookup is kept in case callers rely on
    # its effects (e.g. an error for a missing option).
    cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (matches, failures) = (0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        missing_table = ("no such table: checkables",
                         "Table '.*' doesn't exist")
        if not any(util.rgxin(msg, str(e)) for msg in missing_table):
            raise
        clist = _rebuild_checkables(odds, dataroot)
    except StandardError as e:
        if 'Please call .ex_nihilo()' not in str(e):
            raise
        clist = _rebuild_checkables(odds, dataroot)

    # We're going to process n_ops things in the HPSS namespace
    for _ in range(n_ops):
        # if the list from the database is empty, there's nothing left to
        # do -- the remaining operations would all be no-ops
        if not clist:
            break

        # grab the first item and check it
        item = clist.pop(0)
        CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
        ilist = item.check()

        # Expected outcomes that check can return:
        #  list of Checkables: read dir or checksummed files (may be empty)
        #  Alert:              checksum verify failed
        #  'access denied':    unaccessible directory
        #  'matched':          a checksum was verified
        #  'checksummed':      file was checksummed
        #  'skipped':          file was skipped
        #  'unavailable':      HPSS is temporarily unavailable
        #  StandardError:      invalid Checkable type (not 'f' or 'd')
        #
        # Checksummed files are not counted here: the number reported at the
        # end is the difference between two get_stats() totals.
        if isinstance(ilist, str):
            if ilist == "access denied":
                CrawlConfig.log("dir %s not accessible" % item.path)
            elif ilist == "matched":
                matches += 1
                CrawlConfig.log("%s checksums matched" % item.path)
            elif ilist == "checksummed":
                CrawlConfig.log("%s checksummed" % item.path)
            elif ilist == "skipped":
                CrawlConfig.log("%s skipped" % item.path)
            elif ilist == "unavailable":
                CrawlConfig.log("HPSS is not available")
                break
            else:
                CrawlConfig.log(("unexpected string returned "
                                 "from Checkable: '%s'") % ilist)
        elif isinstance(ilist, list):
            CrawlConfig.log("in %s, found:" % item)
            for n in ilist:
                CrawlConfig.log(">>> %s" % str(n))
                if 'f' == n.type and n.checksum != 0:
                    CrawlConfig.log(".. previously checksummed")
        elif isinstance(ilist, Checkable.Checkable):
            CrawlConfig.log(("Checkable returned - file checksummed"
                             " - %s, %s") % (ilist.path, ilist.checksum))
        elif isinstance(ilist, Alert.Alert):
            CrawlConfig.log("Alert generated: '%s'" % ilist.msg())
            failures += 1
        else:
            CrawlConfig.log(("unexpected return val from "
                             "Checkable.check: %s: %r") %
                            (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log(("files checksummed: %d; "
                     "checksums matched: %d; "
                     "failures: %d") %
                    (t_checksums - p_checksums, matches, failures))
    CrawlConfig.log(("totals checksummed: %d; "
                     "matches: %d; "
                     "failures: %d") %
                    (t_checksums, t_matches, t_failures))

    # Report the dimension data in the log
    for dim_name in ('cos', 'cart'):
        CrawlConfig.log(Dimension.Dimension(name=dim_name).report())
    return failures
예제 #7
0
def _rebuild_checkables(odds, dataroot):
    """
    Run Checkable.ex_nihilo() for *dataroot*, then fetch and return the list
    of Checkables again (recovery path shared by the handlers in main()).
    """
    CrawlConfig.log("calling ex_nihilo")
    Checkable.Checkable.ex_nihilo(dataroot=dataroot)
    # NOTE(review): unlike the first attempt in main(), rootlist is not
    # passed on this retry -- kept as it was; confirm that is intended.
    return Checkable.Checkable.get_list(prob=odds)


def main(cfg):
    """
    Main entry point for the cv plugin

    Reads 'dataroot', 'odds' and 'operations' for this plugin from *cfg*,
    fetches the list of Checkables, checks up to 'operations' of them while
    logging what each check returned, then updates and logs the statistics
    and the 'cos' and 'cart' dimension reports.

    cfg: configuration object providing get() and getfloat()

    Returns the number of Alerts (checksum verification failures) seen during
    this run.

    Re-raises the exception from Checkable.get_list() when it is not one of
    the recognised "table missing" / "Please call .ex_nihilo()" conditions.
    """
    # Get stuff we need -- the logger object, dataroot, etc.
    CrawlConfig.log("firing up")
    # The value is not used below; the lookup is kept in case callers rely on
    # its effects (e.g. an error for a missing option).
    cfg.get('crawler', 'plugin-dir')
    dataroot = util.csv_list(cfg.get(plugin_name, 'dataroot'))
    odds = cfg.getfloat(plugin_name, 'odds')
    n_ops = int(cfg.get(plugin_name, 'operations'))

    # Initialize our statistics
    (t_checksums, t_matches, t_failures) = get_stats()
    (matches, failures) = (0, 0)

    # Fetch the list of HPSS objects that we're looking at from the
    # database
    try:
        clist = Checkable.Checkable.get_list(prob=odds, rootlist=dataroot)
    except CrawlDBI.DBIerror as e:
        missing_table = ("no such table: checkables",
                         "Table '.*' doesn't exist")
        if not any(util.rgxin(msg, str(e)) for msg in missing_table):
            raise
        clist = _rebuild_checkables(odds, dataroot)
    except StandardError as e:
        if 'Please call .ex_nihilo()' not in str(e):
            raise
        clist = _rebuild_checkables(odds, dataroot)

    # We're going to process n_ops things in the HPSS namespace
    for _ in range(n_ops):
        # if the list from the database is empty, there's nothing left to
        # do -- the remaining operations would all be no-ops
        if not clist:
            break

        # grab the first item and check it
        item = clist.pop(0)
        CrawlConfig.log("[%d] checking %s" % (item.rowid, item))
        ilist = item.check()

        # Expected outcomes that check can return:
        #  list of Checkables: read dir or checksummed files (may be empty)
        #  Alert:              checksum verify failed
        #  'access denied':    unaccessible directory
        #  'matched':          a checksum was verified
        #  'checksummed':      file was checksummed
        #  'skipped':          file was skipped
        #  'unavailable':      HPSS is temporarily unavailable
        #  StandardError:      invalid Checkable type (not 'f' or 'd')
        #
        # Checksummed files are not counted here: the number reported at the
        # end is the difference between two get_stats() totals.
        if isinstance(ilist, str):
            if ilist == "access denied":
                CrawlConfig.log("dir %s not accessible" % item.path)
            elif ilist == "matched":
                matches += 1
                CrawlConfig.log("%s checksums matched" % item.path)
            elif ilist == "checksummed":
                CrawlConfig.log("%s checksummed" % item.path)
            elif ilist == "skipped":
                CrawlConfig.log("%s skipped" % item.path)
            elif ilist == "unavailable":
                CrawlConfig.log("HPSS is not available")
                break
            else:
                CrawlConfig.log(("unexpected string returned "
                                 "from Checkable: '%s'") % ilist)
        elif isinstance(ilist, list):
            CrawlConfig.log("in %s, found:" % item)
            for n in ilist:
                CrawlConfig.log(">>> %s" % str(n))
                if 'f' == n.type and n.checksum != 0:
                    CrawlConfig.log(".. previously checksummed")
        elif isinstance(ilist, Checkable.Checkable):
            CrawlConfig.log(("Checkable returned - file checksummed"
                             " - %s, %s") % (ilist.path, ilist.checksum))
        elif isinstance(ilist, Alert.Alert):
            CrawlConfig.log("Alert generated: '%s'" % ilist.msg())
            failures += 1
        else:
            CrawlConfig.log(("unexpected return val from "
                             "Checkable.check: %s: %r") %
                            (type(ilist), ilist))

    # Report the statistics in the log
    # ** For checksums, we report the current total minus the previous
    # ** For matches and failures, we counted them up during the iteration
    # ** See the description of get_stats for why we don't store total
    #    checksums
    p_checksums = t_checksums
    t_matches += matches
    t_failures += failures
    cv_lib.update_stats((t_matches, t_failures))

    (t_checksums, t_matches, t_failures) = get_stats()
    CrawlConfig.log(("files checksummed: %d; "
                     "checksums matched: %d; "
                     "failures: %d") %
                    (t_checksums - p_checksums, matches, failures))
    CrawlConfig.log(("totals checksummed: %d; "
                     "matches: %d; "
                     "failures: %d") %
                    (t_checksums, t_matches, t_failures))

    # Report the dimension data in the log
    for dim_name in ('cos', 'cart'):
        CrawlConfig.log(Dimension.Dimension(name=dim_name).report())
    return failures