Example #1
def populate_cart_field(pc_l, limit, dryrun, verbose):
    """
    We get a list of paths and carts. The cart values may be empty or None. We
    talk to hsi to collect cart info for each of the paths, building a return
    list. If 0 < limit, the list returned is limit elements long. If dryrun, we
    just report what would happen without actually doing anything.

    In the array of tuples returned, the cart value comes first so we can pass
    the list to a db.update() call that is going to match on path.
    """
    h = hpss.HSI(verbose=True)
    rval = []
    for path, dcart in pc_l:
        info = h.lsP(path)
        hcart = info.split("\t")[5].strip()
        if dcart != hcart:
            if 0 < limit:
                try:
                    populate_cart_field._count += 1
                except AttributeError:
                    populate_cart_field._count = 1
                if 0 < limit and limit < populate_cart_field._count:
                    return True
            rval.append((path, dcart, hcart))
        if verbose:
            if 60 < len(path):
                dpath = '...' + path[-57:]
            else:
                dpath = path

            print("%-60s %-8s %-10s" % (dpath, dcart, hcart))

    h.quit()
    return rval
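
The docstring above says the tuples are meant to feed a db.update() call that matches on path. Below is a minimal standalone sketch of that update pattern, using sqlite3 and a hypothetical checkables table rather than the project's CrawlDBI layer; the table and column names are assumptions for illustration.

import sqlite3

def update_carts(db_path, cart_path_list, dryrun=False):
    """
    Apply (cart, path) pairs to a hypothetical 'checkables' table.
    Illustration only; not the project's database layer.
    """
    if dryrun:
        for cart, path in cart_path_list:
            print("would set cart=%s for path=%s" % (cart, path))
        return 0
    conn = sqlite3.connect(db_path)
    with conn:
        conn.executemany("UPDATE checkables SET cart = ? WHERE path = ?",
                         cart_path_list)
    conn.close()
    return len(cart_path_list)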
Example #2
def ttype_lookup(pathname, cart=None):
    """
    Use hsi to get the name of the cart where this file lives.

    Look up the cart in table pvlpv and get the type and subtype.

    Look up the type/subtype combination in the *_tape_types table and return
    the corresponding string.
    """
    rval = []

    # Get the cart name from hsi if we don't already have it
    if cart is None or cart == '':
        H = hpss.HSI()
        r = H.lsP(pathname)
        H.quit()

        (type, name, cart, cos) = U.lsp_parse(r)
        if not cart:
            return None

    cartlist = cart.split(',')

    # Get the type/subtype from PVLPV
    for cart in cartlist:
        desc = ttype_cart_to_desc(cart)
        rval.append((cart, desc))

    # Return the media description
    return rval
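
ttype_lookup() relies on U.lsp_parse() to break an "hsi ls -P" record into (type, name, cart, cos). Here is a rough sketch of such a parser, assuming the same tab-separated layout Example #1 uses (cart in field 5); the other field positions and the record tag are assumptions, not the project's actual parser.

def lsp_parse_sketch(line):
    """
    Split one tab-delimited 'hsi ls -P' record into (type, name, cart, cos).
    Only the cart position (field 5) is taken from Example #1 above; the
    remaining positions are assumed for illustration.
    """
    fields = [f.strip() for f in line.split("\t")]
    if len(fields) < 7:
        return None
    ftype = fields[0]      # record tag (assumed position)
    name = fields[1]       # file path (assumed position)
    cart = fields[5]       # cartridge name(s), possibly comma-separated
    cos = fields[6]        # class of service id (assumed position)
    return (ftype, name, cart, cos)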
Example #3
def copies_by_cos():
    """
    Use hsi to retrieve copy count information for each COS.
    """
    h = hpss.HSI()
    rsp = h.lscos()
    h.quit()
    cbc = {}
    for line in rsp.split("\n"):
        tup = cos_parse(line)
        if tup:
            cbc[tup[0]] = int(tup[1])
    return cbc
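
# Not part of the original module: a rough sketch of what the helper
# cos_parse() used above might look like, returning (cos_id, copy_count) or
# None. The column layout (cos id, name words, U/G/A/N flags, copy count) is
# inferred from the regex in lscos_populate() below; treat it as an
# assumption, not the project's actual parser.
import re

def cos_parse_sketch(line):
    m = re.search(r"\s*(\d+)\s*((?:[-_a-zA-Z0-9]+\s)+)\s+[UGAN]*\s+(\d+)",
                  line)
    if m is None:
        return None
    return (m.group(1), m.group(3))
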
def lscos_populate():
    """
    If table lscos already exists, we're done. Otherwise, retrieve the lscos
    info from hsi, create the table, and fill the table in.

    We store the min_size and max_size for each COS as text strings containing
    digits because the largest sizes are already within three orders of
    magnitude of a mysql bigint and growing.
    """
    db = CrawlDBI.DBI(dbtype="crawler")
    tabname = 'lscos'
    st = dbschem.make_table(tabname)
    szrgx = r"(\d+([KMGT]B)?)"
    rgx = (r"\s*(\d+)\s*(([-_a-zA-Z0-9]+\s)+)\s+[UGAN]*\s+(\d+)" +
           r"\s+(ALL)?\s+%s\s+-\s+%s" % (szrgx, szrgx))
    if "Created" == st:
        H = hpss.HSI()
        raw = H.lscos()
        H.quit()

        z = [x.strip() for x in raw.split('\r')]
        rules = [q for q in z if '----------' in q]
        first = z.index(rules[0]) + 1
        second = z[first:].index(rules[0]) + first
        lines = z[first:second]
        data = []
        for line in lines:
            m = U.rgxin(rgx, line)
            (cos, desc, copies, lo_i, hi_i) = (m[0],
                                               m[1].strip(),
                                               m[3],
                                               U.scale(m[5], kb=1024),
                                               U.scale(m[7], kb=1024))
            data.append((cos, desc, copies, lo_i, hi_i))

        db.insert(table=tabname,
                  fields=['cos', 'name', 'copies', 'min_size', 'max_size'],
                  data=data)
        rval = MSG.table_created_S % tabname
    else:
        rval = MSG.table_already_S % tabname

    db.close()
    return rval
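
# Not part of the original module: a minimal sketch of what a size-scaling
# helper like U.scale() might do with strings such as "512KB" or "8GB". The
# kb=1024 keyword mirrors the calls in lscos_populate() above; the
# implementation itself is an assumption.
import re

def scale_sketch(size, kb=1000):
    """Convert a string like '8GB' into an integer byte count."""
    m = re.match(r"(\d+)\s*([KMGT]B)?$", size.strip())
    if m is None:
        return None
    factor = {None: 1, "KB": kb, "MB": kb ** 2, "GB": kb ** 3, "TB": kb ** 4}
    return int(m.group(1)) * factor[m.group(2)]
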
    def check(self):
        """
        For a directory:
         - get a list of its contents if possible,
         - create a Checkable object for each item and persist it to the
           database
         - return the list of Checkables found in the directory
        For a file:
         - if it already has a hash, add it to the sample if not already
           and verify it
         - if it does not have a hash, decide whether to add it or not

        The value of probability [0.0 .. 1.0] indicates the likelihood with
        which we should check files.

        potential outcomes            return
         read a directory             list of Checkable objects
         file checksum fail           Alert
         invalid Checkable type       raise StandardError
         access denied                "access denied"
         verified file checksum       "matched"
         checksum a file              "checksummed"
         skipped a file               "skipped"
         hpss unavailable             "unavailable"

        Here we examine a population member, count it as a member of the
        population, decide whether to add it to the sample, and if so, count it
        as a sample member.

        First, we have to make all the decisions and update the object
        accordingly.

        Then, we persist the object to the database.
        """
        # fire up hsi
        # self.probability = probability
        rval = []
        cfg = CrawlConfig.get_config()
        # hsi_timeout = int(cfg.get_d('crawler', 'hsi_timeout', 300))
        try:
            # h = hpss.HSI(timeout=hsi_timeout, verbose=True)
            h = hpss.HSI(verbose=True)
            CrawlConfig.log("started hsi with pid %d" % h.pid())
        except hpss.HSIerror as e:
            return "unavailable"

        if self.type == 'd':
            rsp = h.lsP(self.path)
            if "Access denied" in rsp:
                rval = "access denied"
            else:
                for line in rsp.split("\n"):
                    new = Checkable.fdparse(line)
                    if new is not None:
                        rval.append(new)
                        new.load()
                        new.persist()
                        # returning list of items found in the directory
        elif self.type == 'f':
            if self.cart is None:
                self.populate_cart(h)
            if self.checksum == 0:
                if self.has_hash(h):
                    self.add_to_sample(h, already_hashed=True)
                    rval = self.verify(h)
                    # returning "matched", "checksummed", "skipped", or Alert()
                elif self.addable():
                    rval = self.add_to_sample(h)
                    # returning "access denied" or "checksummed"
                else:
                    rval = "skipped"
            else:
                rval = self.verify(h)
                # returning "matched", "checksummed", "skipped", or Alert()
        else:
            raise StandardError("Invalid Checkable type: %s" % self.type)

        if (3 < self.fails) and (0 == self.reported):
            self.fail_report(h.before())
            rval = "skipped"

        h.quit()

        self.set('last_check', time.time())
        CrawlConfig.log(
            "Persisting checkable '%s' with %s = %f, %s = %d" %
            (self.path, 'last_check', self.last_check, 'fails', self.fails))
        self.persist()
        return rval
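
A hypothetical caller, sketching how the outcomes documented in check()'s docstring could be handled. Only the string return values and the list-of-Checkables case come from the docstring; the function name and the handling decisions are assumptions.

def handle_check_result(item):
    """Dispatch on the documented return values of check()."""
    result = item.check()
    if isinstance(result, list):
        # a directory was read: result is the list of Checkables found in it
        return result
    if result == "unavailable":
        # hpss was down; leave the item for a later pass
        return []
    if result in ("matched", "checksummed", "skipped", "access denied"):
        return []
    # per the docstring, a checksum failure returns an Alert object;
    # no further items to schedule in that case
    return []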