Ejemplo n.º 1
0
async def bucketCheck(app):
    """ Verify that contents of bucket are self-consistent

    Runs listKeys, clears the used flags, runs markObjs, then counts the
    non-chunk uuid objects whose ``used`` attribute is False and prints a
    summary (domains, roots, top-level domains, totals) to stdout.

    app: application dictionary; read here for the keys "s3objs",
        "domains", "roots" and "bytes_in_bucket"
    """

    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    # do initial listKeys
    await listKeys(app)

    # clear used flags
    clearUsedFlags(app)

    # mark objs
    await markObjs(app)

    unlinked_count = 0
    s3objs = app["s3objs"]
    for objid in s3objs:
        # only uuid-style ids that are not chunk ids are inspected
        if not isValidUuid(objid) or isValidChunkId(objid):
            continue
        try:
            s3obj = await getS3Obj(app, objid)
        except HTTPInternalServerError as hpe:
            # best effort: report the failure and keep scanning
            log.warn("got error retreiving {}: {}".format(objid, hpe.code))
            continue
        if s3obj.used is False:
            unlinked_count += 1

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    roots = app["roots"]
    for root in roots:
        print("root:", root)

    top_level_domains = []
    for domain in domains:
        # startswith (rather than domain[0]) so that an empty domain string
        # is reported as unexpected instead of raising IndexError
        if not domain.startswith('/'):
            log.error("unexpected domain: {}".format(domain))
            continue
        # a top-level domain has no further '/' after the leading one
        if '/' not in domain[1:]:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))
Ejemplo n.º 2
0
async def createGroup():
    """ create a new group and link it to the parent group with
    link name of group name

    Reads the shared ``globals`` dictionary for the client session, domain
    and root group id, and updates its request/failure counters.

    Returns the id of the newly created group.

    Raises HttpProcessingError if the POST does not return 201, or if the
    PUT returns anything other than 201 or 409.
    """
    client = globals["client"]
    domain = globals["domain"]
    params = {"host": domain}
    base_req = getEndpoint()
    headers = getRequestHeaders()
    timeout = config.get("timeout")

    # create a new group
    req = base_req + "/groups"
    log.info("POST:" + req)
    globals["grp_request_count"] += 1
    # the request counter value is used to build the link name below
    group_number = globals["grp_request_count"]
    async with client.post(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status != 201:
            log.error("POST {} failed with status: {}, rsp: {}".format(req, rsp.status, str(rsp)))
            globals["grp_failed_posts"] += 1
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        globals["group_count"] += 1
        log.info("group_count: {}".format(globals["group_count"]))
        group_json = await rsp.json()
        group_id = group_json["id"]

    # link group to parent
    root_id = globals["root"]
    group_name = "group_{}".format(group_number)
    req = base_req + "/groups/" + root_id + "/links/" + group_name
    data = {"id": group_id}
    log.info("PUT " + req)
    globals["lnk_request_count"] += 1
    async with client.put(req, data=json.dumps(data), headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 409:
            # another task has created this link already
            log.warn("got 409 in request: " + req)
        elif rsp.status != 201:
            globals["lnk_failed_posts"] += 1
            log.error("got http error: {} for request: {}, rsp: {}".format(rsp.status, req, rsp))
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")

    return group_id
Ejemplo n.º 3
0
async def checkDataset(app, dset_key):
    """Check one dataset's layout and rewrite its file_uri prefix if needed.

    Loads the dataset JSON stored under dset_key. For layout classes
    H5D_CONTIGUOUS_REF and H5D_CHUNKED_REF, a "file_uri" that starts with
    app["prefix_old"] is counted in app["matched_dset_uri"] and, when
    app["do_update"] is truthy, rewritten to start with app["prefix_new"]
    and written back to storage. For H5D_CHUNKED_REF_INDIRECT the dataset
    key prefix is recorded in app["indirect_dataset_keys"].

    app: application dictionary (also has "dataset_count" incremented)
    dset_key: storage key of the dataset JSON; the indirect branch strips
        len(".dataset.json") characters from its end
    """
    log.info(f"checkDataset for key: {dset_key}")
    dset_json = await getStorJSONObj(app, dset_key)
    dset_id = dset_json["id"]
    prefix_old = app["prefix_old"]
    prefix_new = app["prefix_new"]
    do_update = app["do_update"]
    # NOTE(review): assumed to be a list - confirm where app is initialized
    indirect_dataset_keys = app["indirect_dataset_keys"]
    app["dataset_count"] += 1
    log.info(f"checkDataset for id: {dset_id}")
    if "layout" not in dset_json:
        log.info("no layout found")
        return
    layout_json = dset_json["layout"]
    if "class" not in layout_json:
        log.warn(f"no class found in layout for id: {dset_id}")
        return
    layout_class = layout_json["class"]
    log.info(f"got layout_class: {layout_class}")
    if layout_class in ('H5D_CONTIGUOUS_REF', 'H5D_CHUNKED_REF'):
        if "file_uri" not in layout_json:
            log.warn(
                f"Expected to find key 'file_uri' in layout_json for id: {dset_id}"
            )
            return
        file_uri = layout_json["file_uri"]
        if file_uri.startswith(prefix_old):
            new_file_uri = prefix_new + file_uri[len(prefix_old):]
            log.info(f"replacing uri: {file_uri} with {new_file_uri}")
            app["matched_dset_uri"] += 1
            if do_update:
                # update the dataset json
                layout_json["file_uri"] = new_file_uri
                dset_json["layout"] = layout_json
                # write back to storage
                try:
                    await putStorJSONObj(app, dset_key, dset_json)
                    log.info(f"dataset {dset_id} updated")
                except Exception as e:
                    # best effort: a failed write is logged, not propagated
                    log.error(f"get exception writing dataset json: {e}")
    elif layout_class == 'H5D_CHUNKED_REF_INDIRECT':
        # add to list to be scanned later
        # append, not "+=": "list += str" would extend the list with the
        # individual characters of the prefix instead of adding one entry
        indirect_dataset_keys.append(dset_key[:-len(".dataset.json")])
    else:
        log.info(f"skipping check for layout_class: {layout_class}")
Ejemplo n.º 4
0
async def getS3RootKeysCallback(app, s3keys):
    """Accumulate per-root statistics into app["bucket_scan"].

    For each key of the form "db/.../", increments the root count,
    optionally rescans the root (when app["scanRootKeys_update"] is truthy)
    and, if a ".info.json" object can be read for it, adds its counts and
    byte totals to the running results.

    app: application dictionary
    s3keys: list of key strings

    Raises ValueError if s3keys is not a list.
    """
    # validate before calling len() so that a non-list argument produces
    # the intended ValueError rather than a TypeError from the log line
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    results = app["bucket_scan"]

    for s3key in s3keys:
        log.info(f"got key: {s3key}")
        if not s3key.startswith("db/") or s3key[-1] != '/':
            log.error(f"unexpected key for getS3RootKeysCallback: {s3key}")
            continue
        root_id = getObjId(s3key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = s3key + ".info.json"

        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        info_obj = None
        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            pass  # info.json not created yet
        except HTTPInternalServerError as ie:
            log.warn(f"error getting s3obj: {ie}")
            continue

        if not info_obj:
            continue

        log.info(f"got obj: {info_obj}")
        results["info_count"] += 1
        results["group_count"] += info_obj["num_groups"]
        # datasets is a sized collection here, unlike the num_* counters
        results["dataset_count"] += len(info_obj["datasets"])
        results["datatype_count"] += info_obj["num_datatypes"]
        results["chunk_count"] += info_obj["num_chunks"]
        results["allocated_bytes"] += info_obj["allocated_bytes"]
        results["metadata_bytes"] += info_obj["metadata_bytes"]
Ejemplo n.º 5
0
async def run_scan(app, rootid, update=False):
    """Scan the ".dataset.json" keys under a root group's prefix.

    Derives the root prefix from the storage key for rootid, stores it in
    app["root_prefix"], and lists matching keys with getKeysCallback as the
    callback. Listing errors are logged rather than propagated. The storage
    client is released before returning.

    app: application dictionary
    rootid: id of the root group to scan
    update: not used by this function body

    Raises ValueError if the key for rootid does not end in "/.group.json".
    """

    root_key = getS3Key(rootid)

    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    # keep the trailing '/' so the prefix only matches keys under this root
    root_prefix = root_key[:-(len(".group.json"))]
    app["root_prefix"] = root_prefix

    try:
        await getStorKeys(app,
                          prefix=root_prefix,
                          suffix=".dataset.json",
                          include_stats=False,
                          callback=getKeysCallback)
    except ClientError as ce:
        # message previously named an unrelated function (removeKeys);
        # report the call that actually failed, like the handlers below
        log.error(
            f"getStorKeys - ClientError for getStorKeys with prefix: {root_prefix}: {ce}"
        )
    except HTTPNotFound:
        log.warn(
            f"getStorKeys - HTTPNotFound error for getStorKeys with prefix: {root_prefix}"
        )
    except HTTPInternalServerError:
        log.error(
            f"getStorKeys - HTTPInternalServerError for getStorKeys with prefix: {root_prefix}"
        )
    except Exception as e:
        log.error(
            f"getStorKeys - Unexpected Exception for getStorKeys with prefix: {root_prefix}: {e}"
        )

    # update all chunks for datasets with H5D_CHUNKED_REF_INDIRECT layout
    indirect_dataset_keys = app["indirect_dataset_keys"]
    for prefix in indirect_dataset_keys:
        log.info(f"got inidirect prefix: {prefix}")
        # TBD...

    await releaseStorageClient(app)
Ejemplo n.º 6
0
def sig_handler(sig, frame):
    """Signal handler: log the signal, print the results, then exit.

    sig: the signal received
    frame: unused
    """
    message = f"Caught signal: {sig!s}"
    log.warn(message)
    print_results()
    sys.exit()