Example #1
0
async def verifyDomain(domain):
    """ Create the domain if it doesn't already exist.

    On success, stores the domain's root group id in globals["root"].
    Raises HttpProcessingError if the domain can't be fetched or created.
    """
    params = {"host": domain}
    headers = getRequestHeaders()
    client = globals["client"]
    req = getEndpoint() + '/'
    root_id = None
    log.info("GET " + req)
    timeout = config.get("timeout")
    async with client.get(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 200:
            domain_json = await rsp.json()
        else:
            log.info("got status: {}".format(rsp.status))
    if rsp.status == 200:
        root_id = domain_json["root"]
    elif rsp.status == 404:
        # domain not found - create it, then re-fetch to get the root id
        setupDomain(domain)
        async with client.get(req, headers=headers, timeout=timeout, params=params) as rsp:
            if rsp.status == 200:
                domain_json = await rsp.json()
                root_id = domain_json["root"]
            else:
                log.error("got status: {} for GET req: {}".format(rsp.status, req))
                raise HttpProcessingError(code=rsp.status, message="Service error")
    else:
        # unexpected status (e.g. 500): fail fast rather than storing
        # globals["root"] = None for later requests to trip over
        log.error("got status: {} for GET req: {}".format(rsp.status, req))
        raise HttpProcessingError(code=rsp.status, message="Service error")
    globals["root"] = root_id
Example #2
0
async def scanRootKeys(app, update=False):
    """ Iterate through all s3 root keys (the "db/" prefix) in the bucket.

    Note: not re-entrant!  Only one scanRoot can be run at a time per app.
    """
    log.info("scanRootKeys")
    # remember whether the per-root callback should also update each root
    app["scanRootKeys_update"] = update

    await getStorKeys(app, prefix="db/", deliminator='/', include_stats=False, callback=getS3RootKeysCallback)
Example #3
0
async def bucketCheck(app):
    """ Verify that contents of bucket are self-consistent
    """
    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    # initial pass over the bucket keys
    await listKeys(app)

    # reset the used flags before marking
    clearUsedFlags(app)

    # walk domains and flag every reachable object
    await markObjs(app)

    # count objects that were never marked as used
    unlinked_count = 0
    for obj_id in app["s3objs"]:
        if not isValidUuid(obj_id) or isValidChunkId(obj_id):
            continue
        try:
            s3obj = await getS3Obj(app, obj_id)
            if s3obj.used is False:
                unlinked_count += 1
        except HTTPInternalServerError as hpe:
            log.warn("got error retreiving {}: {}".format(obj_id, hpe.code))

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    for root in app["roots"]:
        print("root:", root)

    # a top-level domain has no '/' after the leading one
    top_level_domains = []
    for domain in domains:
        if domain[0] != '/':
            log.error("unexpected domain: {}".format(domain))
            continue
        if '/' not in domain[1:]:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))
Example #4
0
async def getKeysCallback(app, s3keys):
    """ Callback for getStorKeys - run checkDataset on each .dataset.json key. """
    log.info(f"getKeysCallback, {len(s3keys)} items")

    if not isinstance(s3keys, list):
        log.error("expected list result for objDeleteCallback")
        raise ValueError("unexpected callback format")

    if "root_prefix" not in app or not app["root_prefix"]:
        log.error("Unexpected getKeysCallback")
        raise ValueError("Invalid getKeysCallback")

    prefix = app["root_prefix"]
    n = len(prefix)
    for key in s3keys:
        # every key returned must fall under the configured root prefix
        if not key.startswith(prefix):
            log.error(f"Unexpected key {key} for prefix: {prefix}")
            raise ValueError("invalid s3key for getKeysCallback")
        if not key.endswith(".dataset.json"):
            log.info(f"got unexpected key {key}, ignoring")
            continue
        # prefix + key[n:] reconstructs the full key
        dset_key = prefix + key[n:]
        log.info(f"getKeys - :{dset_key}")
        await checkDataset(app, dset_key)

    log.info("getKeysCallback complete")
async def createDomain(app, domain, domain_json):
    """ Write a new domain object to S3.

    Errors (already-exists, no parent, S3 failures) are printed rather
    than propagated - this is a best-effort tool operation.
    """
    try:
        s3_key = getS3Key(domain)
        if await isS3Obj(app, s3_key):
            raise ValueError("Domain already exists")
        if getParentDomain(domain) is None:
            raise ValueError("Domain must have a parent")

        log.info("writing domain")
        await putS3JSONObj(app, s3_key, domain_json)
        print("domain created!  s3_key: {}  domain_json: {}".format(
            s3_key, domain_json))
    except ValueError as ve:
        print("Got ValueError exception: {}".format(str(ve)))
    except ClientOSError as coe:
        print("Got S3 error: {}".format(str(coe)))
async def createDomain(app, domain, domain_json):
    """ Create a domain under the configured bucket.

    Prints progress; re-raises ValueError (already exists / no parent)
    so the caller can abort.
    """
    try:
        # prepend the bucket name to form the full domain path
        domain = app["bucket_name"] + domain
        print("domain:", domain)
        s3_key = getS3Key(domain)
        print("s3_key: ", s3_key)
        if await isStorObj(app, s3_key):
            raise ValueError("Domain already exists")
        if getParentDomain(domain) is None:
            raise ValueError("Domain must have a parent")

        log.info("writing domain")
        await putStorJSONObj(app, s3_key, domain_json)
        print("domain created!  s3_key: {}  domain_json: {}".format(
            s3_key, domain_json))
    except ValueError as ve:
        print("Got ValueError exception: {}".format(str(ve)))
        raise
Example #7
0
async def getS3RootKeysCallback(app, s3keys):
    """ Callback for the root-key scan - tally per-root stats into
    app["bucket_scan"], optionally updating each root first.
    """
    log.info(f"getS3RootKeysCallback, {len(s3keys)} items")
    if not isinstance(s3keys, list):
        log.error("expected list result for s3keys callback")
        raise ValueError("unexpected callback format")
    results = app["bucket_scan"]

    for key in s3keys:
        log.info(f"got key: {key}")
        # expect keys of the form "db/<id>/"
        if not key.startswith("db/") or not key.endswith('/'):
            log.error(f"unexpected key for getS3RootKeysCallback: {key}")
            continue
        root_id = getObjId(key + ".group.json")
        log.info(f"root_id: {root_id}")
        results["root_count"] += 1

        info_key = key + ".info.json"

        if app["scanRootKeys_update"]:
            log.info("updating...")
            await scanRoot(app, root_id, update=True)

        try:
            info_obj = await getStorJSONObj(app, info_key)
        except HTTPNotFound:
            info_obj = None  # info.json not created yet
        except HTTPInternalServerError as ie:
            log.warn(f"error getting s3obj: {ie}")
            continue

        if not info_obj:
            continue
        log.info(f"got obj: {info_obj}")
        results["info_count"] += 1
        results["group_count"] += info_obj["num_groups"]
        results["dataset_count"] += len(info_obj["datasets"])
        results["datatype_count"] += info_obj["num_datatypes"]
        results["chunk_count"] += info_obj["num_chunks"]
        results["allocated_bytes"] += info_obj["allocated_bytes"]
        results["metadata_bytes"] += info_obj["metadata_bytes"]
Example #8
0
async def run_scan(app, rootid, update=False):
    """ Scan all dataset keys under the given root id.

    Sets app["root_prefix"] and walks the root's .dataset.json keys with
    getKeysCallback.  Raises ValueError if the root key has an unexpected
    form; storage errors are logged and the scan continues to cleanup.
    """
    root_key = getS3Key(rootid)

    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    # strip the trailing ".group.json" to get the root's key prefix
    root_prefix = root_key[:-(len(".group.json"))]
    app["root_prefix"] = root_prefix

    try:
        await getStorKeys(app,
                          prefix=root_prefix,
                          suffix=".dataset.json",
                          include_stats=False,
                          callback=getKeysCallback)
    except ClientError as ce:
        # fixed: previous message said "removeKeys - getS3Keys faiiled"
        log.error(f"run_scan - getStorKeys failed: {ce}")
    except HTTPNotFound:
        log.warn(
            f"getStorKeys - HTTPNotFound error for getStorKeys with prefix: {root_prefix}"
        )
    except HTTPInternalServerError:
        log.error(
            f"getStorKeys - HTTPInternalServerError for getStorKeys with prefix: {root_prefix}"
        )
    except Exception as e:
        log.error(
            f"getStorKeys - Unexpected Exception for getStorKeys with prefix: {root_prefix}: {e}"
        )

    # update all chunks for datasets with H5D_CHUNKED_REF_INDIRECT layout
    indirect_dataset_keys = app["indirect_dataset_keys"]
    for prefix in indirect_dataset_keys:
        log.info(f"got indirect prefix: {prefix}")
        # TBD...

    await releaseStorageClient(app)
Example #9
0
def main():
    """ Entry point: create a test domain, then spawn createGroup tasks in
    batches of max_concurrent_tasks until group_target groups are requested.
    """
    domain = getTestDomainName("mkgroups_perf")
    print("domain: {}".format(domain))

    log.info("initializing")
    signal.signal(signal.SIGTERM, sig_handler)  # add handlers for early exit
    signal.signal(signal.SIGINT, sig_handler)

    loop = asyncio.get_event_loop()
    globals["loop"] = loop

    # create a client Session here so that all client requests
    #   will share the same connection pool
    max_tcp_connections = int(config.get("max_tcp_connections"))
    client = ClientSession(loop=loop, connector=TCPConnector(limit=max_tcp_connections))
    globals["client"] = client
    globals["group_count"] = 0
    globals["grp_request_count"] = 0
    globals["lnk_request_count"] = 0
    globals["grp_failed_posts"] = 0
    globals["lnk_failed_posts"] = 0
    globals["group_target"] = config.get("group_target")
    max_concurrent_tasks = config.get("max_concurrent_tasks")

    loop.run_until_complete(getEndpoints())

    if len(globals["sn_endpoints"]) == 0:
        log.error("no SN endpoints found!")
        # ClientSession.close() is a coroutine - it must be awaited while
        # the loop is still open (previously it was called un-awaited,
        # after loop.close(), leaking the connection pool)
        loop.run_until_complete(client.close())
        loop.close()
        sys.exit()
    for endpoint in globals["sn_endpoints"]:
        log.info("got endpoint: {}".format(endpoint))

    loop.run_until_complete(verifyDomain(domain))
    globals["domain"] = domain  # save the domain
    globals["start_time"] = time.time()

    # start making groups!
    while globals["grp_request_count"] < globals["group_target"]:
        tasks = []
        count = max_concurrent_tasks
        if globals["group_target"] - globals["grp_request_count"] < count:
            count = globals["group_target"] - globals["grp_request_count"]
        log.info("adding {} tasks".format(count))
        for _ in range(count):
            tasks.append(asyncio.ensure_future(createGroup()))
        # got a batch, move them out!
        loop.run_until_complete(asyncio.gather(*tasks))

    # record the stop time before tearing anything down
    globals["stop_time"] = time.time()

    # close the client session (a coroutine) before closing the loop
    loop.run_until_complete(client.close())
    loop.close()

    print_results()
Example #10
0
async def getEndpoints():
    """ Query the head node for the active SN nodes and store their
    endpoint urls in globals["sn_endpoints"].

    Raises HttpProcessingError if the head-node request fails.
    """
    docker_machine_ip = config.get("docker_machine_ip")
    req = "{}/nodestate/sn".format(config.get("head_endpoint"))
    client = globals["client"]
    async with client.get(req) as rsp:
        if rsp.status == 200:
            rsp_json = await rsp.json()
        else:
            # previously a non-200 fell through with rsp_json unbound,
            # producing a NameError below - raise a clear error instead
            log.error("got status: {} for GET req: {}".format(rsp.status, req))
            raise HttpProcessingError(code=rsp.status, message="Service error")
    nodes = rsp_json["nodes"]
    sn_endpoints = []
    docker_links = checkDockerLink()
    for node in nodes:
        if not node["host"]:
            # node hasn't registered a host yet
            continue
        if docker_links:
            # when running in docker, use the machine address as host
            host = "hsds_sn_{}".format(node["node_number"])
        elif docker_machine_ip:
            host = docker_machine_ip
        else:
            host = node["host"]
        url = "http://{}:{}".format(host, node["port"])
        sn_endpoints.append(url)
    log.info("{} endpoints".format(len(sn_endpoints)))
    globals["sn_endpoints"] = sn_endpoints
Example #11
0
async def createGroup():
    """ create a new group and link it to the root group with
    link name of group name

    Returns the id of the new group.
    Raises HttpProcessingError if the create or link request fails
    (a 409 on the link - another task won the race - is tolerated).
    """
    client = globals["client"]
    domain = globals["domain"]
    params = {"host": domain}
    base_req = getEndpoint()
    headers = getRequestHeaders()

    # create a new group
    req = base_req + "/groups"
    log.info("POST:" + req)
    globals["grp_request_count"] += 1
    group_name = globals["grp_request_count"]
    timeout = config.get("timeout")
    async with client.post(req, headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status != 201:
            log.error("POST {} failed with status: {}, rsp: {}".format(req, rsp.status, str(rsp)))
            globals["grp_failed_posts"] += 1
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        else:
            globals["group_count"] += 1
            log.info("group_count: {}".format(globals["group_count"]))
        group_json = await rsp.json()
        group_id = group_json["id"]

    # link group to parent (the root group), named after the request count
    root_id = globals["root"]
    group_name = "group_{}".format(group_name)
    req = base_req + "/groups/" + root_id + "/links/" + group_name
    data = {"id": group_id}
    log.info("PUT " + req)
    globals["lnk_request_count"] += 1
    async with client.put(req, data=json.dumps(data), headers=headers, timeout=timeout, params=params) as rsp:
        if rsp.status == 409:
            # another task has created this link already
            log.warn("got 409 in request: " + req)
        elif rsp.status != 201:
            globals["lnk_failed_posts"] += 1
            log.error("got http error: {} for request: {}, rsp: {}".format(rsp.status, req, rsp))
            raise HttpProcessingError(code=rsp.status, message="Unexpected error")
        # (removed unused link_created flag)

    return group_id
async def shutdown(app):
    """ Release all open storage connections before exit. """
    log.info("closing storage connections")
    await releaseStorageClient(app)
Example #13
0
async def bucketCheck(app, base_folder):
    """ Verify that contents of bucket are self-consistent
    """
    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    bucket = app["bucket_name"]

    # the leading slash is not part of the storage key
    prefix = base_folder[1:] if base_folder.startswith('/') else base_folder

    keys = await getStorKeys(app, prefix=prefix, suffix='domain.json')

    if not keys:
        print("no storage keys were found!")
        return

    # running totals across all scanned domains
    root_count = 0
    group_count = 0
    dataset_count = 0
    datatype_count = 0
    chunk_count = 0
    total_chunk_bytes = 0
    total_metadata_bytes = 0

    log.info(f"got {len(keys)} keys")
    print(
        "name, num_groups, num_datasets, num_datatypes, num chunks, metadata bytes, chunk bytes"
    )
    for key in keys:
        log.info(f"got key: {key}")
        domain_json = await getStorJSONObj(app, key, bucket=bucket)
        if "root" not in domain_json:
            # folder objects have no root group - skip them
            log.info(f"skipping folder object: {key}")
            continue
        scan = await scanRoot(app, domain_json["root"], bucket=bucket)
        log.debug(f"got scan_results: {scan}")
        num_groups = scan["num_groups"]
        num_datasets = len(scan["datasets"])
        num_datatypes = scan["num_datatypes"]
        num_chunks = scan["num_chunks"]
        chunk_bytes = scan["allocated_bytes"]
        metadata_bytes = scan["metadata_bytes"]

        print(
            f"{key},{num_groups},{num_datasets},{num_datatypes},{num_chunks},{metadata_bytes},{chunk_bytes}"
        )

        # TBD - get service scan results from .info.json and compare to ones just calculated
        root_count += 1
        group_count += num_groups
        dataset_count += num_datasets
        datatype_count += num_datatypes
        chunk_count += num_chunks
        total_chunk_bytes += chunk_bytes
        total_metadata_bytes += metadata_bytes

    await releaseStorageClient(app)

    print("")
    print("Totals")
    print("=" * 40)
    print(f"folders: {len(keys) - root_count}")
    print(f"domains: {root_count}")
    print(f"groups: {group_count}")
    print(f"datasets: {dataset_count}")
    print(f"chunk count {chunk_count}")
    print(f"metadata bytes: {total_metadata_bytes}")
    print(f"chunk bytes: {total_chunk_bytes}")
    print("")
Example #14
0
async def checkDataset(app, dset_key):
    """ Check one dataset's layout and rewrite its file_uri if it matches
    the old prefix.

    For H5D_CONTIGUOUS_REF / H5D_CHUNKED_REF layouts, file_uri values
    starting with app["prefix_old"] are rewritten to app["prefix_new"]
    (written back to storage only when app["do_update"] is set).  For
    H5D_CHUNKED_REF_INDIRECT layouts, the dataset prefix is queued in
    app["indirect_dataset_keys"] for a later chunk scan.
    """
    log.info(f"checkDataset for key: {dset_key}")
    dset_json = await getStorJSONObj(app, dset_key)
    dset_id = dset_json["id"]
    prefix_old = app["prefix_old"]
    prefix_new = app["prefix_new"]
    do_update = app["do_update"]
    indirect_dataset_keys = app["indirect_dataset_keys"]
    app["dataset_count"] += 1
    log.info(f"checkDataset for id: {dset_id}")
    if "layout" not in dset_json:
        log.info("no layout found")
        return
    layout_json = dset_json["layout"]
    if "class" not in layout_json:
        log.warn(f"no class found in layout for id: {dset_id}")
        return
    layout_class = layout_json["class"]
    log.info(f"got layout_class: {layout_class}")
    if layout_class in ('H5D_CONTIGUOUS_REF', 'H5D_CHUNKED_REF'):
        if "file_uri" not in layout_json:
            log.warn(
                f"Expected to find key 'file_uri' in layout_json for id: {dset_id}"
            )
            return
        file_uri = layout_json["file_uri"]
        if file_uri.startswith(prefix_old):
            new_file_uri = prefix_new + file_uri[len(prefix_old):]
            log.info(f"replacing uri: {file_uri} with {new_file_uri}")
            app["matched_dset_uri"] += 1
            if do_update:
                # update the dataset json
                layout_json["file_uri"] = new_file_uri
                dset_json["layout"] = layout_json
                # write back to storage
                try:
                    await putStorJSONObj(app, dset_key, dset_json)
                    log.info(f"dataset {dset_id} updated")
                except Exception as e:
                    log.error(f"get exception writing dataset json: {e}")
    elif layout_class == 'H5D_CHUNKED_REF_INDIRECT':
        # add to list to be scanned later
        # bug fix: "+=" with a string extended the list with the string's
        # individual characters; append adds the key as a single element
        indirect_dataset_keys.append(dset_key[:-len(".dataset.json")])
    else:
        log.info(f"skipping check for layout_class: {layout_class}")
async def shutdown(app):
    """ Release the S3 client connections at shutdown. """
    log.info("closing S3 connections")
    await releaseClient(app)