Example #1
0
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" not in params:
        log.warn("expected count param")
        raise HTTPBadRequest()
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points - num_points: {num_points}")
        put_points = True
    else:
        log.info(f"POST Chunk get points - num_points: {num_points}")

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    rank = len(dims)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    if rank == 1:
        coord_type_str = "uint64"
    else:
        coord_type_str = f"({rank},)uint64"

    if put_points:
        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        point_dt = np.dtype([("coord", np.dtype(coord_type_str)),
                             ("value", dset_dtype)])
        point_shape = (num_points, )
        chunk_init = True
    else:
        point_dt = np.dtype('uint64')
        point_shape = (num_points, rank)
        chunk_init = False

    point_arr = bytesToArray(input_bytes, point_dt, point_shape)

    chunk_arr = await get_chunk(app,
                                chunk_id,
                                dset_json,
                                bucket=bucket,
                                s3path=s3path,
                                s3offset=s3offset,
                                s3size=s3size,
                                chunk_init=chunk_init)
    if chunk_arr is None:
        log.warn(f"chunk {chunk_id} not found")
        raise HTTPNotFound()

    if put_points:
        # writing point data
        try:
            chunkWritePoints(chunk_id=chunk_id,
                             chunk_layout=dims,
                             chunk_arr=chunk_arr,
                             point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkWritePoints: {ve}")
            raise HTTPBadRequest()
        # write empty response
        resp = json_response({})

        save_chunk(app, chunk_id, dset_json,
                   bucket=bucket)  # lazily write chunk to storage
    else:
        # read points
        try:
            output_arr = chunkReadPoints(chunk_id=chunk_id,
                                         chunk_layout=dims,
                                         chunk_arr=chunk_arr,
                                         point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkReadPoints: {ve}")
            raise HTTPBadRequest()
        output_data = arrayToBytes(output_arr)
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    return resp
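
The put-points payload packs each point as a (coord, value) record, where coord widens to a subarray for rank > 1. A minimal standalone sketch of that layout, assuming a hypothetical rank-2 dataset with a float32 dtype:

import numpy as np

# hypothetical rank and dataset dtype, for illustration only
rank = 2
dset_dtype = np.dtype("float32")
point_dt = np.dtype([("coord", f"({rank},)uint64"), ("value", dset_dtype)])

# pack two points the way a client would serialize them...
points = np.zeros((2,), dtype=point_dt)
points[0] = ((0, 1), 3.5)
points[1] = ((4, 2), 7.25)
payload = points.tobytes()

# ...and decode them the way the handler's bytesToArray call would
decoded = np.frombuffer(payload, dtype=point_dt)
assert decoded["value"][1] == np.float32(7.25)
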
Example #2
0
async def DELETE_Domain(request):
    """HTTP method to delete a domain resource"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    domain = None
    meta_only = False  # if True, just delete the meta cache value
    keep_root = False
    if request.has_body:
        body = await request.json()
        if "meta_only" in body:
            meta_only = body["meta_only"]
        if "keep_root" in body:
            keep_root = body["keep_root"]
    else:
        if "meta_only" in params:
            meta_only = params["meta_only"]
        if "keep_root" in params:
            keep_root = params["keep_root"]

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        log.warn(f"Invalid domain: {domain}")
        raise HTTPBadRequest(reason="Invalid domain name")
    bucket = getBucketForDomain(domain)
    log.debug(f"GET_Domain domain: {domain} bucket: {bucket}")

    if not domain:
        msg = "No domain given"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    log.info(f"meta_only domain delete: {meta_only}")
    if meta_only:
        # remove from domain cache if present
        domain_cache = app["domain_cache"]
        if domain in domain_cache:
            log.info(f"deleting {domain} from domain_cache")
            del domain_cache[domain]
        resp = await jsonResponse(request, {})
        return resp

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    parent_domain = getParentDomain(domain)
    if not parent_domain or getPathForDomain(parent_domain) == '/':
        is_toplevel = True
    else:
        is_toplevel = False

    if is_toplevel and username != "admin":
        msg = "Deletion of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code == 404:
            log.warn("domain not found")
            raise HTTPNotFound()
        elif ce.code == 410:
            log.warn("domain has been removed")
            raise HTTPGone()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    aclCheck(domain_json, "delete",
             username)  # throws exception if not allowed

    # check for sub-objects if this is a folder
    if "root" not in domain_json:
        index = domain.find('/')
        s3prefix = domain[(index + 1):] + '/'
        log.info(f"checking s3key with prefix: {s3prefix} in bucket: {bucket}")
        s3keys = await getS3Keys(app,
                                 include_stats=False,
                                 prefix=s3prefix,
                                 deliminator='/',
                                 bucket=bucket)
        for s3key in s3keys:
            if s3key.endswith("/"):
                log.warn(f"attempt to delete folder {domain} with sub-items")
                log.debug(f"got prefix: {s3keys[0]}")
                raise HTTPConflict(reason="folder has sub-items")

    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"domain": domain}

    rsp_json = await http_delete(app, req, data=body)
    params = {}  # for http_delete requests to DN nodes
    if bucket:
        params["bucket"] = bucket

    if "root" in domain_json and not keep_root:
        # delete the root group

        root_id = domain_json["root"]
        req = getDataNodeUrl(app, root_id)
        req += "/groups/" + root_id
        await http_delete(app, req, params=params)

    # remove from domain cache if present
    domain_cache = app["domain_cache"]
    if domain in domain_cache:
        del domain_cache[domain]

    # delete domain cache from other sn_urls
    sn_urls = app["sn_urls"]
    body["meta_only"] = True
    for node_no in sn_urls:
        if node_no == app["node_number"]:
            continue  # don't send to ourselves
        sn_url = sn_urls[node_no]
        req = sn_url + "/"
        log.info(f"sending sn request: {req}")
        try:
            sn_rsp = await http_delete(app, req, data=body, params=params)
            log.info(f"{req} response: {sn_rsp}")
        except ClientResponseError as ce:
            log.warn(f"got error for sn_delete: {ce}")

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
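
For folder domains (no "root" key), the sub-item check derives an S3 prefix by stripping everything up to the first '/' and appending a trailing slash. A small sketch of that string manipulation, using a made-up domain value:

def folder_prefix(domain):
    # drop the bucket portion (up to the first '/') and terminate with
    # '/' so only sub-items of the folder match the prefix
    index = domain.find('/')
    return domain[(index + 1):] + '/'

# hypothetical domain string, for illustration only
assert folder_prefix("mybucket/home/projects") == "home/projects/"
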
Example #3
0
async def register(request):
    """ HTTP method for nodes to register with head node"""
    log.request(request)
    app = request.app
    if not request.has_body:
        msg = "register missing body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    body = await request.json()
    log.info(f"register request body: {body}")
    node_host = None
    node_port = None
    node_type = None
    node_id = None
    if 'id' not in body:
        msg = "Missing 'id'"
        log.response(request, code=400, message=msg)
        raise HTTPBadRequest(reason=msg)
    node_id = body['id']
    if 'node_type' not in body:
        msg = "missing key 'node_type'"
        log.response(request, code=400, message=msg)
        raise HTTPBadRequest(reason=msg)
    node_type = body['node_type']
    if node_type not in ('sn', 'dn'):
        msg=f"invalid node_type: {node_type}"
        log.response(request, code=400, message=msg)
        raise HTTPBadRequest(reason=msg)
    if 'port' not in body:
        msg = "missing key 'port'"
        log.response(request, code=400, message=msg)
        raise HTTPBadRequest(reason=msg)
    node_port = body['port']
    
    if 'ip' not in body:
        log.debug("register - get ip/port from request.transport")
        peername = request.transport.get_extra_info('peername')
        if peername is None:
            msg = "Can not determine caller IP"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)
        if peername[0] is None or peername[0] in ("::1", "127.0.0.1"):
            node_host = "localhost"  
        else:
            node_host = peername[0]
    else:
        # Specifying the ip is useful in docker / DCOS situations, where a
        # docker private network IP might otherwise be used
        node_host = body["ip"]

    log.info(f"register host: {node_host}, port: {node_port}")    

    nodes = app['nodes']
    dead_node_ids = app['dead_node_ids']

    if node_id in nodes:
        # already registered?
        node = nodes[node_id]
        if node_type != node.type:
            msg = f"Unexpected node_type {node_type} (expected: {node.type})for node_id: {node_id}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)
        if node_port != node.port:
            msg = f"Unexpected node_port {node_port} (expected: {node.port}) for node_id: {node_id}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)
        if node_host != node.host:
            msg = f"Unexpected node_host {node_host}(expected: {node.host}) for node_id: {node_id}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)
        node.poll_update()  # note that the node has checked in
    elif node_id in dead_node_ids:
        log.error(f"unexpected register request from node id: {node_id}")
        raise HTTPInternalServerError()
    else:
        log.info(f"Node {node_id} is unknown, new node coming online.")
        node = Node(node_id=node_id, node_type=node_type, node_host=node_host, node_port=node_port)   
        # delete any existing node with the same port
        removeNode(app, host=node_host, port=node_port)
        nodes[node_id] = node

    answer = {}

    if await isClusterReady(app):
        answer["cluster_state"] = "READY"
    else:
        answer["cluster_state"] = "WAITING"
    sn_urls = []
    dn_urls = []
    sn_ids = []
    dn_ids = []
    for node_id in nodes:
        node = nodes[node_id]
        if not node.is_healthy():
            continue
        node_url = f"http://{node.host}:{node.port}"
        if node.type == "sn":
            sn_urls.append(node_url)
            sn_ids.append(node_id)
        else:
            dn_urls.append(node_url)
            dn_ids.append(node_id)

    # sort dn_urls so node number can be determined
    dn_id_map = dict(zip(dn_urls, dn_ids))
    dn_urls.sort()
    dn_ids = [dn_id_map[dn_url] for dn_url in dn_urls]  # re-arrange ids to match url order

    answer["sn_urls"] = sn_urls
    answer["dn_urls"] = dn_urls
    answer["sn_ids"] = sn_ids
    answer["dn_ids"] = dn_ids
    answer["req_ip"] = node_host
    log.debug(f"register returning: {answer}")
    app["last_health_check"] = int(time.time())

    resp = json_response(answer)
    log.response(request, resp=resp)
    return resp
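
A registering node posts its id, type, and port in the JSON body validated above. A hedged sketch of the client side, assuming aiohttp and that the handler is routed at /register (both the route and the head-node url here are assumptions):

import asyncio
import aiohttp

async def register_with_head(head_url, node_id, node_type, port):
    # body carries the keys the register handler checks for
    body = {"id": node_id, "node_type": node_type, "port": port}
    async with aiohttp.ClientSession() as session:
        async with session.post(head_url + "/register", json=body) as rsp:
            return await rsp.json()  # cluster_state plus sn/dn urls and ids

# usage: asyncio.run(register_with_head("http://head:5100", "dn-001", "dn", 6101))
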
Example #4
0
async def GET_ACLs(request):
    """HTTP method to return JSON for domain/ACLs"""
    log.request(request)
    app = request.app

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError:
        log.warn("domain not found")
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug(f"got domain_json: {domain_json}")
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "readACL",
             username)  # throws exception if not authorized

    acl_list = []
    acl_usernames = list(acls.keys())
    acl_usernames.sort()
    for acl_username in acl_usernames:
        entry = {"userName": acl_username}
        acl = acls[acl_username]

        for k in acl.keys():
            entry[k] = acl[k]
        acl_list.append(entry)
    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acls"] = acl_list

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
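
The acls dict keyed by username is flattened into a sorted list of entries, each tagged with its userName. A standalone sketch of that transformation with a made-up acls value:

# hypothetical acls map, for illustration only
acls = {
    "bob": {"read": True, "update": False},
    "alice": {"read": True, "update": True},
}

acl_list = []
for acl_username in sorted(acls):
    entry = {"userName": acl_username}
    entry.update(acls[acl_username])  # copy the permission flags
    acl_list.append(entry)

assert acl_list[0] == {"userName": "alice", "read": True, "update": True}
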
Example #5
0
async def GET_Domain(request):
    """HTTP method to return JSON for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = None
    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        log.warn(f"Invalid domain: {domain}")
        raise HTTPBadRequest(reason="Invalid domain name")
    bucket = getBucketForDomain(domain)
    log.debug(f"GET_Domain domain: {domain} bucket: {bucket}")

    if not bucket and not config.get("bucket_name"):
        # no bucket defined, raise 400
        msg = "Bucket not provided"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    verbose = False
    if "verbose" in params and params["verbose"]:
        verbose = True

    if not domain:
        log.info("no domain passed in, returning all top-level domains")
        # no domain passed in, return top-level domains for this request
        domains = await get_domains(request)
        rsp_json = {"domains": domains}
        rsp_json["hrefs"] = []
        resp = await jsonResponse(request, rsp_json)
        log.response(request, resp=resp)
        return resp

    log.info(f"got domain: {domain}")

    domain_json = await getDomainJson(app, domain, reload=True)

    if domain_json is None:
        log.warn(f"domain: {domain} not found")
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug(f"got domain_json: {domain_json}")
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    if "h5path" in params:
        # if h5path is passed in, return object info for that path
        #   (if exists)
        h5path = params["h5path"]
        root_id = domain_json["root"]
        obj_id = await getObjectIdByPath(app, root_id, h5path,
                                         bucket=bucket)  # throws 404 if not found
        log.info(f"get obj_id: {obj_id} from h5path: {h5path}")
        # get authoritative state for object from DN (even if it's in the meta_cache).
        obj_json = await getObjectJson(app,
                                       obj_id,
                                       refresh=True,
                                       bucket=bucket)
        obj_json["domain"] = domain
        # Not bothering with hrefs for h5path lookups...
        resp = await jsonResponse(request, obj_json)
        log.response(request, resp=resp)
        return resp

    # return just the keys as per the REST API
    rsp_json = await get_domain_response(app,
                                         domain_json,
                                         bucket=bucket,
                                         verbose=verbose)

    # include domain objects if requested
    if "getobjs" in params and params["getobjs"] and "root" in domain_json:
        root_id = domain_json["root"]
        include_attrs = False
        if "include_attrs" in params and params["include_attrs"]:
            include_attrs = True
        domain_objs = await getDomainObjects(app,
                                             root_id,
                                             include_attrs=include_attrs,
                                             bucket=bucket)
        rsp_json["domain_objs"] = domain_objs

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'database',
            'href': getHref(request, '/datasets')
        })
        hrefs.append({'rel': 'groupbase', 'href': getHref(request, '/groups')})
        hrefs.append({
            'rel': 'typebase',
            'href': getHref(request, '/datatypes')
        })
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })

    hrefs.append({'rel': 'acls', 'href': getHref(request, '/acls')})
    parent_domain = getParentDomain(domain)
    if not parent_domain or getPathForDomain(parent_domain) == '/':
        is_toplevel = True
    else:
        is_toplevel = False
    log.debug(f"href parent domain: {parent_domain}")
    if not is_toplevel:
        hrefs.append({
            'rel': 'parent',
            'href': getHref(request, '/', domain=parent_domain)
        })

    rsp_json["hrefs"] = hrefs
    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
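
Note that rel_url.query values are strings, so checks like params["verbose"] above are truthy for any non-empty value, including "0" or "false". If stricter parsing were wanted, a hedged helper along these lines could normalize the common false spellings (an assumption, not what the handler currently does):

def parse_bool_param(params, name, default=False):
    # query params arrive as strings; treat common false spellings as False
    if name not in params:
        return default
    return params[name].lower() not in ("0", "false", "no", "")

assert parse_bool_param({"verbose": "1"}, "verbose") is True
assert parse_bool_param({"verbose": "false"}, "verbose") is False
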
Example #6
0
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
    else:
        expected_content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)

    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)

    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    resp = {}
    query_update = None
    limit = 0
    chunk_init = True
    input_arr = None
    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        if rank != 1:
            log.error("expected one-dimensional array for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        if "Limit" in params:
            limit = int(params["Limit"])
        chunk_init = False
    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")

        if itemsize != 'H5T_VARIABLE' and (num_elements *
                                           itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(
            request
        )  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               chunk_init=chunk_init,
                               bucket=bucket)
    is_dirty = False
    if query:
        values = []
        indices = []
        if chunk_arr is not None:
            # do query selection

            field_names = list(dt.fields.keys())
            replace_mask = [
                None,
            ] * len(field_names)
            for i in range(len(field_names)):
                field_name = field_names[i]
                if field_name in query_update:
                    replace_mask[i] = query_update[field_name]
            log.debug(f"replace_mask: {replace_mask}")

            x = chunk_arr[selection]
            log.debug(f"put_query - x: {x}")
            eval_str = getEvalStr(query, "x", field_names)
            log.debug(f"put_query - eval_str: {eval_str}")
            where_result = np.where(eval(eval_str))
            log.debug(f"put_query - where_result: {where_result}")
            where_result_index = where_result[0]
            log.debug(f"put_query - whare_result index: {where_result_index}")
            log.debug(
                f"put_query - boolean selection: {x[where_result_index]}")
            s = selection[0]
            count = 0
            for index in where_result_index:
                log.debug(f"put_query - index: {index}")
                value = x[index].copy()
                log.debug(f"put_query - original value: {value}")
                for i in range(len(field_names)):
                    if replace_mask[i] is not None:
                        value[i] = replace_mask[i]
                log.debug(f"put_query - modified value: {value}")
                try:
                    chunk_arr[index] = value
                except ValueError as ve:
                    log.error(f"Numpy Value updating array: {ve}")
                    raise HTTPInternalServerError()

                json_val = bytesArrayToList(value)
                log.debug(f"put_query - json_value: {json_val}")
                json_index = index.tolist() * s.step + s.start  # adjust for selection
                indices.append(json_index)
                values.append(json_val)
                count += 1
                is_dirty = True
                if limit > 0 and count >= limit:
                    log.info("put_query - got limit items")
                    break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result returning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # update chunk array
        try:
            chunk_arr[selection] = input_arr
        except ValueError as ve:
            log.error(f"Numpy Value updating array: {ve}")
            raise HTTPInternalServerError()
        is_dirty = True
        resp = json_response({}, status=201)

    if is_dirty:
        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)
        log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    log.response(request, resp=resp)
    return resp
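
The query path above evaluates a client expression against a compound numpy array via getEvalStr/eval, then rewrites the matching rows field by field. A minimal standalone sketch of the same np.where-over-eval pattern, with a hypothetical expression standing in for getEvalStr output:

import numpy as np

# hypothetical compound chunk data, for illustration only
dt = np.dtype([("symbol", "S4"), ("price", "f4")])
x = np.array([(b"AAPL", 10.0), (b"MSFT", 20.0), (b"AAPL", 30.0)], dtype=dt)

eval_str = "x['symbol'] == b'AAPL'"   # what getEvalStr might produce
where_result = np.where(eval(eval_str))
for index in where_result[0]:
    value = x[index].copy()
    value["price"] = 0.0              # apply the replace mask
    x[index] = value

assert x["price"].tolist() == [0.0, 20.0, 0.0]
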
Example #7
0
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")

        put_points = True
    else:
        log.info("POST Chunk get points")
    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               bucket=bucket,
                               s3path=s3path,
                               s3offset=s3offset,
                               s3size=s3size,
                               chunk_init=put_points)

    if chunk_arr is None:
        if put_points:
            log.error("no array returned for put_points")
            raise HTTPInternalServerError()
        else:
            # get points on a non-existent S3 object?
            log.warn("S3 object not found for get points")
            raise HTTPNotFound()

    log.debug(f"chunk_arr.shape: {chunk_arr.shape}")

    if put_points:
        # writing point data

        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)),
                               ("value", dset_dtype)])
        # np.fromstring is deprecated; frombuffer + copy gives a writable array
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype).copy()

        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        for i in range(num_points):
            elem = point_arr[i]
            log.debug(f"non-relative coordinate: {elem}")
            if rank == 1:
                coord = int(elem[0])
                coord = coord % chunk_layout[0]  # adjust to chunk relative

            else:
                coord = elem[0]  # index to update
                for dim in range(rank):
                    # adjust to chunk relative
                    coord[dim] = int(coord[dim]) % chunk_layout[dim]
                coord = tuple(coord)  # need to convert to a tuple
            log.debug(f"relative coordinate: {coord}")

            val = elem[1]  # value
            try:
                chunk_arr[coord] = val  # update the point
            except IndexError:
                msg = "Out of bounds point index for POST Chunk"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)
        log.info(f"set {chunk_id} to dirty")

    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        # np.fromstring is deprecated; read points via frombuffer
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")

        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points, ), dtype=dset_dtype)

        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    return resp
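
Point coordinates arrive as absolute dataset indices; both handlers map them into the chunk with a per-dimension modulo against the chunk layout. A tiny sketch of that adjustment with made-up values:

# hypothetical chunk layout and absolute point, for illustration only
chunk_layout = (100, 200)
point = (345, 467)

# adjust to chunk relative, one dimension at a time
coord = tuple(point[dim] % chunk_layout[dim] for dim in range(len(chunk_layout)))
assert coord == (45, 67)
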
Example #8
0
async def getS3JSONObj(app, key, bucket=None):
    """ Get S3 object identified by key and read as JSON
    """

    client = getS3Client(app)
    if not bucket:
        bucket = app['bucket_name']
    if key[0] == '/':
        key = key[1:]  # no leading slash
    log.info(f"getS3JSONObj(s3://{bucket})/{key}")
    s3_stats_increment(app, "get_count")
    start_time = time.time()
    try:
        resp = await client.get_object(Bucket=bucket, Key=key)
        data = await resp['Body'].read()
        finish_time = time.time()
        log.info(
            f"s3Util.getS3JSONObj({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
        )
        resp['Body'].close()
    except ClientError as ce:
        # key does not exist?
        # check for not found status
        # Note: Error.Code should always exist - cf https://github.com/boto/botocore/issues/885
        response_code = ce.response['Error']['Code']
        log.info(
            f"ClientError on getS3JSONObj key: {key} bucket: {bucket}: {response_code}"
        )

        # remove key from pending map if present
        if "pending_s3_read" in app:
            pending_s3_read = app["pending_s3_read"]
            if key in pending_s3_read:
                log.debug(f"remove {key} from pending_s3_read")
                del pending_s3_read[key]

        if response_code == "NoSuchKey":
            msg = f"s3_key: {key} not found "
            log.info(msg)
            raise HTTPNotFound()
        elif response_code == "NoSuchBucket":
            msg = f"s3_bucket: {bucket} not fiound"
            log.info(msg)
            raise HTTPNotFound()
        else:
            s3_stats_increment(app, "error_count")
            log.warn(f"got ClientError on s3 get: {ce}")
            msg = "Error getting s3 obj: " + str(ce)
            log.error(msg)
            raise HTTPInternalServerError()

    s3_stats_increment(app, "bytes_in", inc=len(data))
    try:
        json_dict = json.loads(data.decode('utf8'))
    except (UnicodeDecodeError, json.JSONDecodeError):
        s3_stats_increment(app, "error_count")
        msg = f"Error loading JSON at key: {key}"
        log.error(msg)
        raise HTTPInternalServerError()

    log.debug(f"s3 key {key} returned: {json_dict}")
    return json_dict
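
The except branch above maps botocore error codes onto HTTP errors. A condensed sketch of that dispatch, reusing the aiohttp exception classes these handlers already raise:

from aiohttp.web import HTTPInternalServerError, HTTPNotFound

def raise_for_s3_error_code(response_code):
    # missing key or bucket surfaces as a 404; anything else is a 500
    if response_code in ("NoSuchKey", "NoSuchBucket"):
        raise HTTPNotFound()
    raise HTTPInternalServerError()
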
Example #9
0
async def getS3Bytes(app,
                     key,
                     shuffle=0,
                     deflate_level=None,
                     s3offset=0,
                     s3size=None,
                     bucket=None):
    """ Get S3 object identified by key and read as bytes
    """

    client = getS3Client(app)
    if not bucket:
        bucket = app['bucket_name']
    if key[0] == '/':
        key = key[1:]  # no leading slash
    log.info(f"getS3Bytes(s3://{bucket}/{key})")
    start_time = time.time()
    s3_stats_increment(app, "get_count")
    range = ""
    if s3size:
        range = f"bytes={s3offset}-{s3offset+s3size-1}"
        log.info(f"s3 range request: {range}")

    try:
        resp = await client.get_object(Bucket=bucket, Key=key, Range=byte_range)
        data = await resp['Body'].read()
        finish_time = time.time()
        log.info(
            f"s3Util.getS3Bytes({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
        )

        resp['Body'].close()
    except ClientError as ce:
        # key does not exist?
        # check for not found status
        response_code = ce.response["Error"]["Code"]
        if response_code == "NoSuchKey":
            msg = f"s3_key: {key} not found "
            log.warn(msg)
            raise HTTPInternalServerError()
        elif response_code == "NoSuchBucket":
            msg = f"s3_bucket: {bucket} not fiound"
            log.info(msg)
            raise HTTPNotFound()
        else:
            s3_stats_increment(app, "error_count")
            log.error(f"got unexpected ClientError on s3 get {key}: {ce}")
            raise HTTPInternalServerError()

    if data and len(data) > 0:
        s3_stats_increment(app, "bytes_in", inc=len(data))
        log.info(f"read: {len(data)} bytes for S3 key: {key}")
        if deflate_level is not None:
            try:
                unzip_data = zlib.decompress(data)
                log.info(f"uncompressed to {len(unzip_data)} bytes")
                data = unzip_data
            except zlib.error as zlib_error:
                log.info(f"zlib_err: {zlib_error}")
                log.warn(f"unable to uncompress s3 obj: {key}")
        if shuffle > 0:
            unshuffled = _unshuffle(shuffle, data)
            log.info(f"unshuffled to {len(unshuffled)} bytes")
            data = unshuffled

    return data
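
The shuffle branch relies on the internal _unshuffle helper. As a hedged sketch (the real helper may differ), byte unshuffling is the inverse of the HDF5 shuffle filter, which stores all first bytes of each item, then all second bytes, and so on:

import numpy as np

def unshuffle_bytes(item_size, data):
    # hedged sketch of byte unshuffling; assumes len(data) % item_size == 0
    arr = np.frombuffer(data, dtype=np.uint8)
    num_items = len(data) // item_size
    # shuffled layout is (item_size, num_items); transposing restores items
    return arr.reshape((item_size, num_items)).T.tobytes()

# round trip against a manual shuffle of two little-endian uint16 values
values = np.array([0x0102, 0x0304], dtype="<u2").tobytes()      # 02 01 04 03
shuffled = bytes([values[0], values[2], values[1], values[3]])  # 02 04 01 03
assert unshuffle_bytes(2, shuffled) == values
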
Example #10
0
async def GET_AttributeValue(request):
    """HTTP method to return an attribute value"""
    log.request(request)
    app = request.app
    log.info("GET_AttributeValue")
    collection = getRequestCollectionName(
        request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain value: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "read")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(dn_json))

    attr_shape = dn_json["shape"]
    log.debug(f"attribute shape: {attr_shape}")
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be read"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    accept_type = getAcceptType(request)
    response_type = accept_type  # will adjust later if binary not possible
    type_json = dn_json["type"]
    shape_json = dn_json["shape"]
    item_size = getItemSize(type_json)

    if item_size == 'H5T_VARIABLE' and accept_type != "json":
        msg = "Client requested binary, but only JSON is supported for variable length data types"
        log.info(msg)
        response_type = "json"

    if response_type == "binary":
        arr_dtype = createDataType(type_json)  # np datatype
        np_shape = getShapeDims(shape_json)
        try:
            arr = jsonToArray(np_shape, arr_dtype, dn_json["value"])
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        output_data = arr.tobytes()
        log.debug(
            f"GET AttributeValue - returning {len(output_data)} bytes binary data"
        )
        # write response
        try:
            resp = StreamResponse()
            resp.content_type = "application/octet-stream"
            resp.content_length = len(output_data)
            # allow CORS
            resp.headers['Access-Control-Allow-Origin'] = '*'
            resp.headers[
                'Access-Control-Allow-Methods'] = "GET, POST, DELETE, PUT, OPTIONS"
            resp.headers[
                'Access-Control-Allow-Headers'] = "Content-Type, api_key, Authorization"
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Got exception: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    else:
        resp_json = {}
        if "value" in dn_json:
            resp_json["value"] = dn_json["value"]

        hrefs = []
        obj_uri = '/' + collection + '/' + obj_id
        attr_uri = obj_uri + '/attributes/' + attr_name
        hrefs.append({'rel': 'self', 'href': getHref(request, attr_uri)})
        hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
        hrefs.append({'rel': 'owner', 'href': getHref(request, obj_uri)})
        resp_json["hrefs"] = hrefs
        resp = await jsonResponse(request, resp_json)
        log.response(request, resp=resp)
    return resp
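
For the binary response path, the JSON attribute value has to be materialized as a numpy array of the declared shape and dtype before tobytes(). A hedged equivalent of what jsonToArray presumably does for simple fixed-size types:

import numpy as np

# hypothetical shape, dtype, and value, for illustration only
np_shape = (2, 3)
arr_dtype = np.dtype("int32")
value = [[1, 2, 3], [4, 5, 6]]

arr = np.array(value, dtype=arr_dtype).reshape(np_shape)
output_data = arr.tobytes()
assert len(output_data) == arr_dtype.itemsize * arr.size  # 4 bytes * 6 elements
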
Example #11
0
async def PUT_AttributeValue(request):
    """HTTP method to update an attributes data"""
    log.request(request)
    log.info("PUT_AttributeValue")
    app = request.app
    collection = getRequestCollectionName(
        request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute Value id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT AttributeValue with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPInternalServerError()

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "update")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(obj_id))
    log.debug(f"got dn_json: {dn_json}")

    attr_shape = dn_json["shape"]
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be updated"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    np_shape = getShapeDims(attr_shape)
    type_json = dn_json["type"]
    np_dtype = createDataType(type_json)  # np datatype

    request_type = "json"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type not in ("application/json",
                                "application/octet-stream"):
            msg = f"Unknown content_type: {content_type}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if content_type == "application/octet-stream":
            log.debug("PUT AttributeValue - request_type is binary")
            request_type = "binary"
        else:
            log.debug("PUT AttribueValue - request type is json")

    binary_data = None
    if request_type == "binary":
        item_size = getItemSize(type_json)

        if item_size == 'H5T_VARIABLE':
            msg = "Only JSON is supported for variable length data types"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # read binary data
        binary_data = await request.read()
        if len(binary_data) != request.content_length:
            msg = f"Read {len(binary_data)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

    arr = None  # np array to hold request data

    if binary_data:
        npoints = getNumElements(np_shape)
        if npoints * item_size != len(binary_data):
            msg = f"Expected: {npoints * item_size} bytes, but got: {len(binary_data)}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # np.fromstring is deprecated; use frombuffer for decoding
        arr = np.frombuffer(binary_data, dtype=np_dtype)
        arr = arr.reshape(np_shape)  # conform to selection shape
        # convert to JSON for transmission to DN
        data = arr.tolist()
        value = bytesArrayToList(data)
    else:
        body = await request.json()

        if "value" not in body:
            msg = "PUT attribute value with no value in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]

        # validate that the value agrees with type/shape
        try:
            arr = jsonToArray(np_shape, np_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    log.info(f"Got: {arr.size} array elements")

    # ready to add attribute now
    attr_json = {}
    attr_json["type"] = type_json
    attr_json["shape"] = attr_shape
    attr_json["value"] = value

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.info(f"PUT Attribute Value: {req}")

    dn_json["value"] = value
    params = {"replace": 1}  # let the DN know we can overwrite the attribute
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute Value resp: {put_rsp}")

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    # attribute creation successful
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
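
The binary branch validates the payload against element count times item size before decoding. A standalone sketch of that check plus the frombuffer decode:

import numpy as np

# hypothetical attribute shape and dtype, for illustration only
np_shape = (4,)
np_dtype = np.dtype("float64")
binary_data = np.arange(4, dtype=np_dtype).tobytes()

npoints = int(np.prod(np_shape))
if npoints * np_dtype.itemsize != len(binary_data):
    raise ValueError("payload size does not match type/shape")
arr = np.frombuffer(binary_data, dtype=np_dtype).reshape(np_shape)
assert arr.tolist() == [0.0, 1.0, 2.0, 3.0]
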
Example #12
0
async def GET_Attributes(request):
    """ Return JSON for attribute collection
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query

    obj_id = get_obj_id(request)  
     
    include_data = False
    if "IncludeData" in params and params["IncludeData"]:
        include_data = True

    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
            log.info("GET_Links - using Limit: {}".format(limit))
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.error(msg)  # should be validated by SN
            raise HTTPInternalServerError()

    marker = None
    if "Marker" in params:
        marker = params["Marker"]
        log.info("GET_Links - using Marker: {}".format(marker))
     
    obj_json = await get_metadata_obj(app, obj_id)
    
    log.debug("GET attributes obj_id: {} got json".format(obj_id))
    if "attributes" not in obj_json:
        msg = "unexpected data for obj id: {}".format(obj_id)
        log.error(msg)
        raise HTTPInternalServerError()

    # return a list of attributes based on sorted dictionary keys
    attr_dict = obj_json["attributes"]
    attr_names = list(attr_dict.keys())
    attr_names.sort()  # sort by key 
    # TBD: provide an option to sort by create date

    start_index = 0
    if marker is not None:
        try:
            start_index = attr_names.index(marker) + 1
        except ValueError:
            # marker not found, return 404
            msg = "attribute marker: {}, not found".format(marker)
            log.warn(msg)
            raise HTTPNotFound()

    end_index = len(attr_names) 
    if limit is not None and (end_index - start_index) > limit:
        end_index = start_index + limit
    
    attr_list = []
    for i in range(start_index, end_index):
        attr_name = attr_names[i]
        src_attr = attr_dict[attr_name]
        des_attr = {}
        des_attr["created"] = src_attr["created"]
        des_attr["type"] = src_attr["type"]
        des_attr["shape"] = src_attr["shape"]
        des_attr["name"] = attr_name
        if include_data:
            des_attr["value"] = src_attr["value"]
        attr_list.append(des_attr)

    resp_json = {"attributes": attr_list} 
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp    
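
Marker/Limit paging over the sorted attribute names reduces to locating the marker and slicing. A standalone sketch of the same windowing:

def page_names(names, marker=None, limit=None):
    # names are sorted first; marker selects the item after which the
    # page starts, limit caps the page size
    names = sorted(names)
    start_index = 0
    if marker is not None:
        start_index = names.index(marker) + 1  # ValueError -> 404 upstream
    end_index = len(names)
    if limit is not None and (end_index - start_index) > limit:
        end_index = start_index + limit
    return names[start_index:end_index]

assert page_names(["a", "b", "c", "d"], marker="b", limit=1) == ["c"]
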
Example #13
0
async def PUT_Attribute(request):
    """ Handler for PUT /(obj)/<id>/attributes/<name>
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    obj_id = get_obj_id(request) 

    attr_name = request.match_info.get('name')
    log.info("PUT attribute {} in {}".format(attr_name, obj_id))
    validateAttributeName(attr_name)
        
    if not request.has_body:
        log.error( "PUT_Attribute with no body")
        raise HTTPBadRequest(message="body expected")

    body = await request.json() 
    
    replace = False
    if "replace" in params and params["replace"]:
        replace = True
        log.info("replace attribute")
    datatype = None
    shape = None
    value = None

    if "type" not in body:
        log.error("PUT attribute with no type in body")
        raise HTTPInternalServerError()

    datatype = body["type"]

    if "shape" not in body:
        log.error("PUT attribute with no shape in body")
        raise HTTPInternalServerError()
    shape = body["shape"]

    if "value" in body:
        value = body["value"]

    obj_json = await get_metadata_obj(app, obj_id)
    log.debug("PUT attribute obj_id: {} got json".format(obj_id))

    if "attributes" not in obj_json:
        log.error("unexpected obj data for id: {}".format(obj_id))
        raise HTTPInternalServerError()

    attributes = obj_json["attributes"]
    if attr_name in attributes and not replace:
        # Attribute already exists, return a 409
        log.warn("Attempt to overwrite attribute: {} in obj_id:".format(attr_name, obj_id))
        raise HTTPConflict()
    
    if replace and attr_name not in attributes:
        # Replace requires attribute exists
        log.warn("Attempt to update missing attribute: {} in obj_id:".format(attr_name, obj_id))
        raise HTTPNotFound()

    if replace:
        orig_attr = attributes[attr_name]
        create_time = orig_attr["created"]
    else:
        create_time = time.time()

    # ok - all set, create attribute obj
    attr_json = {"type": datatype, "shape": shape, "value": value, "created": create_time }
    attributes[attr_name] = attr_json
     
    # write back to S3, save to metadata cache
    await save_metadata_obj(app, obj_id, obj_json)
 
    resp_json = { } 

    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
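
The conflict handling above reduces to a small decision table. A minimal sketch of just the status-code logic (function name hypothetical):

def put_attribute_status(attributes, attr_name, replace=False):
    # PUT without replace must not overwrite an existing attribute (409);
    # PUT with replace must target an attribute that already exists (404)
    if attr_name in attributes and not replace:
        return 409
    if replace and attr_name not in attributes:
        return 404
    return 201

attrs = {"units": {}}
assert put_attribute_status(attrs, "units") == 409
assert put_attribute_status(attrs, "scale", replace=True) == 404
assert put_attribute_status(attrs, "scale") == 201
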
Example #14
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    query_update = None
    limit = 0
    bucket = None
    input_arr = None

    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    if "Limit" in params:
        limit = int(params["Limit"])
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
        chunk_init = False  # don't initialize new chunks on query update
    else:
        expected_content_type = "application/octet-stream"
        chunk_init = True
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    # TBD - does this work with linked datasets?
    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    type_json = dset_json["type"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    chunk_arr = await get_chunk(app,
                                chunk_id,
                                dset_json,
                                bucket=bucket,
                                chunk_init=chunk_init)
    is_dirty = False
    if chunk_arr is None:
        if chunk_init:
            log.error("failed to create numpy array")
            raise HTTPInternalServerError()
        else:
            log.warn(f"chunk {chunk_id} not found")
            raise HTTPNotFound()

    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        if rank != 1:
            log.error("expected one-dimensional array for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        # TBD - send back binary response to SN node
        try:
            resp = chunkQuery(chunk_id=chunk_id,
                              chunk_layout=dims,
                              chunk_arr=chunk_arr,
                              slices=selection,
                              query=query,
                              query_update=query_update,
                              limit=limit,
                              return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
        if query_update and resp is not None:
            is_dirty = True

    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")

        if itemsize != 'H5T_VARIABLE' and (num_elements *
                                           itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        # TBD - will it cause problems when failures are raised before reading data?
        input_bytes = await request_read(request)
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)

        is_dirty = chunkWriteSelection(chunk_arr=chunk_arr,
                                       slices=selection,
                                       data=input_arr)

        # chunk update successful
        resp = {}
    if is_dirty:
        save_chunk(app, chunk_id, dset_json, bucket=bucket)
        status_code = 201
    else:
        status_code = 200

    resp = json_response(resp, status=status_code)
    log.response(request, resp=resp)
    return resp
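
The content-length check in the non-query path multiplies the selection's element count by the fixed item size. A small sketch of that arithmetic, assuming the selection is a tuple of slice objects with explicit start/stop/step (as getSliceQueryParam presumably returns):

def selection_shape(selection):
    # number of elements along each sliced dimension
    return [max((s.stop - s.start + s.step - 1) // s.step, 0)
            for s in selection]

sel = (slice(0, 10, 2), slice(5, 9, 1))
mshape = selection_shape(sel)          # [5, 4]
itemsize = 8                           # e.g. one float64 per element
num_elements = 1
for extent in mshape:
    num_elements *= extent
assert num_elements * itemsize == 160  # expected content_length in bytes
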
Example #15
async def index(request: Request):
    try:
        return Response(body=json.dumps({"host": socket.gethostname()}), headers={'content-type': 'application/json'})
    except Exception as ex:
        log.warning(f"Endpoint: /, Method: get. Error:{str(ex)}")
        return HTTPInternalServerError()
Example #16
async def putS3Bytes(app,
                     key,
                     data,
                     shuffle=0,
                     deflate_level=None,
                     bucket=None):
    """ Store byte string as S3 object with given key
    """

    client = getS3Client(app)
    if not bucket:
        bucket = app['bucket_name']
    if key[0] == '/':
        key = key[1:]  # no leading slash
    log.info(f"putS3Bytes(s3://{bucket}/{key}), {len(data)} bytes")
    s3_stats_increment(app, "put_count")
    if shuffle > 0:
        shuffled_data = _shuffle(shuffle, data)
        log.info(f"shuffled data to {len(shuffled_data)}")
        data = shuffled_data

    if deflate_level is not None:
        try:
            # the keyword parameter is enabled with py3.6
            # zip_data = zlib.compress(data, level=deflate_level)
            zip_data = zlib.compress(data, deflate_level)
            log.info(
                f"compressed from {len(data)} bytes to {len(zip_data)} bytes with level: {deflate_level}"
            )
            data = zip_data
        except zlib.error as zlib_error:
            log.info(f"zlib_err: {zlib_error}")
            log.warn(f"unable to compress s3 obj: {key}, using raw bytes")

    try:
        start_time = time.time()
        rsp = await client.put_object(Bucket=bucket, Key=key, Body=data)
        finish_time = time.time()
        log.info(
            f"s3Util.putS3Bytes({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
        )
        s3_rsp = {
            "etag": rsp["ETag"],
            "size": len(data),
            "lastModified": int(finish_time)
        }
    except ClientError as ce:
        s3_stats_increment(app, "error_count")
        msg = f"ClientError putting s3 obj {key}: {ce}"
        log.error(msg)
        raise HTTPInternalServerError()
    except CancelledError as cle:
        s3_stats_increment(app, "error_count")
        msg = f"CancelledError putting s3 obj {key}: {cle}"
        log.error(msg)
        raise HTTPInternalServerError()
    except Exception as e:
        s3_stats_increment(app, "error_count")
        msg = f"Unexpected Exception {type(e)} putting s3 obj {key}: {e}"
        log.error(msg)
        raise HTTPInternalServerError()
    if data and len(data) > 0:
        s3_stats_increment(app, "bytes_in", inc=len(data))
    log.debug(f"putS3Bytes complete for s3 obj {key}, s3_rsp: {s3_rsp}")
    # s3 rsp format:
    # {'ETag': '"1b95a7bf5fab6f5c0620b8e3b30a53b9"', 'ResponseMetadata':
    #     {'HostId': '', 'HTTPHeaders': {'X-Amz-Request-Id': '1529F570A809AD26', 'Server': 'Minio/RELEASE.2017-08-05T00-00-53Z (linux; amd64)', 'Vary': 'Origin', 'Date': 'Sun, 29 Apr 2018 16:36:53 GMT', 'Content-Length': '0', 'Content-Type': 'text/plain; charset=utf-8', 'Etag': '"1b95a7bf5fab6f5c0620b8e3b30a53b9"', 'X-Amz-Bucket-Region': 'us-east-1', 'Accept-Ranges': 'bytes'},
    #       'HTTPStatusCode': 200, 'RequestId': '1529F570A809AD26'}}
    return s3_rsp
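
The _shuffle helper used above is not shown. For fixed-size types, an HDF5-style shuffle filter groups the i-th byte of every element together so the subsequent deflate pass compresses better; a minimal numpy sketch of that transform (an assumption about what _shuffle does, not its actual implementation):

import numpy as np

def shuffle_bytes(data, itemsize):
    # rows = elements, cols = byte positions; transposing groups like bytes
    arr = np.frombuffer(data, dtype=np.uint8).reshape(-1, itemsize)
    return arr.T.tobytes()

def unshuffle_bytes(data, itemsize):
    # inverse transform: regroup bytes back into contiguous elements
    arr = np.frombuffer(data, dtype=np.uint8).reshape(itemsize, -1)
    return arr.T.tobytes()

raw = np.arange(4, dtype=np.uint32).tobytes()
assert unshuffle_bytes(shuffle_bytes(raw, 4), 4) == raw
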
Example #17
async def getPathForObjectId(app,
                             parent_id,
                             idpath_map,
                             tgt_id=None,
                             bucket=None):
    """ Search the object starting with the given parent_id.
    idpath should be a dict with at minimum the key: parent_id: <parent_path>.
    If tgt_id is not None, returns first path that matches the tgt_id or None if not found.
    If Tgt_id is no, returns the idpath_map.
    """

    if not parent_id:
        log.error("No parent_id passed to getPathForObjectId")
        raise HTTPInternalServerError()

    if parent_id not in idpath_map:
        msg = f"Obj {parent_id} expected to be found in idpath_map"
        log.error(msg)
        raise HTTPInternalServerError()

    parent_path = idpath_map[parent_id]
    if parent_id == tgt_id:
        return parent_path

    req = getDataNodeUrl(app, parent_id)
    req += "/groups/" + parent_id + "/links"
    params = {}
    if bucket:
        params["bucket"] = bucket

    log.debug("getPathForObjectId LINKS: " + req)
    links_json = await http_get(app, req, params=params)
    log.debug(
        f"getPathForObjectId got links json from dn for parent_id: {parent_id}"
    )
    links = links_json["links"]

    h5path = None
    for link in links:
        if link["class"] != "H5L_TYPE_HARD":
            continue  # ignore everything except hard links
        link_id = link["id"]
        if link_id in idpath_map:
            continue  # this node has already been visited
        title = link["title"]
        if tgt_id is not None and link_id == tgt_id:
            # found it!
            h5path = op.join(parent_path, title)
            break
        idpath_map[link_id] = op.join(parent_path, title)
        if getCollectionForId(link_id) != "groups":
            continue
        h5path = await getPathForObjectId(app,
                                          link_id,
                                          idpath_map,
                                          tgt_id=tgt_id,
                                          bucket=bucket)  # recursive call
        if tgt_id is not None and h5path:
            break

    return h5path
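
A hypothetical usage sketch: the caller seeds idpath_map with the root group mapped to "/", and the recursion then walks hard links down into sub-groups. This depends on the surrounding service context (a running app with DN nodes), so it is only illustrative:

async def find_h5path(app, root_id, tgt_id, bucket=None):
    # seed the map as the docstring requires: parent_id -> parent_path
    idpath_map = {root_id: "/"}
    return await getPathForObjectId(app, root_id, idpath_map,
                                    tgt_id=tgt_id, bucket=bucket)
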
Example #18
async def getS3ObjStats(app, key, bucket=None):
    """ Return etag, size, and last modified time for given object
    """

    client = getS3Client(app)
    if not bucket:
        bucket = app['bucket_name']
    stats = {}

    if key[0] == '/':
        #key = key[1:]  # no leading slash
        msg = f"key with leading slash: {key}"
        log.error(msg)
        raise KeyError(msg)

    log.info(f"getS3ObjStats({key})")

    s3_stats_increment(app, "list_count")
    try:
        resp = await client.list_objects(Bucket=bucket, MaxKeys=1, Prefix=key)
    except ClientError as ce:
        # key does not exist?
        s3_stats_increment(app, "error_count")
        msg = "Error listing s3 obj: " + str(ce)
        log.error(msg)
        raise HTTPInternalServerError()
    if 'Contents' not in resp:
        msg = f"key: {key} not found"
        log.info(msg)
        raise HTTPInternalServerError()
    contents = resp['Contents']
    log.debug(f"s3_contents: {contents}")

    found = False
    if len(contents) > 0:
        item = contents[0]
        if item["Key"] == key:
            # if the key is a S3 folder, the key will be the first object in the folder,
            # not the requested object
            found = True
            if item["ETag"]:
                etag = item["ETag"]
                if len(etag) > 2 and etag[0] == '"' and etag[-1] == '"':
                    # S3 returning extra quotes around etag?
                    etag = etag[1:-1]
                    stats["ETag"] = etag
            else:
                if "Owner" in item and "ID" in item["Owner"] and item["Owner"][
                        "ID"] == "minio":
                    pass  # minio is not creating ETags...
                else:
                    log.warn(f"No ETag for key: {key}")
                # If no ETAG put in a fake one
                stats["ETag"] = "9999"
            stats["Size"] = item["Size"]
            stats["LastModified"] = int(item["LastModified"].timestamp())
    if not found:
        msg = f"key: {key} not found"
        log.info(msg)
        raise HTTPNotFound()

    return stats
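
One detail worth noting: S3 (and Minio) can return the ETag wrapped in double quotes, which the code above strips before storing it in stats. As a tiny standalone sketch:

def normalize_etag(etag):
    # strip the surrounding quotes some S3 implementations return
    if len(etag) > 2 and etag[0] == '"' and etag[-1] == '"':
        etag = etag[1:-1]
    return etag

assert normalize_etag('"1b95a7bf5fab6f5c0620b8e3b30a53b9"') == "1b95a7bf5fab6f5c0620b8e3b30a53b9"
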
Example #19
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {params.keys()}")

    s3path = None
    s3offset = 0
    s3size = 0
    bucket = None
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    if "dset" in params:
        msg = "Unexpected dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")
    type_json = dset_json["type"]

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               bucket=bucket,
                               s3path=s3path,
                               s3offset=s3offset,
                               s3size=s3size)

    if chunk_arr is None:
        # return a 404
        msg = f"Chunk {chunk_id} does not exist"
        log.info(msg)
        raise HTTPNotFound()

    resp = None

    if "query" in params:
        # do query selection
        query = params["query"]
        log.info(f"query: {query}")
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])

        values = []
        indices = []
        field_names = []
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug(f"x: {x}")
        eval_str = getEvalStr(query, "x", field_names)
        log.debug(f"eval_str: {eval_str}")
        where_result = np.where(eval(eval_str))
        log.debug(f"where_result: {where_result}")
        where_result_index = where_result[0]
        log.debug(f"whare_result index: {where_result_index}")
        log.debug(f"boolean selection: {x[where_result_index]}")
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug(f"index: {index}")
            value = x[index].tolist()
            log.debug(f"value: {value}")
            json_val = bytesArrayToList(value)
            log.debug(f"json_value: {json_val}")
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()

        finally:
            await resp.write_eof()

    return resp
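
getEvalStr (not shown here) translates the user query into a numpy expression over the compound-type fields before the eval/np.where step. A much-simplified, hypothetical sketch of that translation:

import numpy as np

def make_eval_str(query, var, field_names):
    # hypothetical simplification: rewrite bare field references so that
    # "temp > 32" becomes "x['temp'] > 32" (the real helper also validates)
    for name in field_names:
        query = query.replace(name, f"{var}['{name}']")
    return query

x = np.array([(10,), (40,)], dtype=[("temp", "i4")])
eval_str = make_eval_str("temp > 32", "x", ["temp"])
assert np.where(eval(eval_str))[0].tolist() == [1]
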
Example #20
    async def put_object(self, key, data, bucket=None):
        """ Write data to given key.
            Returns client specific dict on success
        """
        self._validateBucket(bucket)
        self._validateKey(key)

        dirpath = self._getFilePath(bucket)
        if not pp.isdir(dirpath):
            msg = f"fileClient.put_object - bucket at path: {dirpath} not found"
            log.warn(msg)
            raise HTTPNotFound()

        start_time = time.time()
        filepath = self._getFilePath(bucket, key)
        log.debug(f"fileClient.put_object({bucket}/{key} start: {start_time}")
        if "loop" in self._app:
            loop = self._app["loop"]
        else:
            loop = None
        try:
            key_dirs = key.split("/")
            log.debug(f"key_dirs: {key_dirs}")
            if len(key_dirs) > 1:
                # create directories in the path if they don't already exist
                key_dirs = key_dirs[:-1]
                for key_dir in key_dirs:
                    dirpath = pp.join(dirpath, key_dir)
                    log.debug(f"pp.join({key_dir}) => {dirpath}")

                    dirpath = pp.normpath(dirpath)
                    log.debug(f"normpath: {dirpath}")

                    if not pp.isdir(dirpath):
                        log.debug(f"mkdir({dirpath})")
                        mkdir(dirpath)
                    else:
                        log.debug(f"isdir {dirpath} found")
            log.debug(f"open({filepath}, 'wb')")
            async with aiofiles.open(filepath, loop=loop, mode='wb') as f:
                await f.write(data)
            finish_time = time.time()
            log.info(
                f"fileClient.put_object({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
            )
            write_rsp = self._getFileStats(filepath, data=data)
        except IOError as ioe:
            msg = f"fileClient: IOError writing {bucket}/{key}: {ioe}"
            log.warn(msg)
            raise HTTPInternalServerError()
        except CancelledError as cle:
            #file_stats_increment(app, "error_count")
            msg = f"CancelledError for put s3 obj {key}: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()

        except Exception as e:
            #file_stats_increment(app, "error_count")
            msg = f"fileClient Unexpected Exception {type(e)} writing  {bucket}/{key}: {e}"
            log.error(msg)
            raise HTTPInternalServerError()

        if data and len(data) > 0:
            self._file_stats_increment("bytes_out", inc=len(data))
        log.debug(
            f"fileClient.put_object {key} complete, write_rsp: {write_rsp}")
        return write_rsp
Example #21
async def getChunk(app,
                   chunk_id,
                   dset_json,
                   bucket=None,
                   s3path=None,
                   s3offset=0,
                   s3size=0,
                   chunk_init=False):
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(
            f"unable to initialize chunk {chunk_id} for reference layouts")
        raise HTTPInternalServerError()

    log.debug(
        f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} percent, dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}"
    )

    chunk_arr = None
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)
    # note - officially we should follow the order in which the filters are defined in the filter_list,
    # but since we currently have just deflate and shuffle we will always apply deflate then shuffle on read,
    # and shuffle then deflate on write
    # also note - get deflate and shuffle will update the deflate and shuffle map so that the s3sync will do the right thing
    deflate_level = getDeflate(app, dset_id, dset_json)
    shuffle = getShuffle(app, dset_id, dset_json)
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')  # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index + 1):]
        log.debug(f"Using s3path bucket: {bucket} and  s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)
        log.debug(f"getChunk chunkid: {chunk_id} bucket: {bucket}")
    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isStorObj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]

            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(
                    f"s3 read request for {chunk_id} was requested at: {read_start_time}"
                )
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(
                        f"s3 read for chunk {chunk_id} timed-out, initiaiting a new read"
                    )

            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug(f"Reading chunk {s3key} from S3")

                chunk_bytes = await getStorBytes(app,
                                                 s3key,
                                                 shuffle=shuffle,
                                                 deflate_level=deflate_level,
                                                 offset=s3offset,
                                                 length=s3size,
                                                 bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id]
                else:
                    log.warn(
                        f"expected to find {chunk_id} in pending_s3_read map")
                chunk_arr = bytesToArray(chunk_bytes, dt, dims)

            log.debug(f"chunk size: {chunk_arr.size}")

        elif chunk_init:
            log.debug(f"Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")

        if chunk_arr is not None:
            # check that there's room in the cache before adding it
            if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                # no room in the cache, wait till space is freed by the s3sync task
                wait_start = time.time()
                while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                    log.warn(
                        f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, sleeping till items are flushed"
                    )
                    if time.time() - wait_start > MAX_WAIT_TIME:
                        log.error(
                            f"unable to save updated chunk {chunk_id} to cache returning 503 error"
                        )
                        raise HTTPServiceUnavailable()
                    await asyncio.sleep(1)

            chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
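
The list-to-tuple conversion in the chunk_init branch matters because numpy broadcasts a tuple, not a list, as a single fill value for a compound dtype. A quick sketch:

import numpy as np

dims = (3, 4)
dt = np.dtype([("a", "i4"), ("b", "f8")])
fill_value = (42, 3.14)  # a list here would fail to broadcast
chunk_arr = np.empty(dims, dtype=dt, order='C')
chunk_arr[...] = fill_value
assert chunk_arr[0, 0]["a"] == 42
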
Example #22
    async def put_object(self, key, data, bucket=None):
        """ Write data to given key.
            Returns client specific dict on success
        """
        if not bucket:
            log.error("put_object - bucket not set")
            raise HTTPInternalServerError()

        start_time = time.time()
        log.debug(
            f"azureBlobClient.put_object({bucket}/{key}) start: {start_time}")
        try:
            async with self._client.get_blob_client(container=bucket,
                                                    blob=key) as blob_client:
                blob_rsp = await blob_client.upload_blob(data,
                                                         blob_type='BlockBlob',
                                                         overwrite=True)

            finish_time = time.time()
            ETag = blob_rsp["etag"]
            lastModified = int(blob_rsp["last_modified"].timestamp())
            data_size = len(data)
            rsp = {
                "ETag": ETag,
                "size": data_size,
                "LastModified": lastModified
            }
            log.debug(f"put_object {key} returning: {rsp}")

            log.info(
                f"azureBlobClient.put_object({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
            )

        except CancelledError as cle:
            self._azure_stats_increment("error_count")
            msg = f"azureBlobClient.CancelledError for put_object {key}: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()
        except Exception as e:
            if isinstance(e, AzureError):
                if e.status_code == 404:
                    msg = f"azureBlobClient.key: {key} not found"
                    log.warn(msg)
                    raise HTTPNotFound()
                elif e.status_code in (401, 403):
                    msg = f"azureBlobClient.access denied for put key: {key}"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._azure_stats_increment("error_count")
                    log.error(
                        f"azureBlobClient.got unexpected AzureError for put_object {key}: {e.message}"
                    )
                    raise HTTPInternalServerError()
            else:
                log.error(
                    f"azureBlobClient.Unexpected exception for put_object {key}: {e}"
                )
                raise HTTPInternalServerError()

        if data and len(data) > 0:
            self._azure_stats_increment("bytes_out", inc=len(data))
        log.debug(f"azureBlobClient.put_object {key} complete, rsp: {rsp}")
        return rsp
Example #23
async def GET_Datatypes(request):
    """HTTP method to return datatype collection for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code in (404, 410):
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"Unexpected Error: {ce.code})")
            raise HTTPInternalServerError()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug(f"got domain_json: {domain_json}")
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    # get the datatype collection list
    obj_ids = []
    if "root" in domain_json or domain_json["root"]:
        # get the groups collection list
        collections = await get_collections(app, domain_json["root"])
        objs = collections["datatypes"]
        obj_ids = getIdList(objs, marker=marker, limit=limit)

    # create hrefs
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/datatypes')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})

    # return obj ids and hrefs
    rsp_json = {}
    rsp_json["datatypes"] = obj_ids
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Example #24
    async def list_keys(self,
                        prefix='',
                        deliminator='',
                        suffix='',
                        include_stats=False,
                        callback=None,
                        bucket=None,
                        limit=None):
        """ return keys matching the arguments
        """
        if not bucket:
            log.error("list_keys - bucket not set")
            raise HTTPInternalServerError()

        log.info(
            f"list_keys('{prefix}','{deliminator}','{suffix}', include_stats={include_stats})"
        )
        if deliminator and deliminator != '/':
            msg = "Only '/' is supported as deliminator"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if include_stats:
            # use a dictionary to hold return values
            key_names = {}
        else:
            # just use a list
            key_names = []
        continuation_token = None
        page_result_count = 1000  # compatible with what S3 uses by default
        if prefix == '':
            prefix = None  # azure sdk expects None for no prefix
        try:
            async with self._client.get_container_client(
                    container=bucket) as container_client:
                while True:
                    log.info(
                        f"list_blobs: {prefix} continuation_token: {continuation_token}"
                    )
                    keyList = container_client.walk_blobs(
                        name_starts_with=prefix,
                        delimiter=deliminator,
                        results_per_page=page_result_count).by_page(
                            continuation_token)

                    async for key in await keyList.__anext__():
                        key_name = key["name"]
                        if include_stats:
                            ETag = key["etag"]
                            lastModified = int(
                                key["last_modified"].timestamp())
                            data_size = key["size"]
                            key_names[key_name] = {
                                "ETag": ETag,
                                "Size": data_size,
                                "LastModified": lastModified
                            }
                        else:
                            if suffix and not key_name.endswith(suffix):
                                continue
                            if deliminator and key_name[-1] != '/':
                                # only return folders
                                continue
                            if limit and len(key_names) >= limit:
                                break
                            key_names.append(key_name)
                    if callback:
                        if iscoroutinefunction(callback):
                            await callback(self._app, key_names)
                        else:
                            callback(self._app, key_names)
                    if not keyList.continuation_token or (
                            limit and len(key_names) >= limit):
                        # got all the keys (or as many as requested)
                        break
                    else:
                        # keep going
                        continuation_token = keyList.continuation_token

        except CancelledError as cle:
            self._azure_stats_increment("error_count")
            msg = f"azureBlobClient.CancelledError for list_keys: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()
        except Exception as e:
            if isinstance(e, AzureError):
                if e.status_code == 404:
                    msg = "azureBlobClient not found error for list_keys"
                    log.warn(msg)
                    raise HTTPNotFound()
                elif e.status_code in (401, 403):
                    msg = "azureBlobClient.access denied for list_keys"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._azure_stats_increment("error_count")
                    log.error(
                        f"azureBlobClient.got unexpected AzureError for list_keys: {e.message}"
                    )
                    raise HTTPInternalServerError()
            else:
                log.error(
                    f"azureBlobClient.Unexpected exception for list_keys: {e}")
                raise HTTPInternalServerError()

        log.info(f"list_keys done, got {len(key_names)} keys")
        if limit and len(key_names) > limit:
            # return requested number of keys
            if include_stats:
                keys = list(key_names.keys())
                keys.sort()
                for k in keys[limit:]:
                    del key_names[k]
            else:
                key_names = key_names[:limit]

        return key_names
Example #25
async def PUT_Domain(request):
    """HTTP method to create a new domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query
    # verify username, password
    username, pswd = getUserPasswordFromRequest(
        request)  # throws exception if user/password is not valid
    await validateUserPassword(app, username, pswd)

    # initial perms for owner and default
    owner_perm = {
        'create': True,
        'read': True,
        'update': True,
        'delete': True,
        'readACL': True,
        'updateACL': True
    }
    default_perm = {
        'create': False,
        'read': True,
        'update': False,
        'delete': False,
        'readACL': False,
        'updateACL': False
    }

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    bucket = getBucketForDomain(domain)

    log.info(f"PUT domain: {domain}, bucket: {bucket}")

    body = None
    if request.has_body:
        body = await request.json()
        log.debug(f"PUT domain with body: {body}")

    if ("flush" in params and params["flush"]) or (body and "flush" in body
                                                   and body["flush"]):
        # flush domain - update existing domain rather than create a new resource
        domain_json = await getDomainJson(app, domain, reload=True)
        log.debug(f"got domain_json: {domain_json}")

        if domain_json is None:
            log.warn(f"domain: {domain} not found")
            raise HTTPNotFound()

        if 'owner' not in domain_json:
            log.error("No owner key found in domain")
            raise HTTPInternalServerError()

        if 'acls' not in domain_json:
            log.error("No acls key found in domain")
            raise HTTPInternalServerError()

        aclCheck(domain_json, "update",
                 username)  # throws exception if not allowed
        if "root" in domain_json:
            # nothing to do for folder objects
            await doFlush(app, domain_json["root"], bucket=bucket)
        # flush successful
        resp = await jsonResponse(request, None, status=204)
        log.response(request, resp=resp)
        return resp

    is_folder = False
    owner = username
    linked_domain = None
    linked_bucket = None
    root_id = None

    if body and "folder" in body:
        if body["folder"]:
            is_folder = True
    if body and "owner" in body:
        owner = body["owner"]
    if body and "linked_domain" in body:
        if is_folder:
            msg = "Folder domains can not be used for links"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        linked_domain = body["linked_domain"]
        if not isValidDomain(linked_domain):
            msg = f"linked_domain: {linked_domain} is not valid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if "linked_bucket" in body:
            linked_bucket = body["linked_bucket"]
        elif bucket:
            linked_bucket = bucket
        elif "bucket_name" in request.app and request.app["bucket_name"]:
            linked_bucket = request.app["bucket_name"]
        else:
            linked_bucket = None

        if not linked_bucket:
            msg = "Could not determine bucket for linked domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if owner != username and username != "admin":
        log.warn("Only admin users are allowed to set owner for new domains")
        raise HTTPForbidden()

    parent_domain = getParentDomain(domain)
    log.debug(f"Parent domain: [{parent_domain}]")

    if not parent_domain or getPathForDomain(parent_domain) == '/':
        is_toplevel = True
    else:
        is_toplevel = False

    if is_toplevel and not is_folder:
        msg = "Only folder domains can be created at the top-level"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if is_toplevel and username != "admin":
        msg = "creation of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    parent_json = None
    if not is_toplevel:
        try:
            parent_json = await getDomainJson(app, parent_domain, reload=True)
        except ClientResponseError as ce:
            if ce.code == 404:
                msg = f"Parent domain: {parent_domain} not found"
                log.warn(msg)
                raise HTTPNotFound()
            elif ce.code == 410:
                msg = f"Parent domain: {parent_domain} removed"
                log.warn(msg)
                raise HTTPGone()
            else:
                log.error(f"Unexpected error: {ce.code}")
                raise HTTPInternalServerError()

        log.debug(f"parent_json {parent_domain}: {parent_json}")
        if "root" in parent_json and parent_json["root"]:
            msg = "Parent domain must be a folder"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if parent_json:
        aclCheck(parent_json, "create",
                 username)  # throws exception if not allowed

    if linked_domain:
        linked_json = await getDomainJson(app,
                                          linked_bucket + linked_domain,
                                          reload=True)
        log.debug(f"got linked json: {linked_json}")
        if "root" not in linked_json:
            msg = "Folder domains cannot ber used as link target"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        root_id = linked_json["root"]
        aclCheck(linked_json, "read", username)
        aclCheck(linked_json, "delete", username)
    else:
        linked_json = None

    if not is_folder and not linked_json:
        # create a root group for the new domain
        root_id = createObjId("roots")
        log.debug(f"new root group id: {root_id}")
        group_json = {"id": root_id, "root": root_id, "domain": domain}
        log.debug("create group for domain, body: " + json.dumps(group_json))

        # create root group
        req = getDataNodeUrl(app, root_id) + "/groups"
        params = {}
        bucket = getBucketForDomain(domain)
        if bucket:
            params["bucket"] = bucket
        try:
            group_json = await http_post(app,
                                         req,
                                         data=group_json,
                                         params=params)
        except ClientResponseError as ce:
            msg = "Error creating root group for domain -- " + str(ce)
            log.error(msg)
            raise HTTPInternalServerError()
    else:
        log.debug("no root group, creating folder")

    domain_json = {}

    domain_acls = {}
    # owner gets full control
    domain_acls[owner] = owner_perm
    if config.get("default_public") or is_folder:
        # this will make the domain public readable
        log.debug(f"adding default perm for domain: {domain}")
        domain_acls["default"] = default_perm

    # construct dn request to create new domain
    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"owner": owner, "domain": domain}
    body["acls"] = domain_acls

    if root_id:
        body["root"] = root_id

    log.debug(f"creating domain: {domain} with body: {body}")
    try:
        domain_json = await http_put(app, req, data=body)
    except ClientResponseError as ce:
        msg = "Error creating domain state -- " + str(ce)
        log.error(msg)
        raise HTTPInternalServerError()

    # domain creation successful
    # mix in limits
    domain_json["limits"] = getLimits()
    domain_json["version"] = getVersion()
    resp = await jsonResponse(request, domain_json, status=201)
    log.response(request, resp=resp)
    return resp
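
aclCheck is called throughout but not shown. A minimal sketch of the permission test it presumably performs, based on the acl dicts built above (hypothetical, not the actual implementation):

def acl_check(domain_json, action, username):
    # fall back to the "default" ACL when the user has no explicit entry
    acls = domain_json.get("acls", {})
    acl = acls.get(username, acls.get("default", {}))
    if not acl.get(action):
        raise PermissionError(f"{username} may not {action} this domain")

domain_json = {"acls": {"admin": {"update": True},
                        "default": {"read": True, "update": False}}}
acl_check(domain_json, "read", "guest")  # allowed via the default ACL
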
Example #26
    async def get_object(self, key, bucket=None, offset=0, length=-1):
        """ Return data for object at given key.
           If Range is set, return the given byte range.
        """

        range=""
        if length > 0:
            range = f"bytes={offset}-{offset+length-1}"
            log.info(f"storage range request: {range}")

        if not bucket:
            log.error("get_object - bucket not set")
            raise HTTPInternalServerError()

        start_time = time.time()
        log.debug(f"s3Client.get_object({bucket}/{key}) start: {start_time}")
        session = self._app["session"]
        self._renewToken()
        async with session.create_client('s3', region_name=self._aws_region,
                                    aws_secret_access_key=self._aws_secret_access_key,
                                    aws_access_key_id=self._aws_access_key_id,
                                    aws_session_token=self._aws_session_token,
                                    endpoint_url=self._s3_gateway,
                                    use_ssl=self._use_ssl,
                                    config=self._aio_config) as _client:
            try:
                resp = await _client.get_object(Bucket=bucket, Key=key, Range=range)
                data = await resp['Body'].read()
                finish_time = time.time()
                if offset > 0:
                    range_key = f"{key}[{offset}:{offset+length}]"
                else:
                    range_key = key
                log.info(f"s3Client.get_object({range_key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}")

                resp['Body'].close()
            except ClientError as ce:
                # key does not exist?
                # check for not found status
                response_code = ce.response["Error"]["Code"]
                if response_code in ("NoSuchKey", "404") or response_code == 404:
                    msg = f"s3_key: {key} not found "
                    log.warn(msg)
                    raise HTTPNotFound()
                elif response_code == "NoSuchBucket":
                    msg = f"s3_bucket: {bucket} not found"
                    log.info(msg)
                    raise HTTPNotFound()
                elif response_code in ("AccessDenied", "401", "403") or response_code in (401, 403):
                    msg = f"access denied for s3_bucket: {bucket}"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._s3_stats_increment("error_count")
                    log.error(f"got unexpected ClientError on s3 get {key}: {response_code}")
                    raise HTTPInternalServerError()
            except CancelledError as cle:
                self._s3_stats_increment("error_count")
                msg = f"CancelledError for get s3 obj {key}: {cle}"
                log.error(msg)
                raise HTTPInternalServerError()
            except Exception as e:
                self._s3_stats_increment("error_count")
                msg = f"Unexpected Exception {type(e)} get s3 obj {key}: {e}"
                log.error(msg)
                raise HTTPInternalServerError()
        return data
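
The Range arithmetic above is easy to get wrong: HTTP byte ranges are inclusive on both ends, hence offset+length-1. A one-liner sketch:

def range_header(offset, length):
    # inclusive byte range; empty string means fetch the whole object
    return f"bytes={offset}-{offset + length - 1}" if length > 0 else ""

assert range_header(10, 5) == "bytes=10-14"
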
Example #27
async def GET_ACL(request):
    """HTTP method to return JSON for given domain/ACL"""
    log.request(request)
    app = request.app

    acl_username = request.match_info.get('username')
    if not acl_username:
        msg = "Missing username for ACL"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code in (404, 410):
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    # validate that the requesting user has permission to read ACLs in this domain
    if acl_username in (username, "default"):
        # allow read access for a user's own ACL, or the default ACL
        aclCheck(domain_json, "read",
                 username)  # throws exception if not authorized
    else:
        aclCheck(domain_json, "readACL",
                 username)  # throws exception if not authorized

    if 'owner' not in domain_json:
        log.warn("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.warn("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug(f"got domain_json: {domain_json}")

    if acl_username not in acls:
        msg = f"acl for username: [{acl_username}] not found"
        log.warn(msg)
        raise HTTPNotFound()

    acl = acls[acl_username]
    acl_rsp = {}
    for k in acl.keys():
        acl_rsp[k] = acl[k]
    acl_rsp["userName"] = acl_username

    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acl"] = acl_rsp
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Example #28
async def health_get(request: Request) -> Response:
    try:
        return Response(body=json.dumps({"status": "OK"}), headers={'content-type': 'application/json'})
    except Exception as ex:
        log.warning(f"Endpoint: health, Method: get. Error:{str(ex)}")
        return HTTPInternalServerError()
Example #29
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_json = json.loads(params["dset"])
    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)

    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)

    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    # check that the content_length is what we expect
    if itemsize != 'H5T_VARIABLE':
        log.debug("expect content_length: {}".format(num_elements * itemsize))
    log.debug(f"actual content_length: {request.content_length}")

    if itemsize != 'H5T_VARIABLE' and (num_elements *
                                       itemsize) != request.content_length:
        msg = "Expected content_length of: {}, but got: {}".format(
            num_elements * itemsize, request.content_length)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming data
    # TBD - will it cause problems when failures are raised before reading data?
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes),
                                                    request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()

    input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               chunk_init=True,
                               bucket=bucket)

    # update chunk array
    chunk_arr[selection] = input_arr
    chunk_cache = app["chunk_cache"]
    chunk_cache.setDirty(chunk_id)
    log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

    # async write to S3
    dirty_ids = app["dirty_ids"]
    now = int(time.time())
    dirty_ids[chunk_id] = (now, bucket)
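    # the (timestamp, bucket) entry presumably lets a background flush
    # task elsewhere in the service decide when each dirty chunk is
    # written back to S3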

    # chunk update successful
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
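
For orientation, here is a minimal client-side sketch of a request the
PUT_Chunk handler above would accept. The route, the omission of the
per-dimension slice query parameters, and the helper name are assumptions
inferred from the handler, not a documented API:

import json

import aiohttp


async def put_chunk(session, base_url, chunk_id, dset_json, arr):
    # hypothetical route; the real service maps {id} via request.match_info
    url = f"{base_url}/chunks/{chunk_id}"
    # the handler requires the dataset JSON in a "dset" query param; the
    # per-dimension slice params (see getSliceQueryParam) are omitted here
    params = {"dset": json.dumps(dset_json)}
    headers = {"Content-Type": "application/octet-stream"}
    # the body is the C-order buffer of the numpy array being written; for
    # fixed-size types its length must equal num_elements * itemsize
    async with session.put(url, params=params, data=arr.tobytes(),
                           headers=headers) as rsp:
        return rsp.status  # 201 on a successful chunk update

A caller would typically drive this from an aiohttp.ClientSession, e.g.
"async with aiohttp.ClientSession() as session: await put_chunk(...)".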
Example #30
0
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {params.keys()}")

    bucket = None
    s3path = None
    s3offset = None
    s3size = None
    query = None
    limit = 0
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()
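    # s3path/s3offset/s3size appear to let the chunk bytes be read from an
    # arbitrary object (e.g. a traditional HDF5 file in S3) rather than
    # from the dataset's own chunk store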

    if "query" in params:
        query = params["query"]
    if "Limit" in params:
        limit = int(params["Limit"])

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    chunk_arr = await get_chunk(app,
                                chunk_id,
                                dset_json,
                                bucket=bucket,
                                s3path=s3path,
                                s3offset=s3offset,
                                s3size=s3size,
                                chunk_init=False)
    if chunk_arr is None:
        msg = f"chunk {chunk_id} not found"
        log.warn(msg)
        raise HTTPNotFound()

    if query:
        # run given query
        try:
            read_resp = chunkQuery(chunk_id=chunk_id,
                                   chunk_layout=dims,
                                   chunk_arr=chunk_arr,
                                   slices=selection,
                                   query=query,
                                   limit=limit,
                                   return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
    else:
        # read selected data from chunk
        output_arr = chunkReadSelection(chunk_arr, slices=selection)
        read_resp = arrayToBytes(output_arr)
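        # for fixed-size dtypes this is effectively output_arr.tobytes()
        # in C order; variable-length types need the extra encoding that
        # arrayToBytes provides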

    # write response
    if isinstance(read_resp, bytes):
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(read_resp)
            await resp.prepare(request)
            await resp.write(read_resp)
            # finish the stream only after a successful write; calling
            # write_eof() from a finally block would itself fail (and mask
            # the real error) if prepare() had raised
            await resp.write_eof()
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
    else:
        # JSON response
        resp = json_response(read_resp)

    return resp
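
And a matching sketch of a binary read via GET_Chunk; again the route is
an assumption inferred from the handler, and the caller must already know
the dtype and selection shape to interpret the returned bytes:

import numpy as np


async def get_chunk_selection(session, base_url, chunk_id, dt, mshape):
    url = f"{base_url}/chunks/{chunk_id}"  # hypothetical route
    async with session.get(url) as rsp:
        if rsp.status == 404:
            return None  # chunk does not exist yet
        data = await rsp.read()
        # binary responses carry the C-order buffer of the selection
        return np.frombuffer(data, dtype=dt).reshape(mshape)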