async def DELETE_Chunk(request):
    """HTTP DELETE method for /chunks/
    Note: clients (i.e. SN nodes) don't directly delete chunks.
    This method should only be called by the AN node.
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"DELETE chunk: {chunk_id}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    validateInPartition(app, chunk_id)

    chunk_cache = app['chunk_cache']
    s3key = getS3Key(chunk_id)
    log.debug(f"DELETE_Chunk s3_key: {s3key}")

    if chunk_id in chunk_cache:
        del chunk_cache[chunk_id]

    deflate_map = app["deflate_map"]
    shuffle_map = app["shuffle_map"]
    dset_id = getDatasetId(chunk_id)
    if dset_id in deflate_map:
        # The only reason chunks are ever deleted is if the dataset is being
        # deleted, so it should be safe to remove this entry now
        log.info(f"Removing deflate_map entry for {dset_id}")
        del deflate_map[dset_id]
    if dset_id in shuffle_map:
        log.info(f"Removing shuffle_map entry for {dset_id}")
        del shuffle_map[dset_id]

    if await isStorObj(app, s3key, bucket=bucket):
        await deleteStorObj(app, s3key, bucket=bucket)
    else:
        log.info(f"DELETE_Chunk - key {s3key} not found (never written)?")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
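# Illustrative sketch (not from the source): how a DN handler like
# DELETE_Chunk might be registered with aiohttp. The '/chunks/{id}' route
# path is an assumption inferred from the docstring above; the real service
# may wire its routes differently.
def _example_register_chunk_routes(app):
    # aiohttp binds '{id}' so handlers can read request.match_info['id']
    app.router.add_route('DELETE', '/chunks/{id}', DELETE_Chunk)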
async def DELETE_Chunk(request):
    """HTTP DELETE method for /chunks/
    Note: clients (i.e. SN nodes) don't directly delete chunks.
    This method should only be called by the AN node.
    """
    log.request(request)
    app = request.app
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info("DELETE chunk: {}".format(chunk_id))
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)

    chunk_cache = app['chunk_cache']
    s3_key = getS3Key(chunk_id)
    log.debug("DELETE_Chunk s3_key: {}".format(s3_key))

    if chunk_id in chunk_cache:
        del chunk_cache[chunk_id]

    deflate_map = app["deflate_map"]
    dset_id = getDatasetId(chunk_id)
    if dset_id in deflate_map:
        # The only reason chunks are ever deleted is if the dataset is being
        # deleted, so it should be safe to remove this entry now
        log.info("Removing deflate_map entry for {}".format(dset_id))
        del deflate_map[dset_id]

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def write_s3_obj(app, obj_id, bucket=None):
    """ writes the given object to s3 """
    s3key = getS3Key(obj_id)
    log.info(f"write_s3_obj for obj_id: {obj_id} / s3_key: {s3key} bucket: {bucket}")
    pending_s3_write = app["pending_s3_write"]
    pending_s3_write_tasks = app["pending_s3_write_tasks"]
    dirty_ids = app["dirty_ids"]
    chunk_cache = app['chunk_cache']
    meta_cache = app['meta_cache']
    deflate_map = app['deflate_map']
    shuffle_map = app['shuffle_map']
    notify_objs = app["root_notify_ids"]
    deleted_ids = app['deleted_ids']
    success = False

    if isValidDomain(obj_id):
        domain_bucket = getBucketForDomain(obj_id)
        if bucket and bucket != domain_bucket:
            log.error(f"expected bucket for domain: {obj_id} to match what was passed to write_s3_obj")
        else:
            bucket = domain_bucket

    if s3key in pending_s3_write:
        msg = f"write_s3_obj - not expected for key {s3key} to be in pending_s3_write map"
        log.error(msg)
        raise KeyError(msg)

    if obj_id not in pending_s3_write_tasks:
        # don't allow reentrant write
        log.debug(f"write_s3_obj for {obj_id} not an s3sync task")

    if obj_id in deleted_ids and isValidUuid(obj_id):
        # if this obj_id has been deleted (and it's unique, since this is not
        # a domain id), cancel any pending task and return
        log.warn(f"Canceling write for {obj_id} since it has been deleted")
        if obj_id in pending_s3_write_tasks:
            log.info(f"removing pending s3 write task for {obj_id}")
            task = pending_s3_write_tasks[obj_id]
            task.cancel()
            del pending_s3_write_tasks[obj_id]
        return None

    now = time.time()
    last_update_time = now
    if obj_id in dirty_ids:
        last_update_time = dirty_ids[obj_id][0]  # timestamp is first element of two-tuple
    if last_update_time > now:
        msg = f"last_update time {last_update_time} is in the future for obj_id: {obj_id}"
        log.error(msg)
        raise ValueError(msg)

    pending_s3_write[s3key] = now
    # do the following in the try block so we can always remove the pending_s3_write at the end
    try:
        if isValidChunkId(obj_id):
            if obj_id not in chunk_cache:
                log.error(f"expected to find obj_id: {obj_id} in chunk cache")
                raise KeyError(f"{obj_id} not found in chunk cache")
            if not chunk_cache.isDirty(obj_id):
                log.error(f"expected chunk cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            chunk_arr = chunk_cache[obj_id]
            chunk_bytes = arrayToBytes(chunk_arr)
            dset_id = getDatasetId(obj_id)
            deflate_level = None
            shuffle = 0
            if dset_id in deflate_map:
                deflate_level = deflate_map[dset_id]
                log.debug(f"got deflate_level: {deflate_level} for dset: {dset_id}")
            if dset_id in shuffle_map:
                shuffle = shuffle_map[dset_id]
                log.debug(f"got shuffle size: {shuffle} for dset: {dset_id}")

            await putS3Bytes(app, s3key, chunk_bytes, shuffle=shuffle,
                             deflate_level=deflate_level, bucket=bucket)
            success = True

            # if chunk has been evicted from cache something has gone wrong
            if obj_id not in chunk_cache:
                msg = f"expected to find {obj_id} in chunk_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id][0] > last_update_time:
                log.info(f"write_s3_obj {obj_id} got updated while s3 write was in progress")
            else:
                # no new write, can clear dirty
                chunk_cache.clearDirty(obj_id)  # allow eviction from cache
                log.debug("putS3Bytes Chunk cache utilization: "
                          f"{chunk_cache.cacheUtilizationPercent} per, "
                          f"dirty_count: {chunk_cache.dirtyCount}")
        else:
            # meta data update
            # check for object in meta cache
            if obj_id not in meta_cache:
                log.error(f"expected to find obj_id: {obj_id} in meta cache")
                raise KeyError(f"{obj_id} not found in meta cache")
            if not meta_cache.isDirty(obj_id):
                log.error(f"expected meta cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            obj_json = meta_cache[obj_id]

            await putS3JSONObj(app, s3key, obj_json, bucket=bucket)
            success = True

            # should still be in meta_cache...
            if obj_id in deleted_ids:
                log.info(f"obj {obj_id} has been deleted while write was in progress")
            elif obj_id not in meta_cache:
                msg = f"expected to find {obj_id} in meta_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id][0] > last_update_time:
                log.info(f"write_s3_obj {obj_id} got updated while s3 write was in progress")
            else:
                meta_cache.clearDirty(obj_id)  # allow eviction from cache
    finally:
        # clear pending_s3_write item
        log.debug(f"write_s3_obj finally block, success={success}")
        if s3key not in pending_s3_write:
            msg = f"write s3 obj: Expected to find {s3key} in pending_s3_write map"
            log.error(msg)
        else:
            if pending_s3_write[s3key] != now:
                msg = f"pending_s3_write timestamp got updated unexpectedly for {s3key}"
                log.error(msg)
            del pending_s3_write[s3key]
        # clear task
        if obj_id not in pending_s3_write_tasks:
            log.debug(f"no pending s3 write task for {obj_id}")
        else:
            log.debug(f"removing pending s3 write task for {obj_id}")
            del pending_s3_write_tasks[obj_id]

    # clear dirty flag
    if obj_id in dirty_ids and dirty_ids[obj_id][0] == last_update_time:
        log.debug(f"clearing dirty flag for {obj_id}")
        del dirty_ids[obj_id]

    # add to map so that root can be notified about changed objects
    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        root_id = getRootObjId(obj_id)
        notify_objs[root_id] = bucket

    # calculate time to do the write
    elapsed_time = time.time() - now
    log.info(f"s3 write for {s3key} took {elapsed_time:.3f}s")
    return obj_id
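# Illustrative sketch (assumption, not from the source): how a periodic
# s3sync-style task might drive write_s3_obj. It mirrors the bookkeeping the
# function above expects: dirty_ids values are (timestamp, bucket) two-tuples
# and in-flight writes are tracked in pending_s3_write_tasks.
def _example_schedule_s3_writes(app):
    pending_s3_write_tasks = app["pending_s3_write_tasks"]
    dirty_ids = app["dirty_ids"]
    for obj_id in list(dirty_ids):
        if obj_id in pending_s3_write_tasks:
            continue  # a write for this object is already in flight
        _, bucket = dirty_ids[obj_id]  # (timestamp, bucket) two-tuple
        task = asyncio.ensure_future(write_s3_obj(app, obj_id, bucket=bucket))
        pending_s3_write_tasks[obj_id] = task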
async def getChunk(app, chunk_id, dset_json, s3path=None, s3offset=0, s3size=0, chunk_init=False):
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(f"unable to initialize chunk {chunk_id} for reference layouts")
        raise HTTPInternalServerError()
    log.debug(f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} per, "
              f"dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}")

    chunk_arr = None
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)
    bucket = None
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')  # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index + 1):]
        log.debug(f"Using bucket: {bucket} and s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)
        log.debug(f"getChunk s3key: {s3key}")

    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isS3Obj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]
            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(f"s3 read request for {chunk_id} was requested at: {read_start_time}")
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(f"s3 read for chunk {chunk_id} timed-out, initiating a new read")
            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug(f"Reading chunk {s3key} from S3")
                deflate_level = getDeflate(app, dset_id, dset_json)
                chunk_bytes = await getS3Bytes(app, s3key, deflate_level=deflate_level,
                                               s3offset=s3offset, s3size=s3size, bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id]
                else:
                    log.warn(f"expected to find {chunk_id} in pending_s3_read map")
                # np.fromstring is deprecated; frombuffer + copy gives an
                # equivalent writable array
                chunk_arr = np.frombuffer(chunk_bytes, dtype=dt).copy()
                chunk_arr = chunk_arr.reshape(dims)
                log.debug(f"chunk size: {chunk_arr.size}")
        elif chunk_init:
            log.debug(f"Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")

    if chunk_arr is not None:
        # check that there's room in the cache before adding it
        if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
            # no room in the cache, wait till space is freed by the s3sync task
            wait_start = time.time()
            while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                log.warn(f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, "
                         "sleeping till items are flushed")
                if time.time() - wait_start > MAX_WAIT_TIME:
                    log.error(f"unable to save updated chunk {chunk_id} to cache, returning 503 error")
                    raise HTTPServiceUnavailable()
                await asyncio.sleep(1)
        chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
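# Illustrative usage (hypothetical s3path value): getChunk can read either
# the chunk's own S3 key, or - for reference layouts - the byte range
# [s3offset, s3offset+s3size) of an external object given as
# "s3://<bucket>/<key>". With chunk_init=True a missing chunk is created
# from the dataset's fill value instead of returning None.
async def _example_fetch_chunk(app, chunk_id, dset_json):
    chunk_arr = await getChunk(app, chunk_id, dset_json,
                               s3path="s3://some-bucket/some/object.h5",
                               s3offset=1024, s3size=4096)
    return chunk_arr  # None if the chunk doesn't exist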
def scanRootCallback(app, s3keys):
    log.debug(f"scanRootCallback, {len(s3keys)} items")
    if isinstance(s3keys, list):
        log.error("got list result for s3keys callback")
        raise ValueError("unexpected callback format")

    results = app["scanRoot_results"]
    if results:
        log.debug(f"previous scanRoot_results: {results}")

    for s3key in s3keys.keys():
        if not isS3ObjKey(s3key):
            log.info(f"not s3obj key, ignoring: {s3key}")
            continue
        objid = getObjId(s3key)
        etag = None
        obj_size = None
        lastModified = None
        item = s3keys[s3key]
        if "ETag" in item:
            etag = item["ETag"]
        if "Size" in item:
            obj_size = item["Size"]
        if "LastModified" in item:
            lastModified = item["LastModified"]
        log.debug(f"{objid}: {etag} {obj_size} {lastModified}")

        if lastModified > results["lastModified"]:
            log.debug(f"changing lastModified from: {results['lastModified']} to {lastModified}")
            results["lastModified"] = lastModified

        is_chunk = False
        if isValidChunkId(objid):
            is_chunk = True
            results["num_chunks"] += 1
            results["allocated_bytes"] += obj_size
        else:
            results["metadata_bytes"] += obj_size

        if is_chunk or getCollectionForId(objid) == "datasets":
            if is_chunk:
                dsetid = getDatasetId(objid)
            else:
                dsetid = objid
            datasets = results["datasets"]
            if dsetid not in datasets:
                dataset_info = {}
                dataset_info["lastModified"] = 0
                dataset_info["num_chunks"] = 0
                dataset_info["allocated_bytes"] = 0
                datasets[dsetid] = dataset_info
            dataset_info = datasets[dsetid]
            if lastModified > dataset_info["lastModified"]:
                dataset_info["lastModified"] = lastModified
            if is_chunk:
                dataset_info["num_chunks"] += 1
                dataset_info["allocated_bytes"] += obj_size
        elif getCollectionForId(objid) == "groups":
            results["num_groups"] += 1
        elif getCollectionForId(objid) == "datatypes":
            results["num_datatypes"] += 1
        else:
            log.error(f"Unexpected collection type for id: {objid}")
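# Illustrative input (hypothetical key and values): scanRootCallback expects
# a dict mapping S3 keys to listing metadata such as ETag, Size, and
# LastModified. Keys that don't parse as object keys are skipped. The key
# layout shown here is an assumption for illustration only.
_example_s3keys = {
    "db/abcd-1234/d/ef56-7890/0_0": {   # hypothetical chunk key
        "ETag": "9a0364b9e99bb480dd25e1f0284c8555",
        "Size": 1024,
        "LastModified": 1628109600.0,
    },
}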
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")

    s3path = None
    s3offset = 0
    s3size = 0
    bucket = None
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    if "dset" in params:
        msg = "Unexpected dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    type_json = dset_json["type"]
    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    chunk_arr = await getChunk(app, chunk_id, dset_json, bucket=bucket,
                               s3path=s3path, s3offset=s3offset, s3size=s3size)
    if chunk_arr is None:
        # return a 404
        msg = f"Chunk {chunk_id} does not exist"
        log.info(msg)
        raise HTTPNotFound()

    resp = None
    if "query" in params:
        # do query selection
        query = params["query"]
        log.info(f"query: {query}")
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])
        values = []
        indices = []
        field_names = []
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug(f"x: {x}")
        eval_str = getEvalStr(query, "x", field_names)
        log.debug(f"eval_str: {eval_str}")
        where_result = np.where(eval(eval_str))
        log.debug(f"where_result: {where_result}")
        where_result_index = where_result[0]
        log.debug(f"where_result index: {where_result_index}")
        log.debug(f"boolean selection: {x[where_result_index]}")
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug(f"index: {index}")
            value = x[index].tolist()
            log.debug(f"value: {value}")
            json_val = bytesArrayToList(value)
            log.debug(f"json_value: {json_val}")
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result returning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
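# Illustrative sketch of the query path above (assumption: getEvalStr turns a
# query like "temp > 20" into a numpy expression over the bound variable "x",
# e.g. "x['temp'] > 20"). np.where on the resulting boolean array yields the
# matching row indices that the handler then maps back through the selection.
def _example_query_filter():
    dt = np.dtype([("temp", "int32"), ("pressure", "float32")])
    x = np.array([(18, 1.0), (25, 2.0), (30, 3.0)], dtype=dt)
    eval_str = "x['temp'] > 20"  # stand-in for getEvalStr output
    where_result = np.where(eval(eval_str))
    return where_result[0]  # -> array([1, 2])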
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")
        put_points = True
    else:
        log.info("POST Chunk get points")

    s3path = None
    s3offset = 0
    s3size = 0
    bucket = None
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"POST_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found, init a new chunk if this is a write request
    chunk_arr = await getChunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path,
                               s3offset=s3offset, s3size=s3size, chunk_init=put_points)
    if chunk_arr is None:
        if put_points:
            log.error("no array returned for put_points")
            raise HTTPInternalServerError()
        else:
            # get points on a non-existent S3 object?
            log.warn("S3 object not found for get points")
            raise HTTPNotFound()

    log.debug(f"chunk_arr.shape: {chunk_arr.shape}")

    if put_points:
        # writing point data
        # create a numpy array with the following type:
        #   (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)),
                               ("value", dset_dtype)])
        # np.fromstring is deprecated; frombuffer + copy gives a writable
        # array (the coords are adjusted in place below)
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype).copy()
        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        for i in range(num_points):
            elem = point_arr[i]
            log.debug(f"non-relative coordinate: {elem}")
            if rank == 1:
                coord = int(elem[0])
                coord = coord % chunk_layout[0]  # adjust to chunk relative
            else:
                coord = elem[0]  # index to update
                for dim in range(rank):
                    # adjust to chunk relative
                    coord[dim] = int(coord[dim]) % chunk_layout[dim]
                coord = tuple(coord)  # need to convert to a tuple
            log.debug(f"relative coordinate: {coord}")

            val = elem[1]  # value
            try:
                chunk_arr[coord] = val  # update the point
            except IndexError:
                msg = "Out of bounds point index for POST Chunk"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)
        log.info(f"set {chunk_id} to dirty")
    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        # read points as unsigned longs (np.fromstring is deprecated)
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")

        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points,), dtype=dset_dtype)

        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
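# Illustrative client-side encoding (hypothetical 2-d int32 dataset): the
# body of a put-points POST is the raw bytes of a structured array with one
# (coord, value) record per point, matching comp_dtype above. Coordinates
# are absolute; the handler reduces them to chunk-relative indices.
def _example_point_buffer():
    rank = 2
    dset_dtype = np.dtype("int32")
    comp_dtype = np.dtype([("coord", f"({rank},)uint64"), ("value", dset_dtype)])
    points = np.zeros((3,), dtype=comp_dtype)
    points["coord"] = [[0, 1], [2, 3], [4, 5]]  # absolute coordinates
    points["value"] = [10, 20, 30]
    return points.tobytes()  # send with Content-Type: application/octet-stream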
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
    else:
        expected_content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)
    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    rank = len(dims)
    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    resp = {}
    query_update = None
    limit = 0
    chunk_init = True
    input_arr = None
    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        if "Limit" in params:
            limit = int(params["Limit"])
        chunk_init = False
    else:
        # regular chunk update
        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")
        if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        # TBD - will it cause problems when failures are raised before reading data?
        input_bytes = await request_read(request)
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()
        input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=chunk_init, bucket=bucket)

    is_dirty = False
    if query:
        values = []
        indices = []
        if chunk_arr is not None:
            # do query selection
            field_names = list(dt.fields.keys())
            replace_mask = [None, ] * len(field_names)
            for i in range(len(field_names)):
                field_name = field_names[i]
                if field_name in query_update:
                    replace_mask[i] = query_update[field_name]
            log.debug(f"replace_mask: {replace_mask}")

            x = chunk_arr[selection]
            log.debug(f"put_query - x: {x}")
            eval_str = getEvalStr(query, "x", field_names)
            log.debug(f"put_query - eval_str: {eval_str}")
            where_result = np.where(eval(eval_str))
            log.debug(f"put_query - where_result: {where_result}")
            where_result_index = where_result[0]
            log.debug(f"put_query - where_result index: {where_result_index}")
            log.debug(f"put_query - boolean selection: {x[where_result_index]}")
            s = selection[0]
            count = 0
            for index in where_result_index:
                log.debug(f"put_query - index: {index}")
                value = x[index]
                log.debug(f"put_query - original value: {value}")
                for i in range(len(field_names)):
                    if replace_mask[i] is not None:
                        value[i] = replace_mask[i]
                log.debug(f"put_query - modified value: {value}")
                x[index] = value

                json_val = bytesArrayToList(value)
                log.debug(f"put_query - json_value: {json_val}")
                json_index = index.tolist() * s.step + s.start  # adjust for selection
                indices.append(json_index)
                values.append(json_val)
                count += 1
                is_dirty = True
                if limit > 0 and count >= limit:
                    log.info("put_query - got limit items")
                    break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result returning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # update chunk array
        chunk_arr[selection] = input_arr
        is_dirty = True
        resp = json_response({}, status=201)

    if is_dirty:
        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)
        log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    log.response(request, resp=resp)
    return resp
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    query_update = None
    limit = 0
    bucket = None
    input_arr = None

    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    if "Limit" in params:
        limit = int(params["Limit"])

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")

    if query:
        expected_content_type = "text/plain; charset=utf-8"
        chunk_init = False  # don't initialize new chunks on query update
    else:
        expected_content_type = "application/octet-stream"
        chunk_init = True

    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    # TBD - does this work with linked datasets?

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    type_json = dset_json["type"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, chunk_init=chunk_init)
    is_dirty = False
    if chunk_arr is None:
        if chunk_init:
            log.error("failed to create numpy array")
            raise HTTPInternalServerError()
        else:
            log.warn(f"chunk {chunk_id} not found")
            raise HTTPNotFound()

    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        if rank != 1:
            log.error("expected one-dimensional array for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        # TBD - send back binary response to SN node
        try:
            resp = chunkQuery(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr,
                              slices=selection, query=query, query_update=query_update,
                              limit=limit, return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
        if query_update and resp is not None:
            is_dirty = True
    else:
        # regular chunk update
        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")
        if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        # TBD - will it cause problems when failures are raised before reading data?
        input_bytes = await request_read(request)
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)
        chunkWriteSelection(chunk_arr=chunk_arr, slices=selection, data=input_arr)
        is_dirty = True
        # chunk update successful
        resp = {}

    if is_dirty:
        save_chunk(app, chunk_id, bucket=bucket)

    resp = json_response(resp, status=201)
    log.response(request, resp=resp)
    return resp
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    if "count" not in params:
        log.warn("expected count param")
        raise HTTPBadRequest()
    num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points - num_points: {num_points}")
        put_points = True
    else:
        log.info(f"POST Chunk get points - num_points: {num_points}")

    s3path = None
    s3offset = 0
    s3size = 0
    bucket = None
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"POST_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    rank = len(dims)
    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    if rank == 1:
        coord_type_str = "uint64"
    else:
        coord_type_str = f"({rank},)uint64"

    if put_points:
        # create a numpy array with the following type:
        #   (coord1, coord2, ...) | dset_dtype
        point_dt = np.dtype([("coord", np.dtype(coord_type_str)),
                             ("value", dset_dtype)])
        point_shape = (num_points,)
        chunk_init = True
    else:
        point_dt = np.dtype('uint64')
        point_shape = (num_points, rank)
        chunk_init = False

    point_arr = bytesToArray(input_bytes, point_dt, point_shape)

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path,
                                s3offset=s3offset, s3size=s3size, chunk_init=chunk_init)
    if chunk_arr is None:
        log.warn(f"chunk {chunk_id} not found")
        raise HTTPNotFound()

    if put_points:
        # writing point data
        try:
            chunkWritePoints(chunk_id=chunk_id, chunk_layout=dims,
                             chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkWritePoints: {ve}")
            raise HTTPBadRequest()
        save_chunk(app, chunk_id, bucket=bucket)  # lazily write chunk to storage
        # write empty response
        resp = json_response({})
    else:
        # read points
        try:
            output_arr = chunkReadPoints(chunk_id=chunk_id, chunk_layout=dims,
                                         chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkReadPoints: {ve}")
            raise HTTPBadRequest()
        output_data = arrayToBytes(output_arr)
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    return resp
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")

    bucket = None
    s3path = None
    s3offset = None
    s3size = None
    query = None
    limit = 0
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3size']}")
            raise HTTPBadRequest()
    if "query" in params:
        query = params["query"]
    if "Limit" in params:
        limit = int(params["Limit"])

    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    log.debug(f"dset_json: {dset_json}")

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path,
                                s3offset=s3offset, s3size=s3size, chunk_init=False)
    if chunk_arr is None:
        msg = f"chunk {chunk_id} not found"
        log.warn(msg)
        raise HTTPNotFound()

    if query:
        # run given query
        try:
            read_resp = chunkQuery(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr,
                                   slices=selection, query=query, limit=limit, return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
    else:
        # read selected data from chunk
        output_arr = chunkReadSelection(chunk_arr, slices=selection)
        read_resp = arrayToBytes(output_arr)

    # write response
    if isinstance(read_resp, bytes):
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(read_resp)
            await resp.prepare(request)
            await resp.write(read_resp)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()
    else:
        # JSON response
        resp = json_response(read_resp)
    return resp
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in PUT request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")
    dset_id = getDatasetId(chunk_id)
    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)
    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    rank = len(dims)
    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    # check that the content_length is what we expect
    if itemsize != 'H5T_VARIABLE':
        log.debug("expect content_length: {}".format(num_elements * itemsize))
    log.debug(f"actual content_length: {request.content_length}")

    if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
        msg = "Expected content_length of: {}, but got: {}".format(
            num_elements * itemsize, request.content_length)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming data
    # TBD - will it cause problems when failures are raised before reading data?
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes),
                                                    request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()

    input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=True, bucket=bucket)

    # update chunk array
    chunk_arr[selection] = input_arr
    chunk_cache = app["chunk_cache"]
    chunk_cache.setDirty(chunk_id)
    log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

    # async write to S3
    dirty_ids = app["dirty_ids"]
    now = int(time.time())
    dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp