Example #1
async def getChunk(app, chunk_id, dset_json, s3path=None, s3offset=0, s3size=0, chunk_init=False):
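    """Return the numpy array for the given chunk_id.

    The chunk cache is checked first, then S3 (or, for reference layouts,
    the location given by s3path/s3offset/s3size).  If the object is not
    found and chunk_init is True, a new array is initialized from the
    dataset's fill value.  Returns None if the chunk does not exist and
    chunk_init is False.
    """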
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(f"unable to initialize chunk {chunk_id} for reference layouts")
        raise HTTPInternalServerError()

    log.debug(f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} per, dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}")

    chunk_arr = None 
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)

    bucket = None
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')   # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index+1):]
        log.debug(f"Using bucket: {bucket} and s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)

    log.debug("getChunk s3key: {}".format(s3key))
    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isS3Obj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]
            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(f"s3 read request for {chunk_id} was requested at: {read_start_time}")
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(f"s3 read for chunk {chunk_id} timed-out, initiaiting a new read")
            
            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug("Reading chunk {} from S3".format(s3key))
                deflate_level = getDeflate(app, dset_id, dset_json)
                chunk_bytes = await getS3Bytes(app, s3key, deflate_level=deflate_level, s3offset=s3offset, s3size=s3size, bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id] 
                else:
                    log.warn(f"expected to find {chunk_id} in pending_s3_read map")

            
            chunk_arr = np.fromstring(chunk_bytes, dtype=dt)
            chunk_arr = chunk_arr.reshape(dims)

            log.debug("chunk size: {}".format(chunk_arr.size)) 
           
        elif chunk_init:
            log.debug("Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")
            
        if chunk_arr is not None:
            # check that there's room in the cache before adding it
            if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                # no room in the cache, wait till space is freed by the s3sync task
                wait_start = time.time()
                while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                    log.warn(f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, sleeping till items are flushed")
                    if time.time() - wait_start > MAX_WAIT_TIME:
                        log.error(f"unable to save updated chunk {chunk_id} to cache returning 503 error")
                        raise HTTPServiceUnavailable()
                    await asyncio.sleep(1)
                 
            chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
Example #2
async def GET_Chunk(request):
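    """HTTP GET handler: return binary data for the chunk selection given
    by the query params, or run a query selection against a
    one-dimensional chunk."""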
    log.request(request)
    app = request.app 
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    
    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    
    log.debug("dset_json: {}".format(dset_json)) 
    type_json = dset_json["type"]
     
    dims = getChunkLayout(dset_json)
    log.debug("got dims: {}".format(dims))
    rank = len(dims)  
         
    # get chunk selection from query params
    if "select" in params:
        log.debug("select: {}".format(params["select"]))
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)   
    selection = tuple(selection)  
    log.debug("got selection: {}".format(selection))

    dt = createDataType(type_json)
    log.debug("dtype: {}".format(dt))

    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug("selection[{}]: {}".format(i, s))

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - useing s3path: {s3path}")
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_arr = await getChunk(app, chunk_id, dset_json, s3path=s3path, s3offset=s3offset, s3size=s3size)

    if chunk_arr is None:
        # return a 404
        msg = "Chunk {} does not exist".format(chunk_id)
        log.info(msg)
        raise HTTPNotFound()
     
    resp = None
    
    if "query" in params:
        # do query selection
        query = params["query"]
        log.info("query: {}".format(query))
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])

        values = []
        indices = []
        field_names = [] 
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug("x: {}".format(x))
        eval_str = getEvalStr(query, "x", field_names)
        log.debug("eval_str: {}".format(eval_str))
        where_result = np.where(eval(eval_str))
        log.debug("where_result: {}".format(where_result))
        where_result_index = where_result[0]
        log.debug("whare_result index: {}".format(where_result_index))
        log.debug("boolean selection: {}".format(x[where_result_index]))
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug("index: {}".format(index))
            value = x[index].tolist()
            log.debug("value: {}".format(value))
            json_val = bytesArrayToList(value)
            log.debug("json_value: {}".format(json_val))
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break
         
        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()

        finally:
            await resp.write_eof()

    return resp
Example #3
async def POST_Chunk(request):
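    """HTTP POST handler: read or write individual points in a chunk,
    depending on the 'action' query param."""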
    log.request(request)
    app = request.app 
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info("POST Chunk put points, num_points: {}".format(num_points))

        put_points = True
    else:
        log.info("POST Chunk get points")

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info("POST chunk_id: {}".format(chunk_id))
    chunk_index = getChunkIndex(chunk_id)
    log.debug("chunk_index: {}".format(chunk_index))
    
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug("dset_json: {}".format(dset_json))
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug("chunk_coord: {}".format(chunk_coord))
    
    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = "Unexpected content_type: {}".format(content_type)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
     
    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug("dtype: {}".format(dset_dtype))

    dims = getChunkLayout(dset_json)
    log.debug("got dims: {}".format(dims))
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request) 
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes), request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=put_points)

    if put_points:
        # writing point data

        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = "({},)uint64".format(rank)
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
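        # e.g. for rank 2 and a 4-byte int value type, comp_dtype would be
        # np.dtype([("coord", "(2,)uint64"), ("value", "int32")]), i.e.
        # 20 bytes per point record (illustrative values)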
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype)  # np.fromstring is deprecated
        if len(point_arr) != num_points:
            msg = "Unexpected size of point array, got: {} expected: {}".format(len(point_arr), num_points)
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        for i in range(num_points):
            elem = point_arr[i]
            if rank == 1:
                coord = int(elem[0])
            else:
                coord = tuple(elem[0]) # index to update
            val = elem[1]   # value 
            chunk_arr[coord] = val # update the point

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3   
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = now
        log.info("set {} to dirty".format(chunk_id))
    
    else:
        # reading point data  
        point_dt = np.dtype('uint64')  # use unsigned long for point index  
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)  # read points as unsigned longs
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug("got {} points".format(num_points))

        point_arr = point_arr.reshape((num_points, rank))    
        output_arr = np.zeros((num_points,), dtype=dset_dtype)
    
        for i in range(num_points):
            point = point_arr[i,:]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val
     
    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

        
    return resp
Example #4
async def POST_Chunk(request):
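    """HTTP POST handler: read or write individual points in a chunk.
    Also supports reading points from a reference layout via the
    s3path/s3offset/s3size query params."""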
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")

        put_points = True
    else:
        log.info("POST Chunk get points")
    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               bucket=bucket,
                               s3path=s3path,
                               s3offset=s3offset,
                               s3size=s3size,
                               chunk_init=put_points)

    if chunk_arr is None:
        if put_points:
            log.error("no array returned for put_points")
            raise HTTPInternalServerError()
        else:
            # get points on a non-existent S3 object
            log.warn("S3 object not found for get points")
            raise HTTPNotFound()

    log.debug(f"chunk_arr.shape: {chunk_arr.shape}")

    if put_points:
        # writing point data

        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)),
                               ("value", dset_dtype)])
        # copy so the coord fields below can be adjusted in place
        # (np.frombuffer alone returns a read-only view; np.fromstring is deprecated)
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype).copy()

        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        for i in range(num_points):
            elem = point_arr[i]
            log.debug(f"non-relative coordinate: {elem}")
            if rank == 1:
                coord = int(elem[0])
                coord = coord % chunk_layout[0]  # adjust to chunk relative

            else:
                coord = elem[0]  # index to update
                for dim in range(rank):
                    # adjust to chunk relative
                    coord[dim] = int(coord[dim]) % chunk_layout[dim]
                coord = tuple(coord)  # need to convert to a tuple
            log.debug(f"relative coordinate: {coord}")

            val = elem[1]  # value
            try:
                chunk_arr[coord] = val  # update the point
            except IndexError:
                msg = "Out of bounds point index for POST Chunk"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)
        log.info(f"set {chunk_id} to dirty")

    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        # read points as unsigned longs (np.fromstring is deprecated)
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")

        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points, ), dtype=dset_dtype)

        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    return resp
Example #5
async def PUT_Chunk(request):
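    """HTTP PUT handler: write binary data to the chunk selection given
    by the query params and mark the chunk dirty for the async S3 sync."""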
    log.request(request)
    app = request.app 
    params = request.rel_url.query
  
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
  
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = "Unexpected content_type: {}".format(content_type)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug("dset_json: {}".format(dset_json))

    dims = getChunkLayout(dset_json)
   
    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    
    rank = len(dims)  
     
    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)   
    selection = tuple(selection)  
    log.debug("got selection: {}".format(selection))

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'  
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug("dtype: {}".format(dt))
    
    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug("selection[{}]: {}".format(i, s))

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent
        
    # check that the content_length is what we expect
    if itemsize != 'H5T_VARIABLE':
        log.debug("expect content_length: {}".format(num_elements*itemsize))
    log.debug("actual content_length: {}".format(request.content_length))

    if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
        msg = "Expected content_length of: {}, but got: {}".format(num_elements*itemsize, request.content_length)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming data
    input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes), request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()
        
    input_arr = bytesToArray(input_bytes, dt, mshape)

    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=True)

    # update chunk array
    chunk_arr[selection] = input_arr
    chunk_cache = app["chunk_cache"]
    chunk_cache.setDirty(chunk_id)
    log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

    # async write to S3   
    dirty_ids = app["dirty_ids"]
    now = int(time.time())
    dirty_ids[chunk_id] = now
    
    # chunk update successful     
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
Example #6
async def POST_Dataset(request):
    """HTTP method to create a new dataset object"""
    log.request(request)
    app = request.app

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "POST Datasets with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()

    # get domain, check authorization
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain_json = await getDomainJson(app, domain, reload=True)
    if "root" not in domain_json:
        msg = "Expected root key for domain: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    root_id = domain_json["root"]

    aclCheck(domain_json, "create", username)  # throws exception if not allowed

    #
    # validate type input
    #
    if "type" not in body:
        msg = "POST Dataset has no type key in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    datatype = body["type"]
    if isinstance(datatype, str) and datatype.startswith("t-"):
        # Committed type - fetch type json from DN
        ctype_id = datatype
        log.debug("got ctypeid: {}".format(ctype_id))
        ctype_json = await getObjectJson(app, ctype_id)
        log.debug("ctype: {}".format(ctype_json))
        if ctype_json["root"] != root_id:
            msg = "Referenced committed datatype must belong in same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        datatype = ctype_json["type"]
        # add the ctype_id to the type
        datatype["id"] = ctype_id
    elif isinstance(datatype, str):
        try:
            # convert predefined type string (e.g. "H5T_STD_I32LE") to
            # corresponding json representation
            datatype = getBaseTypeJson(datatype)
            log.debug("got datatype: {}".format(datatype))
        except TypeError:
            msg = "POST Dataset with invalid predefined type"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    validateTypeItem(datatype)
    item_size = getItemSize(datatype)

    #
    # Validate shape input
    #
    dims = None
    shape_json = {}

    if "shape" not in body:
        shape_json["class"] = "H5S_SCALAR"
    else:
        shape = body["shape"]
        if isinstance(shape, int):
            shape_json["class"] = "H5S_SIMPLE"
            dims = [shape]
            shape_json["dims"] = dims
        elif isinstance(shape, str):
            # only valid string value is H5S_NULL
            if shape != "H5S_NULL":
                msg = "POST Datset with invalid shape value"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            shape_json["class"] = "H5S_NULL"
        elif isinstance(shape, list):
            if len(shape) == 0:
                shape_json["class"] = "H5S_SCALAR"
            else:
                shape_json["class"] = "H5S_SIMPLE"
                shape_json["dims"] = shape
                dims = shape
        else:
            msg = "Bad Request: shape is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if dims is not None:
        for i in range(len(dims)):
            extent = dims[i]
            if not isinstance(extent, int):
                msg = "Invalid shape type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if extent < 0:
                msg = "shape dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

    maxdims = None
    if "maxdims" in body:
        if dims is None:
            msg = "Maxdims cannot be supplied if space is NULL"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        maxdims = body["maxdims"]
        if isinstance(maxdims, int):
            dim1 = maxdims
            maxdims = [dim1]
        elif isinstance(maxdims, list):
            pass  # can use as is
        else:
            msg = "Bad Request: maxdims is invalid"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if len(dims) != len(maxdims):
            msg = "Maxdims rank doesn't match Shape"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if maxdims is not None:
        for extent in maxdims:
            if not isinstance(extent, int):
                msg = "Invalid maxdims type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            if extent < 0:
                msg = "maxdims dimension is negative"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
        shape_json["maxdims"] = []
        for i in range(len(dims)):
            maxextent = maxdims[i]
            if not isinstance(maxextent, int):
                msg = "Bad Request: maxdims must be integer type"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            elif maxextent == 0:
                # unlimited dimension
                shape_json["maxdims"].append(0)
            elif maxextent < dims[i]:
                msg = "Bad Request: maxdims extent can't be smaller than shape extent"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            else:
                shape_json["maxdims"].append(maxextent)

    layout = None
    min_chunk_size = int(config.get("min_chunk_size"))
    max_chunk_size = int(config.get("max_chunk_size"))
    if 'creationProperties' in body:
        creationProperties = body["creationProperties"]
        if 'layout' in creationProperties:
            layout = creationProperties["layout"]

            validateChunkLayout(shape_json, item_size, layout)

    if layout is None and shape_json["class"] != "H5S_NULL":
        # default to chunked layout
        layout = {"class": "H5D_CHUNKED"}

    if layout and layout["class"] == 'H5D_CONTIGUOUS_REF':
        chunk_dims = getContiguousLayout(shape_json,
                                         item_size,
                                         chunk_min=min_chunk_size,
                                         chunk_max=max_chunk_size)
        layout["dims"] = chunk_dims
        log.debug(f"autoContiguous layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED' and "dims" not in layout:
        # do autochunking
        chunk_dims = guessChunk(shape_json, item_size)
        layout["dims"] = chunk_dims
        log.debug(f"initial autochunk layout: {layout}")

    if layout and layout["class"] == 'H5D_CHUNKED':
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)

        log.debug("chunk_size: {}, min: {}, max: {}".format(
            chunk_size, min_chunk_size, max_chunk_size))
        # adjust the chunk shape if chunk size is too small or too big
        adjusted_chunk_dims = None
        if chunk_size < min_chunk_size:
            log.debug(
                "chunk size: {} less than min size: {}, expanding".format(
                    chunk_size, min_chunk_size))
            adjusted_chunk_dims = expandChunk(chunk_dims,
                                              item_size,
                                              shape_json,
                                              chunk_min=min_chunk_size,
                                              layout_class=layout["class"])
        elif chunk_size > max_chunk_size:
            log.debug(
                "chunk size: {} greater than max size: {}, shrinking".format(
                    chunk_size, max_chunk_size))
            adjusted_chunk_dims = shrinkChunk(chunk_dims,
                                              item_size,
                                              chunk_max=max_chunk_size)
        if adjusted_chunk_dims:
            log.debug(
                f"requested chunk_dimensions: {chunk_dims} modified dimensions: {adjusted_chunk_dims}"
            )
            layout["dims"] = adjusted_chunk_dims

    if layout and layout["class"] in ('H5D_CHUNKED_REF',
                                      'H5D_CHUNKED_REF_INDIRECT'):
        chunk_dims = layout["dims"]
        chunk_size = getChunkSize(chunk_dims, item_size)

        log.debug("chunk_size: {}, min: {}, max: {}".format(
            chunk_size, min_chunk_size, max_chunk_size))
        # adjust the chunk shape if chunk size is too small or too big
        if chunk_size < min_chunk_size:
            log.warn(
                "chunk size: {} less than min size: {} for H5D_CHUNKED_REF dataset"
                .format(chunk_size, min_chunk_size))
        elif chunk_size > max_chunk_size:
            log.warn(
                "chunk size: {} greater than max size: {} for H5D_CHUNKED_REF dataset"
                .format(chunk_size, max_chunk_size))

    link_id = None
    link_title = None
    if "link" in body:
        link_body = body["link"]
        if "id" in link_body:
            link_id = link_body["id"]
        if "name" in link_body:
            link_title = link_body["name"]
        if link_id and link_title:
            log.info("link id: {}".format(link_id))
            # verify that the referenced id exists and is in this domain
            # and that the requestor has permissions to create a link
            await validateAction(app, domain, link_id, username, "create")

    dset_id = createObjId("datasets", rootid=root_id)
    log.info("new  dataset id: {}".format(dset_id))

    dataset_json = {
        "id": dset_id,
        "root": root_id,
        "type": datatype,
        "shape": shape_json
    }

    if "creationProperties" in body:
        # TBD - validate all creationProperties
        creationProperties = body["creationProperties"]
        if "fillValue" in creationProperties:
            # validate fill value compatible with type
            dt = createDataType(datatype)
            fill_value = creationProperties["fillValue"]
            if isinstance(fill_value, list):
                fill_value = tuple(fill_value)
            try:
                np.asarray(fill_value, dtype=dt)
            except (TypeError, ValueError):
                msg = "Fill value {} not compatible with dataset type: {}".format(
                    fill_value, datatype)
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        dataset_json["creationProperties"] = creationProperties

    if layout is not None:
        dataset_json["layout"] = layout

    log.debug("create dataset: " + json.dumps(dataset_json))
    req = getDataNodeUrl(app, dset_id) + "/datasets"

    post_json = await http_post(app, req, data=dataset_json)

    # create link if requested
    if link_id and link_title:
        link_json = {}
        link_json["id"] = dset_id
        link_json["class"] = "H5L_TYPE_HARD"
        link_req = getDataNodeUrl(app, link_id)
        link_req += "/groups/" + link_id + "/links/" + link_title
        log.info("PUT link - : " + link_req)
        put_rsp = await http_put(app, link_req, data=link_json)
        log.debug("PUT Link resp: {}".format(put_rsp))

    # dataset creation successful
    resp = await jsonResponse(request, post_json, status=201)
    log.response(request, resp=resp)

    return resp
Example #7
async def PUT_Chunk(request):
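    """HTTP PUT handler: write a binary selection to the chunk, or apply
    a query-based field update to a compound-type chunk."""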
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
    else:
        expected_content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)

    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)

    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    resp = {}
    query_update = None
    limit = 0
    chunk_init = True
    input_arr = None
    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        if "Limit" in params:
            limit = int(params["Limit"])
        chunk_init = False
    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")

        if itemsize != 'H5T_VARIABLE' and (num_elements *
                                           itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               chunk_init=chunk_init,
                               bucket=bucket)
    is_dirty = False
    if query:
        values = []
        indices = []
        if chunk_arr is not None:
            # do query selection

            field_names = list(dt.fields.keys())
            replace_mask = [
                None,
            ] * len(field_names)
            for i in range(len(field_names)):
                field_name = field_names[i]
                if field_name in query_update:
                    replace_mask[i] = query_update[field_name]
            log.debug(f"replace_mask: {replace_mask}")

            x = chunk_arr[selection]
            log.debug(f"put_query - x: {x}")
            eval_str = getEvalStr(query, "x", field_names)
            log.debug(f"put_query - eval_str: {eval_str}")
            where_result = np.where(eval(eval_str))
            log.debug(f"put_query - where_result: {where_result}")
            where_result_index = where_result[0]
            log.debug(f"put_query - whare_result index: {where_result_index}")
            log.debug(
                f"put_query - boolean selection: {x[where_result_index]}")
            s = selection[0]
            count = 0
            for index in where_result_index:
                log.debug(f"put_query - index: {index}")
                value = x[index]
                log.debug(f"put_query - original value: {value}")
                for i in range(len(field_names)):
                    if replace_mask[i] is not None:
                        value[i] = replace_mask[i]
                log.debug(f"put_query - modified value: {value}")
                x[index] = value

                json_val = bytesArrayToList(value)
                log.debug(f"put_query - json_value: {json_val}")
                json_index = index.tolist() * s.step + s.start  # adjust for selection
                indices.append(json_index)
                values.append(json_val)
                count += 1
                is_dirty = True
                if limit > 0 and count >= limit:
                    log.info("put_query - got limit items")
                    break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # update chunk array
        chunk_arr[selection] = input_arr
        is_dirty = True
        resp = json_response({}, status=201)

    if is_dirty:
        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)
        log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    log.response(request, resp=resp)
    return resp
Example #8
async def PUT_Chunk(request):
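    """HTTP PUT handler: write a binary selection to the chunk, or run a
    query update via chunkQuery, then lazily persist with save_chunk."""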
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    query_update = None
    limit = 0
    bucket = None
    input_arr = None

    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    if "Limit" in params:
        limit = int(params["Limit"])
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
        chunk_init = False  # don't initialize new chunks on query update
    else:
        expected_content_type = "application/octet-stream"
        chunk_init = True
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")

    # TBD - does this work with linked datasets?
    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    type_json = dset_json["type"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, chunk_init=chunk_init)
    is_dirty = False
    if chunk_arr is None:
        if chunk_init:
            log.error(f"failed to create numpy array")
            raise HTTPInternalServerError()
        else:
            log.warn(f"chunk {chunk_id} not found")
            raise HTTPNotFound()

    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        if rank != 1:
            log.error("expected one-dimensional array for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        # TBD - send back binary response to SN node
        try:
            resp = chunkQuery(chunk_id=chunk_id, chunk_layout=dims,
                              chunk_arr=chunk_arr, slices=selection,
                              query=query, query_update=query_update,
                              limit=limit, return_json=True)
        except TypeError as te:
            log.warn(f"chunkQuery - TypeError: {te}")
            raise HTTPBadRequest()
        except ValueError as ve:
            log.warn(f"chunkQuery - ValueError: {ve}")
            raise HTTPBadRequest()
        if query_update and resp is not None:
            is_dirty = True

    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")

        if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)

        chunkWriteSelection(chunk_arr=chunk_arr, slices=selection, data=input_arr)
        is_dirty = True

        # chunk update successful
        resp = {}
    if is_dirty:
        save_chunk(app, chunk_id, bucket=bucket)

    resp = json_response(resp, status=201)
    log.response(request, resp=resp)
    return resp
Example #9
async def POST_Chunk(request):
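    """HTTP POST handler: read or write individual points in a chunk
    using the chunkReadPoints/chunkWritePoints helpers."""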
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" not in params:
        log.warn("expected count param")
        raise HTTPBadRequest()
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points - num_points: {num_points}")
        put_points = True
    else:
        log.info(f"POST Chunk get points - num_points: {num_points}")

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    dims = getChunkLayout(dset_json)
    rank = len(dims)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    if rank == 1:
        coord_type_str = "uint64"
    else:
        coord_type_str = f"({rank},)uint64"

    if put_points:
        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        point_dt = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
        point_shape = (num_points,)
        chunk_init = True
    else:
        point_dt = np.dtype('uint64')
        point_shape = (num_points, rank)
        chunk_init = False

    point_arr = bytesToArray(input_bytes, point_dt, point_shape)

    chunk_arr = await get_chunk(app, chunk_id, dset_json, bucket=bucket, s3path=s3path, s3offset=s3offset, s3size=s3size, chunk_init=chunk_init)
    if chunk_arr is None:
        log.warn(f"chunk {chunk_id} not found")
        raise HTTPNotFound()

    if put_points:
        # writing point data
        try:
            chunkWritePoints(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkWritePoints: {ve}")
            raise HTTPBadRequest()
        # write empty response
        resp = json_response({})

        save_chunk(app, chunk_id, bucket=bucket) # lazily write chunk to storage
    else:
        # read points
        try:
            output_arr = chunkReadPoints(chunk_id=chunk_id, chunk_layout=dims, chunk_arr=chunk_arr, point_arr=point_arr)
        except ValueError as ve:
            log.warn(f"got value error from chunkReadPoints: {ve}")
            raise HTTPBadRequest()
        output_data = arrayToBytes(output_arr)
        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    return resp
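
As a usage note: the binary body this handler parses is a packed numpy array whose layout depends on put_points. A minimal client-side sketch, assuming a hypothetical rank-2 dataset with int32 values (the dtype construction mirrors coord_type_str and point_dt above):

import numpy as np

# hypothetical rank-2 dataset with 4-byte int values
rank = 2
dset_dtype = np.dtype("int32")
# same structured layout the handler builds for put_points:
# each record is ((coord1, coord2), value)
point_dt = np.dtype([("coord", np.dtype(f"({rank},)uint64")), ("value", dset_dtype)])

points = np.zeros((3,), dtype=point_dt)
points["coord"] = [(0, 0), (1, 2), (3, 1)]  # chunk-relative coordinates
points["value"] = [10, 20, 30]
body = points.tobytes()  # POST this as application/octet-stream
# for a point read instead, the body is just the coordinates:
#   np.array([[0, 0], [1, 2]], dtype="uint64").tobytes()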
Example #10
async def PUT_AttributeValue(request):
    """HTTP method to update an attributes data"""
    log.request(request)
    log.info("PUT_AttributeValue")
    app = request.app
    collection = getRequestCollectionName(request) # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute Value id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT AttributeValue with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPInternalServerError()

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "update")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(obj_id)) 
    log.debug(f"got dn_json: {dn_json}")

    attr_shape = dn_json["shape"]
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be updated"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    np_shape = getShapeDims(attr_shape)
    type_json = dn_json["type"]
    np_dtype = createDataType(type_json)  # np datatype

    request_type = "json"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type not in ("application/json", "application/octet-stream"):
            msg = f"Unknown content_type: {content_type}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if content_type == "application/octet-stream":
            log.debug("PUT AttributeValue - request_type is binary")
            request_type = "binary"
        else:
            log.debug("PUT AttribueValue - request type is json")

    binary_data = None
    if request_type == "binary":
        item_size = getItemSize(type_json)

        if item_size == 'H5T_VARIABLE':
            msg = "Only JSON is supported for variable length data types"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        # read binary data
        binary_data = await request.read()
        if len(binary_data) != request.content_length:
            msg = f"Read {len(binary_data)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

    arr = None  # np array to hold request data

    if binary_data:
        npoints = getNumElements(np_shape)
        if npoints*item_size != len(binary_data):
            msg = "Expected: " + str(npoints*item_size) + " bytes, but got: " + str(len(binary_data))
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        arr = np.frombuffer(binary_data, dtype=np_dtype)  # np.fromstring is deprecated
        arr = arr.reshape(np_shape)  # conform to selection shape
        # convert to JSON for transmission to DN
        data = arr.tolist()
        value = bytesArrayToList(data)
    else:
        body = await request.json()   

        if "value" not in body:
            msg = "PUT attribute value with no value in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]

        # validate that the value agrees with type/shape
        try:
            arr = jsonToArray(np_shape, np_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    log.info(f"Got: {arr.size} array elements")
     
    # ready to add attribute now
    attr_json = {}
    attr_json["type"] = type_json
    attr_json["shape"] = attr_shape
    attr_json["value"] = value

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name  
    log.info(f"PUT Attribute Value: {req}")

    dn_json["value"] = value
    params = {"replace": 1}  # let the DN know we can overwrite the attribute
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute Value resp: {put_rsp}")
    
    hrefs = []  # TBD
    req_rsp = { "hrefs": hrefs }
    # attribute creation successful     
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
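
A rough client-side sketch of the binary variant this handler accepts, assuming a fixed-size attribute of shape (4,) with type H5T_STD_I32LE (the names here are illustrative, not part of the service code):

import numpy as np

np_dtype = np.dtype("<i4")  # H5T_STD_I32LE: 4-byte little-endian int
arr = np.array([1, 2, 3, 4], dtype=np_dtype)
binary_data = arr.tobytes()
# len(binary_data) == npoints * item_size (16 bytes here), matching the
# size check in the handler; send with Content-Type: application/octet-stream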
Example #11
async def GET_AttributeValue(request):
    """HTTP method to return an attribute value"""
    log.request(request)
    app = request.app 
    log.info("GET_AttributeValue")
    collection = getRequestCollectionName(request) # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)
    
    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain value: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "read")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.debug("get Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    dn_json = await http_get(app, req, params=params)
    log.debug("got attributes json from dn for obj_id: " + str(dn_json)) 

    attr_shape = dn_json["shape"]
    log.debug(f"attribute shape: {attr_shape}")
    if attr_shape["class"] == 'H5S_NULL':
        msg = "Null space attributes can not be read"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    accept_type = getAcceptType(request)
    response_type = accept_type    # will adjust later if binary not possible
    type_json = dn_json["type"]
    shape_json = dn_json["shape"]
    item_size = getItemSize(type_json)
    
    if item_size == 'H5T_VARIABLE' and accept_type != "json":
        msg = "Client requested binary, but only JSON is supported for variable length data types"
        log.info(msg)
        response_type = "json"

    if response_type == "binary":
        arr_dtype = createDataType(type_json)  # np datatype
        np_shape = getShapeDims(shape_json)
        try:
            arr = jsonToArray(np_shape, arr_dtype, dn_json["value"])
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        output_data = arr.tobytes()
        log.debug(f"GET AttributeValue - returning {len(output_data)} bytes binary data")
        # write response
        try: 
            resp = StreamResponse()
            resp.content_type = "application/octet-stream"
            resp.content_length = len(output_data)
            # allow CORS
            resp.headers['Access-Control-Allow-Origin'] = '*'
            resp.headers['Access-Control-Allow-Methods'] = "GET, POST, DELETE, PUT, OPTIONS"
            resp.headers['Access-Control-Allow-Headers'] = "Content-Type, api_key, Authorization"
            await resp.prepare(request)
            await resp.write(output_data)     
        except Exception as e:
            log.error(f"Got exception: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    else:
        resp_json = {}
        if "value" in dn_json:
            resp_json["value"] = dn_json["value"] 

        hrefs = []
        obj_uri = '/' + collection + '/' + obj_id
        attr_uri = obj_uri + '/attributes/' + attr_name
        hrefs.append({'rel': 'self', 'href': getHref(request, attr_uri)})
        hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
        hrefs.append({'rel': 'owner', 'href': getHref(request, obj_uri)})
        resp_json["hrefs"] = hrefs
        resp = await jsonResponse(request, resp_json)
        log.response(request, resp=resp)
    return resp
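
Since the handler returns either raw bytes or JSON depending on the Accept negotiation above, a client has to branch on the response content type. A minimal sketch; decode_attr_response is a hypothetical helper, not part of the service:

import json
import numpy as np

def decode_attr_response(content_type, body, np_dtype, np_shape):
    # binary responses carry the raw array bytes; JSON responses wrap the value
    if content_type == "application/octet-stream":
        return np.frombuffer(body, dtype=np_dtype).reshape(np_shape)
    return json.loads(body)["value"]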
Example #12
async def PUT_Attribute(request):
    """HTTP method to create a new attribute"""
    log.request(request)
    app = request.app
    collection = getRequestCollectionName(request) # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    log.info(f"PUT Attribute id: {obj_id} name: {attr_name}")
    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT Attribute with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()   

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    
    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")
    root_id = domain_json["root"]

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "create")

    if "type" not in body:
        msg = "PUT attribute with no type in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    datatype = body["type"]
    
    if isinstance(datatype, str) and datatype.startswith("t-"):
        # Committed type - fetch type json from DN
        ctype_id = datatype
        log.debug(f"got ctypeid: {ctype_id}") 
        ctype_json = await getObjectJson(app, ctype_id, bucket=bucket)  
        log.debug(f"ctype {ctype_id}: {ctype_json}")
        if ctype_json["root"] != root_id:
            msg = "Referenced committed datatype must belong in same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        datatype = ctype_json["type"]
        # add the ctype_id to the type
        datatype["id"] = ctype_id
    elif isinstance(datatype, str):
        try:
            # convert predefined type string (e.g. "H5T_STD_I32LE") to 
            # corresponding json representation
            datatype = getBaseTypeJson(datatype)
            log.debug(f"got datatype: {datatype}")
        except TypeError:
            msg = "PUT attribute with invalid predefined type"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg) 

    validateTypeItem(datatype)
    
    dims = None
    shape_json = {}
    if "shape" in body:
        shape_body = body["shape"]
        shape_class = None
        if isinstance(shape_body, dict) and "class" in shape_body:
            shape_class = shape_body["class"]
        elif isinstance(shape_body, str):
            shape_class = shape_body
        if shape_class:
            if shape_class == "H5S_NULL":
                shape_json["class"] = "H5S_NULL"
                if isinstance(shape_body, dict) and "dims" in shape_body:
                    msg = "can't include dims with null shape"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
                if isinstance(shape_body, dict) and "value" in body:
                    msg = "can't have H5S_NULL shape with value"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
            elif shape_class == "H5S_SCALAR":
                shape_json["class"] = "H5S_SCALAR"
                dims = getShapeDims(shape_body)
                if len(dims) != 1 or dims[0] != 1:
                    msg = "dimensions aren't valid for scalar attribute"
                    log.warn(msg)
                    raise HTTPBadRequest(reason=msg)
            elif shape_class == "H5S_SIMPLE":
                shape_json["class"] = "H5S_SIMPLE"
                dims = getShapeDims(shape_body)
                shape_json["dims"] = dims
            else:
                msg = f"Unknown shape class: {shape_class}"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
        else:
            # no class, interpret shape value as dimensions and 
            # use H5S_SIMPLE as class
            if isinstance(shape_body, list) and len(shape_body) == 0:
                shape_json["class"] = "H5S_SCALAR"
                dims = [1,]
            else:
                shape_json["class"] = "H5S_SIMPLE"
                dims = getShapeDims(shape_body)
                shape_json["dims"] = dims
    else:
        shape_json["class"] = "H5S_SCALAR"
        dims = [1,]
 
    
    if "value" in body:
        if dims is None:
            msg = "Bad Request: data can not be included with H5S_NULL space"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        value = body["value"]
        # validate that the value agrees with type/shape
        arr_dtype = createDataType(datatype)  # np datatype
        if len(dims) == 0:
            np_dims = [1,]
        else:
            np_dims = dims
        log.debug(f"attribute dims: {np_dims}")
        log.debug(f"attribute value: {value}")
        try:
            arr = jsonToArray(np_dims, arr_dtype, value)
        except ValueError:
            msg = "Bad Request: input data doesn't match selection"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        log.info(f"Got: {arr.size} array elements")
    else:
        value = None

    # ready to add attribute now
    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.info("PUT Attribute: " + req)

    attr_json = {}
    attr_json["type"] = datatype
    attr_json["shape"] = shape_json
    if value is not None:
        attr_json["value"] = value
    params = {}
    if bucket:
        params["bucket"] = bucket
    
    put_rsp = await http_put(app, req, params=params, data=attr_json)
    log.info(f"PUT Attribute resp: {put_rsp}")
    
    hrefs = []  # TBD
    req_rsp = { "hrefs": hrefs }
    # attribute creation successful   
    resp = await jsonResponse(request, req_rsp, status=201)  
    log.response(request, resp=resp)
    return resp
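
For reference, the shape-parsing branches above admit several request body forms. Illustrative payloads (the values are made up):

# no shape key -> H5S_SCALAR with dims [1]
scalar_attr = {"type": "H5T_STD_I32LE", "value": 42}

# bare dims list -> H5S_SIMPLE
simple_attr = {"type": "H5T_IEEE_F64LE", "shape": [2, 3],
               "value": [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]}

# H5S_NULL given as a string class -> no dims, no value allowed
null_attr = {"type": "H5T_STD_I8LE", "shape": "H5S_NULL"}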