def getShuffle(app, dset_id, dset_json): shuffle_size = 0 if isShuffle(dset_json): type_json = dset_json["type"] item_size = getItemSize(type_json) if item_size == 'H5T_VARIABLE': log.warn( f"shuffle filter can't be used on variable datatype for datasset: {dset_id}" ) else: shuffle_size = item_size log.debug(f"got shuffle_size: {shuffle_size}") else: log.debug("isShuffle is false") if shuffle_size > 1: shuffle_map = app['shuffle_map'] if dset_id not in shuffle_map: # save the shuffle size so the lazy chunk writer can access it shuffle_map[dset_id] = shuffle_size log.debug(f"update shuffle_map {dset_id}: {shuffle_size}") return shuffle_size
async def PUT_AttributeValue(request): """HTTP method to update an attributes data""" log.request(request) log.info("PUT_AttributeValue") app = request.app collection = getRequestCollectionName(request) # returns datasets|groups|datatypes obj_id = request.match_info.get('id') if not obj_id: msg = "Missing object id" log.warn(msg) raise HTTPBadRequest(reason=msg) if not isValidUuid(obj_id, obj_class=collection): msg = f"Invalid object id: {obj_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) attr_name = request.match_info.get('name') log.debug(f"Attribute name: [{attr_name}]") validateAttributeName(attr_name) log.info(f"PUT Attribute Value id: {obj_id} name: {attr_name}") username, pswd = getUserPasswordFromRequest(request) # write actions need auth await validateUserPassword(app, username, pswd) if not request.has_body: msg = "PUT AttributeValue with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) domain = getDomainFromRequest(request) if not isValidDomain(domain): msg = f"Invalid domain: {domain}" log.warn(msg) raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) # get domain JSON domain_json = await getDomainJson(app, domain) if "root" not in domain_json: log.error(f"Expected root key for domain: {domain}") raise HTTPInternalServerError() # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "update") req = getDataNodeUrl(app, obj_id) req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name log.debug("get Attribute: " + req) params = {} if bucket: params["bucket"] = bucket dn_json = await http_get(app, req, params=params) log.debug("got attributes json from dn for obj_id: " + str(obj_id)) log.debug(f"got dn_json: {dn_json}") attr_shape = dn_json["shape"] if attr_shape["class"] == 'H5S_NULL': msg = "Null space attributes can not be updated" log.warn(msg) raise HTTPBadRequest(reason=msg) np_shape = getShapeDims(attr_shape) type_json = dn_json["type"] np_dtype = createDataType(type_json) # np datatype request_type = "json" if "Content-Type" in request.headers: # client should use "application/octet-stream" for binary transfer content_type = request.headers["Content-Type"] if content_type not in ("application/json", "application/octet-stream"): msg = f"Unknown content_type: {content_type}" log.warn(msg) raise HTTPBadRequest(reason=msg) if content_type == "application/octet-stream": log.debug("PUT AttributeValue - request_type is binary") request_type = "binary" else: log.debug("PUT AttribueValue - request type is json") binary_data = None if request_type == "binary": item_size = getItemSize(type_json) if item_size == 'H5T_VARIABLE': msg = "Only JSON is supported for variable length data types" log.warn(msg) raise HTTPBadRequest(reason=msg) # read binary data binary_data = await request.read() if len(binary_data) != request.content_length: msg = f"Read {len(binary_data)} bytes, expecting: {request.content_length}" log.error(msg) raise HTTPInternalServerError() arr = None # np array to hold request data if binary_data: npoints = getNumElements(np_shape) if npoints*item_size != len(binary_data): msg = "Expected: " + str(npoints*item_size) + " bytes, but got: " + str(len(binary_data)) log.warn(msg) raise HTTPBadRequest(reason=msg) arr = np.fromstring(binary_data, dtype=np_dtype) arr = arr.reshape(np_shape) # conform to selection shape # convert to JSON for transmission to DN data = arr.tolist() value = bytesArrayToList(data) else: body = await request.json() if "value" not in body: msg = "PUT attribute value with no value in body" log.warn(msg) raise HTTPBadRequest(reason=msg) value = body["value"] # validate that the value agrees with type/shape try: arr = jsonToArray(np_shape, np_dtype, value) except ValueError: msg = "Bad Request: input data doesn't match selection" log.warn(msg) raise HTTPBadRequest(reason=msg) log.info(f"Got: {arr.size} array elements") # ready to add attribute now attr_json = {} attr_json["type"] = type_json attr_json["shape"] = attr_shape attr_json["value"] = value req = getDataNodeUrl(app, obj_id) req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name log.info(f"PUT Attribute Value: {req}") dn_json["value"] = value params = {} params = {"replace": 1} # let the DN know we can overwrite the attribute if bucket: params["bucket"] = bucket put_rsp = await http_put(app, req, params=params, data=attr_json) log.info(f"PUT Attribute Value resp: {put_rsp}") hrefs = [] # TBD req_rsp = { "hrefs": hrefs } # attribute creation successful resp = await jsonResponse(request, req_rsp) log.response(request, resp=resp) return resp
async def POST_Dataset(request): """HTTP method to create a new dataset object""" log.request(request) app = request.app username, pswd = getUserPasswordFromRequest(request) # write actions need auth await validateUserPassword(app, username, pswd) if not request.has_body: msg = "POST Datasets with no body" log.warn(msg) raise HTTPBadRequest(reason=msg) body = await request.json() # get domain, check authorization domain = getDomainFromRequest(request) if not isValidDomain(domain): msg = "Invalid host value: {}".format(domain) log.warn(msg) raise HTTPBadRequest(reason=msg) domain_json = await getDomainJson(app, domain, reload=True) root_id = domain_json["root"] aclCheck(domain_json, "create", username) # throws exception if not allowed if "root" not in domain_json: msg = "Expected root key for domain: {}".format(domain) log.warn(msg) raise HTTPBadRequest(reason=msg) # # validate type input # if "type" not in body: msg = "POST Dataset has no type key in body" log.warn(msg) raise HTTPBadRequest(reason=msg) datatype = body["type"] if isinstance(datatype, str) and datatype.startswith("t-"): # Committed type - fetch type json from DN ctype_id = datatype log.debug("got ctypeid: {}".format(ctype_id)) ctype_json = await getObjectJson(app, ctype_id) log.debug("ctype: {}".format(ctype_json)) if ctype_json["root"] != root_id: msg = "Referenced committed datatype must belong in same domain" log.warn(msg) raise HTTPBadRequest(reason=msg) datatype = ctype_json["type"] # add the ctype_id to type type datatype["id"] = ctype_id elif isinstance(datatype, str): try: # convert predefined type string (e.g. "H5T_STD_I32LE") to # corresponding json representation datatype = getBaseTypeJson(datatype) log.debug("got datatype: {}".format(datatype)) except TypeError: msg = "POST Dataset with invalid predefined type" log.warn(msg) raise HTTPBadRequest(reason=msg) validateTypeItem(datatype) item_size = getItemSize(datatype) # # Validate shape input # dims = None shape_json = {} if "shape" not in body: shape_json["class"] = "H5S_SCALAR" else: shape = body["shape"] if isinstance(shape, int): shape_json["class"] = "H5S_SIMPLE" dims = [ shape, ] shape_json["dims"] = dims elif isinstance(shape, str): # only valid string value is H5S_NULL if shape != "H5S_NULL": msg = "POST Datset with invalid shape value" log.warn(msg) raise HTTPBadRequest(reason=msg) shape_json["class"] = "H5S_NULL" elif isinstance(shape, list): if len(shape) == 0: shape_json["class"] = "H5S_SCALAR" else: shape_json["class"] = "H5S_SIMPLE" shape_json["dims"] = shape dims = shape else: msg = "Bad Request: shape is invalid" log.warn(msg) raise HTTPBadRequest(reason=msg) if dims is not None: for i in range(len(dims)): extent = dims[i] if not isinstance(extent, int): msg = "Invalid shape type" log.warn(msg) raise HTTPBadRequest(reason=msg) if extent < 0: msg = "shape dimension is negative" log.warn(msg) raise HTTPBadRequest(reason=msg) maxdims = None if "maxdims" in body: if dims is None: msg = "Maxdims cannot be supplied if space is NULL" log.warn(msg) raise HTTPBadRequest(reason=msg) maxdims = body["maxdims"] if isinstance(maxdims, int): dim1 = maxdims maxdims = [dim1] elif isinstance(maxdims, list): pass # can use as is else: msg = "Bad Request: maxdims is invalid" log.warn(msg) raise HTTPBadRequest(reason=msg) if len(dims) != len(maxdims): msg = "Maxdims rank doesn't match Shape" log.warn(msg) raise HTTPBadRequest(reason=msg) if maxdims is not None: for extent in maxdims: if not isinstance(extent, int): msg = "Invalid maxdims type" log.warn(msg) raise HTTPBadRequest(reason=msg) if extent < 0: msg = "maxdims dimension is negative" log.warn(msg) raise HTTPBadRequest(reason=msg) if len(maxdims) != len(dims): msg = "Bad Request: maxdims array length must equal shape array length" log.warn(msg) raise HTTPBadRequest(reason=msg) shape_json["maxdims"] = [] for i in range(len(dims)): maxextent = maxdims[i] if not isinstance(maxextent, int): msg = "Bad Request: maxdims must be integer type" log.warn(msg) raise HTTPBadRequest(reason=msg) elif maxextent == 0: # unlimited dimension shape_json["maxdims"].append(0) elif maxextent < dims[i]: msg = "Bad Request: maxdims extent can't be smaller than shape extent" log.warn(msg) raise HTTPBadRequest(reason=msg) else: shape_json["maxdims"].append(maxextent) layout = None min_chunk_size = int(config.get("min_chunk_size")) max_chunk_size = int(config.get("max_chunk_size")) if 'creationProperties' in body: creationProperties = body["creationProperties"] if 'layout' in creationProperties: layout = creationProperties["layout"] validateChunkLayout(shape_json, item_size, layout) if layout is None and shape_json["class"] != "H5S_NULL": # default to chunked layout layout = {"class": "H5D_CHUNKED"} if layout and layout["class"] == 'H5D_CONTIGUOUS_REF': chunk_dims = getContiguousLayout(shape_json, item_size, chunk_min=min_chunk_size, chunk_max=max_chunk_size) layout["dims"] = chunk_dims log.debug(f"autoContiguous layout: {layout}") if layout and layout["class"] == 'H5D_CHUNKED' and "dims" not in layout: # do autochunking chunk_dims = guessChunk(shape_json, item_size) layout["dims"] = chunk_dims log.debug(f"initial autochunk layout: {layout}") if layout and layout["class"] == 'H5D_CHUNKED': chunk_dims = layout["dims"] chunk_size = getChunkSize(chunk_dims, item_size) log.debug("chunk_size: {}, min: {}, max: {}".format( chunk_size, min_chunk_size, max_chunk_size)) # adjust the chunk shape if chunk size is too small or too big adjusted_chunk_dims = None if chunk_size < min_chunk_size: log.debug( "chunk size: {} less than min size: {}, expanding".format( chunk_size, min_chunk_size)) adjusted_chunk_dims = expandChunk(chunk_dims, item_size, shape_json, chunk_min=min_chunk_size, layout_class=layout["class"]) elif chunk_size > max_chunk_size: log.debug( "chunk size: {} greater than max size: {}, expanding".format( chunk_size, max_chunk_size, layout_class=layout["class"])) adjusted_chunk_dims = shrinkChunk(chunk_dims, item_size, chunk_max=max_chunk_size) if adjusted_chunk_dims: log.debug( f"requested chunk_dimensions: {chunk_dims} modified dimensions: {adjusted_chunk_dims}" ) layout["dims"] = adjusted_chunk_dims if layout and layout["class"] in ('H5D_CHUNKED_REF', 'H5D_CHUNKED_REF_INDIRECT'): chunk_dims = layout["dims"] chunk_size = getChunkSize(chunk_dims, item_size) log.debug("chunk_size: {}, min: {}, max: {}".format( chunk_size, min_chunk_size, max_chunk_size)) # adjust the chunk shape if chunk size is too small or too big if chunk_size < min_chunk_size: log.warn( "chunk size: {} less than min size: {} for H5D_CHUNKED_REF dataset" .format(chunk_size, min_chunk_size)) elif chunk_size > max_chunk_size: log.warn( "chunk size: {} greater than max size: {}, for H5D_CHUNKED_REF dataset" .format(chunk_size, max_chunk_size, layout_class=layout["class"])) link_id = None link_title = None if "link" in body: link_body = body["link"] if "id" in link_body: link_id = link_body["id"] if "name" in link_body: link_title = link_body["name"] if link_id and link_title: log.info("link id: {}".format(link_id)) # verify that the referenced id exists and is in this domain # and that the requestor has permissions to create a link await validateAction(app, domain, link_id, username, "create") dset_id = createObjId("datasets", rootid=root_id) log.info("new dataset id: {}".format(dset_id)) dataset_json = { "id": dset_id, "root": root_id, "type": datatype, "shape": shape_json } if "creationProperties" in body: # TBD - validate all creationProperties creationProperties = body["creationProperties"] if "fillValue" in creationProperties: # validate fill value compatible with type dt = createDataType(datatype) fill_value = creationProperties["fillValue"] if isinstance(fill_value, list): fill_value = tuple(fill_value) try: np.asarray(fill_value, dtype=dt) except (TypeError, ValueError): msg = "Fill value {} not compatible with dataset type: {}".format( fill_value, datatype) log.warn(msg) raise HTTPBadRequest(reason=msg) dataset_json["creationProperties"] = creationProperties if layout is not None: dataset_json["layout"] = layout log.debug("create dataset: " + json.dumps(dataset_json)) req = getDataNodeUrl(app, dset_id) + "/datasets" post_json = await http_post(app, req, data=dataset_json) # create link if requested if link_id and link_title: link_json = {} link_json["id"] = dset_id link_json["class"] = "H5L_TYPE_HARD" link_req = getDataNodeUrl(app, link_id) link_req += "/groups/" + link_id + "/links/" + link_title log.info("PUT link - : " + link_req) put_rsp = await http_put(app, link_req, data=link_json) log.debug("PUT Link resp: {}".format(put_rsp)) # dataset creation successful resp = await jsonResponse(request, post_json, status=201) log.response(request, resp=resp) return resp
async def GET_AttributeValue(request): """HTTP method to return an attribute value""" log.request(request) app = request.app log.info("GET_AttributeValue") collection = getRequestCollectionName(request) # returns datasets|groups|datatypes obj_id = request.match_info.get('id') if not obj_id: msg = "Missing object id" log.warn(msg) raise HTTPBadRequest(reason=msg) if not isValidUuid(obj_id, obj_class=collection): msg = f"Invalid object id: {obj_id}" log.warn(msg) raise HTTPBadRequest(reason=msg) attr_name = request.match_info.get('name') validateAttributeName(attr_name) username, pswd = getUserPasswordFromRequest(request) if username is None and app['allow_noauth']: username = "******" else: await validateUserPassword(app, username, pswd) domain = getDomainFromRequest(request) if not isValidDomain(domain): msg = f"Invalid domain value: {domain}" log.warn(msg) raise HTTPBadRequest(reason=msg) bucket = getBucketForDomain(domain) # get domain JSON domain_json = await getDomainJson(app, domain) if "root" not in domain_json: log.error(f"Expected root key for domain: {domain}") raise HTTPBadRequest(reason="Unexpected Error") # TBD - verify that the obj_id belongs to the given domain await validateAction(app, domain, obj_id, username, "read") req = getDataNodeUrl(app, obj_id) req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name log.debug("get Attribute: " + req) params = {} if bucket: params["bucket"] = bucket dn_json = await http_get(app, req, params=params) log.debug("got attributes json from dn for obj_id: " + str(dn_json)) attr_shape = dn_json["shape"] log.debug(f"attribute shape: {attr_shape}") if attr_shape["class"] == 'H5S_NULL': msg = "Null space attributes can not be read" log.warn(msg) raise HTTPBadRequest(reason=msg) accept_type = getAcceptType(request) response_type = accept_type # will adjust later if binary not possible type_json = dn_json["type"] shape_json = dn_json["shape"] item_size = getItemSize(type_json) if item_size == 'H5T_VARIABLE' and accept_type != "json": msg = "Client requested binary, but only JSON is supported for variable length data types" log.info(msg) response_type = "json" if response_type == "binary": arr_dtype = createDataType(type_json) # np datatype np_shape = getShapeDims(shape_json) try: arr = jsonToArray(np_shape, arr_dtype, dn_json["value"]) except ValueError: msg = "Bad Request: input data doesn't match selection" log.warn(msg) raise HTTPBadRequest(reason=msg) output_data = arr.tobytes() log.debug(f"GET AttributeValue - returning {len(output_data)} bytes binary data") # write response try: resp = StreamResponse() resp.content_type = "application/octet-stream" resp.content_length = len(output_data) # allow CORS resp.headers['Access-Control-Allow-Origin'] = '*' resp.headers['Access-Control-Allow-Methods'] = "GET, POST, DELETE, PUT, OPTIONS" resp.headers['Access-Control-Allow-Headers'] = "Content-Type, api_key, Authorization" await resp.prepare(request) await resp.write(output_data) except Exception as e: log.error(f"Got exception: {e}") raise HTTPInternalServerError() finally: await resp.write_eof() else: resp_json = {} if "value" in dn_json: resp_json["value"] = dn_json["value"] hrefs = [] obj_uri = '/' + collection + '/' + obj_id attr_uri = obj_uri + '/attributes/' + attr_name hrefs.append({'rel': 'self', 'href': getHref(request, attr_uri)}) hrefs.append({'rel': 'home', 'href': getHref(request, '/')}) hrefs.append({'rel': 'owner', 'href': getHref(request, obj_uri)}) resp_json["hrefs"] = hrefs resp = await jsonResponse(request, resp_json) log.response(request, resp=resp) return resp