Code Example #1
File: chunk_dn.py Project: pcrespov/hsds
async def GET_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {params.keys()}")

    s3path = None
    s3offset = 0
    s3size = 0
    bucket = None
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    if "dset" in params:
        msg = "Unexpected dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")
    type_json = dset_json["type"]

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               bucket=bucket,
                               s3path=s3path,
                               s3offset=s3offset,
                               s3size=s3size)

    if chunk_arr is None:
        # return a 404
        msg = f"Chunk {chunk_id} does not exist"
        log.info(msg)
        raise HTTPNotFound()

    resp = None

    if "query" in params:
        # do query selection
        query = params["query"]
        log.info(f"query: {query}")
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])

        values = []
        indices = []
        field_names = []
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug(f"x: {x}")
        eval_str = getEvalStr(query, "x", field_names)
        log.debug(f"eval_str: {eval_str}")
        where_result = np.where(eval(eval_str))
        log.debug(f"where_result: {where_result}")
        where_result_index = where_result[0]
        log.debug(f"whare_result index: {where_result_index}")
        log.debug(f"boolean selection: {x[where_result_index]}")
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug(f"index: {index}")
            value = x[index].tolist()
            log.debug(f"value: {value}")
            json_val = bytesArrayToList(value)
            log.debug(f"json_value: {json_val}")
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()

        finally:
            await resp.write_eof()

    return resp
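
The query branch above turns the client's query expression into a numpy expression via getEvalStr, applies np.where over the one-dimensional selection, and maps each hit back to dataset coordinates through the selection slice (index * step + start). A minimal sketch of that step, assuming a simplified stand-in for what getEvalStr produces:

import numpy as np

# compound array standing in for chunk_arr[selection]; values are illustrative
x = np.array([(1, 5), (2, 12), (3, 20)],
             dtype=np.dtype([("id", "<i4"), ("temp", "<i4")]))
eval_str = "x['temp'] > 10"  # assumed getEvalStr output for the query "temp > 10"
where_result = np.where(eval(eval_str))
s = slice(0, 6, 2)  # the chunk's selection along dimension 0
for index in where_result[0]:
    json_index = int(index) * s.step + s.start  # adjust for the selection
    print(json_index, x[index].tolist())  # -> 2 (2, 12) / 4 (3, 20)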
Code Example #2
async def GET_Group(request):
    """HTTP method to return JSON for group"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    h5path = None
    getAlias = False
    include_links = False
    include_attrs = False
    group_id = request.match_info.get('id')
    if not group_id and "h5path" not in params:
        # no id, or path provided, so bad request
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if group_id:
        log.info(f"GET_Group, id: {group_id}")
        # is the id a group id and not something else?
        if not isValidUuid(group_id, "Group"):
            msg = f"Invalid group id: {group_id}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if "getalias" in params:
            if params["getalias"]:
                getAlias = True
    if "h5path" in params:
        h5path = params["h5path"]
        if not group_id and h5path[0] != '/':
            msg = "h5paths must be absolute if no parent id is provided"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        log.info(f"GET_Group, h5path: {h5path}")
    if "include_links" in params and params["include_links"]:
        include_links = True
    if "include_attrs" in params and params["include_attrs"]:
        include_attrs = True

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    if h5path and h5path[0] == '/':
        # ignore the request path id (if given) and start
        # from root group for absolute paths

        domain_json = await getDomainJson(app, domain)
        if "root" not in domain_json:
            msg = f"Expected root key for domain: {domain}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        group_id = domain_json["root"]

    if h5path:
        group_id = await getObjectIdByPath(app,
                                           group_id,
                                           h5path,
                                           bucket=bucket
                                           )  # throws 404 if not found
        if not isValidUuid(group_id, "Group"):
            msg = f"No group exist with the path: {h5path}"
            log.warn(msg)
            raise HTTPNotFound()
        log.info(f"get group_id: {group_id} from h5path: {h5path}")

    # verify authorization to read the group
    await validateAction(app, domain, group_id, username, "read")

    # get authoritative state for group from DN (even if it's in the meta_cache).
    group_json = await getObjectJson(app,
                                     group_id,
                                     refresh=True,
                                     include_links=include_links,
                                     include_attrs=include_attrs,
                                     bucket=bucket)
    log.debug(f"domain from request: {domain}")
    group_json["domain"] = getPathForDomain(domain)
    if bucket:
        group_json["bucket"] = bucket

    if getAlias:
        root_id = group_json["root"]
        alias = []
        if group_id == root_id:
            alias.append('/')
        else:
            idpath_map = {root_id: '/'}
            h5path = await getPathForObjectId(app,
                                              root_id,
                                              idpath_map,
                                              tgt_id=group_id,
                                              bucket=bucket)
            if h5path:
                alias.append(h5path)
        group_json["alias"] = alias

    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({'rel': 'self', 'href': getHref(request, group_uri)})
    hrefs.append({
        'rel': 'links',
        'href': getHref(request, group_uri + '/links')
    })
    root_uri = '/groups/' + group_json["root"]
    hrefs.append({'rel': 'root', 'href': getHref(request, root_uri)})
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({
        'rel': 'attributes',
        'href': getHref(request, group_uri + '/attributes')
    })
    group_json["hrefs"] = hrefs

    resp = await jsonResponse(request, group_json)
    log.response(request, resp=resp)
    return resp
Code Example #3
File: import_ghcn_file.py Project: whigg/hsds
def sig_handler(sig, frame):
    log.warn("Caught signal: {}".format(str(sig)))
    print_results()
    sys.exit()
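
The handler above only takes effect once it is attached to the process signals. A minimal sketch of the registration (assumed wiring, not shown in the excerpt):

import signal

# route Ctrl-C and termination through sig_handler (defined above) so
# results are printed before the process exits
signal.signal(signal.SIGINT, sig_handler)
signal.signal(signal.SIGTERM, sig_handler)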
Code Example #4
File: link_dn.py Project: t20100/hsds
async def GET_Links(request):
    """HTTP GET method to return JSON for a link collection
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"GET links: {group_id}")
    if not isValidUuid(group_id, obj_class="group"):
        log.error(f"Unexpected group_id: {group_id}")
        raise HTTPInternalServerError()

    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
            log.info(f"GET_Links - using Limit: {limit}")
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.error(msg)  # should be validated by SN
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]
        log.info(f"GET_Links - using Marker: {marker}")

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)

    log.info(f"for id: {group_id} got group json: {group_json}")
    if "links" not in group_json:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    # return a list of links based on sorted dictionary keys
    link_dict = group_json["links"]
    titles = list(link_dict.keys())
    titles.sort()  # sort by key
    # TBD: provide an option to sort by create date

    start_index = 0
    if marker is not None:
        start_index = index(titles, marker) + 1
        if start_index == 0:
            # marker not found, return 404
            msg = f"Link marker: {marker}, not found"
            log.warn(msg)
            raise HTTPNotFound()

    end_index = len(titles)
    if limit is not None and (end_index - start_index) > limit:
        end_index = start_index + limit

    link_list = []
    for i in range(start_index, end_index):
        title = titles[i]
        link = copy(link_dict[title])
        link["title"] = title
        link_list.append(link)

    resp_json = {"links": link_list}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
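
The index(titles, marker) call above is not a builtin; it is presumably a project-local helper that returns -1 when the marker is absent, which is what makes the start_index == 0 check work. A sketch of the Marker/Limit paging under that assumption:

from bisect import bisect_left

def index(a, x):
    # binary search over a sorted list; -1 if x is not present
    i = bisect_left(a, x)
    if i != len(a) and a[i] == x:
        return i
    return -1

titles = sorted(["alpha", "beta", "delta", "gamma"])
marker, limit = "beta", 2
start_index = index(titles, marker) + 1  # page begins after the marker
end_index = min(len(titles), start_index + limit)
print(titles[start_index:end_index])  # -> ['delta', 'gamma']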
Code Example #5
async def healthCheck(app):
    """ Periodic method that either registers with headnode (if state in INITIALIZING) or
    calls headnode to verify vitals about this node (otherwise)"""

    # let the server event loop startup before starting the health check
    await asyncio.sleep(1)
    log.info("health check start")
    sleep_secs = config.get("node_sleep_time")

    while True:
        print("node_state:", app["node_state"])
        if "oio_proxy" in app:
            # for OIO post registration request every time interval
            await oio_register(app)
        elif "is_k8s" in app:
            await k8s_register(app)

        elif app["node_state"] == "INITIALIZING" or (
                app["node_state"] == "WAITING" and app["node_number"] < 0):
            # startup docker registration
            await register(app)
        else:
            # check in with the head node and make sure we are still active
            head_url = getHeadUrl(app)
            req_node = "{}/nodestate".format(head_url)
            log.debug("health check req {}".format(req_node))
            try:
                rsp_json = await http_get(app, req_node)
                if rsp_json is None or not isinstance(rsp_json, dict):
                    log.warn(
                        "invalid health check response: type: {} text: {}".
                        format(type(rsp_json), rsp_json))
                else:
                    log.debug("cluster_state: {}".format(
                        rsp_json["cluster_state"]))
                    if rsp_json["cluster_state"] != "READY" and app[
                            "node_state"] == "READY":
                        log.info("changing node_state to WAITING")
                        app["node_state"] = "WAITING"

                    # save the urls for each of the active nodes
                    sn_urls = {}
                    dn_urls = {}
                    #  or rsp_json["host"] is None or rsp_json["id"] != app["id"]
                    this_node = None
                    for node in rsp_json["nodes"]:
                        if node["node_type"] == app["node_type"] and node[
                                "node_number"] == app["node_number"]:
                            # this should be this node

                            if node["id"] != app["id"]:
                                # flag - to re-register
                                log.warn(
                                    "mis-match node ids, app: {} vs head: {} - re-initializing"
                                    .format(node["id"], app["id"]))
                                app["node_state"] == "INITIALIZING"
                                app["node_number"] = -1
                                break
                            if not node["host"]:
                                # flag - to re-register
                                log.warn(
                                    "host not set for this node - re-initializing")
                                app["node_state"] = "INITIALIZING"
                                app["node_number"] = -1
                                break
                        if not node["host"]:
                            continue  # not online
                        this_node = copy(node)
                        url = "http://" + node["host"] + ":" + str(
                            node["port"])
                        node_number = node["node_number"]
                        if node["node_type"] == "dn":
                            dn_urls[node_number] = url
                        elif node["node_type"] == "sn":
                            sn_urls[node_number] = url
                        else:
                            log.error(
                                "Unexpected node_type for node: {}".format(
                                    node))
                    app["sn_urls"] = sn_urls
                    log.debug(f"sn_urls: {sn_urls}")
                    app["dn_urls"] = dn_urls
                    log.debug(f"dn_urls: {dn_urls}")

                    if this_node is None and rsp_json[
                            "cluster_state"] != "READY":
                        log.warn("this node not found, re-initialize")
                        app["node_state"] == "INITIALIZING"
                        app["node_number"] = -1

                    if app["node_state"] == "WAITING" and rsp_json[
                            "cluster_state"] == "READY" and app[
                                "node_number"] >= 0:
                        log.info(
                            "setting node_state to READY, node_number: {}".
                            format(app["node_number"]))
                        app["node_state"] = "READY"
                    log.info("health check ok")
            except ClientError as ce:
                log.warn(f"ClientError: {ce} for health check")
            except HTTPInternalServerError as he:
                log.warn(
                    f"HTTPInternalServiceError <{he.code}> for health check")
            except HTTPNotFound as hnf:
                log.warn(f"HTTPNotFound <{hnf.code}> for health check")
            except HTTPGone as hg:
                log.warn(f"HTTPGone <{hg.code}> for health heck")

        svmem = psutil.virtual_memory()
        num_tasks = len(asyncio.Task.all_tasks())
        active_tasks = len(
            [task for task in asyncio.Task.all_tasks() if not task.done()])
        log.debug(
            f"health check sleep: {sleep_secs}, vm: {svmem.percent} num tasks: {num_tasks} active tasks: {active_tasks}"
        )
        await asyncio.sleep(sleep_secs)
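
A coroutine like healthCheck runs for the life of the process, so it is launched as a background task rather than awaited by a request handler. A minimal sketch of the assumed wiring with aiohttp's on_startup hook:

import asyncio
from aiohttp import web

async def start_background_tasks(app):
    # schedule healthCheck (defined above) alongside the request handlers
    app["health_check"] = asyncio.ensure_future(healthCheck(app))

app = web.Application()
app.on_startup.append(start_background_tasks)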
Code Example #6
File: domain_sn.py Project: paulmueller/hsds
async def GET_ACL(request):
    """HTTP method to return JSON for given domain/ACL"""
    log.request(request)
    app = request.app

    acl_username = request.match_info.get('username')
    if not acl_username:
        msg = "Missing username for ACL"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code in (404, 410):
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    # validate that the requesting user has permission to read ACLs in this domain
    if acl_username in (username, "default"):
        # allow a user to read their own ACL, or the default ACL
        aclCheck(domain_json, "read",
                 username)  # throws exception if not authorized
    else:
        aclCheck(domain_json, "readACL",
                 username)  # throws exception if not authorized

    if 'owner' not in domain_json:
        log.warn("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.warn("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug("got domain_json: {}".format(domain_json))

    if acl_username not in acls:
        msg = "acl for username: [{}] not found".format(acl_username)
        log.warn(msg)
        raise HTTPNotFound()

    acl = acls[acl_username]
    acl_rsp = {}
    for k in acl.keys():
        acl_rsp[k] = acl[k]
    acl_rsp["userName"] = acl_username

    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acl"] = acl_rsp
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
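
For reference, an illustrative shape of the domain JSON that GET_ACL reads. The owner/acls keys and the six permission flags match the handlers in this file; the usernames and values are examples:

domain_json = {
    "owner": "admin",
    "acls": {
        "default": {"create": False, "read": True, "update": False,
                    "delete": False, "readACL": False, "updateACL": False},
        "joe": {"create": True, "read": True, "update": True,
                "delete": True, "readACL": True, "updateACL": True},
    },
}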
Code Example #7
File: headnode.py Project: PjEdwards/hsds
async def healthCheck(app):
    """ Periodic method that pings each active node and verifies it is still healthy.  
    If node doesn't respond, free up the node slot (the node can re-register if it comes back)'"""

    app["last_health_check"] = int(time.time())
    
    nodes = app["nodes"]
    while True:
        # sleep for a bit
        sleep_secs = config.get("head_sleep_time")
        await asyncio.sleep(sleep_secs)

        now = int(time.time())
        log.info("health check {}".format(unixTimeToUTC(now)))
        
        fail_count = 0
        HEALTH_CHECK_RETRY_COUNT = 1 # times to try before calling a node dead
        for node in nodes:         
            if node["host"] is None:
                fail_count += 1
                continue
            url = getUrl(node["host"], node["port"]) + "/info"  
            try:
                rsp_json = await http_get(app, url)
                if "node" not in rsp_json:
                    log.error("Unexpected response from node")
                    fail_count += 1
                    continue
                node_state = rsp_json["node"]
                node_id = node_state["id"]
                
                if node_id != node['id']:
                    log.warn("unexpected node_id: {} (expecting: {})".format(node_id, node['id']))
                    node['host'] = None
                    node['id'] = None
                    fail_count += 1
                    continue

                if 'number' in node_state and node_state['number'] != node['node_number']:
                    msg = "unexpected node_number got {} (expecting: {})"
                    log.warn(msg.format(node_state["number"], node['node_number']))
                    node['host'] = None
                    node['id'] = None
                    fail_count += 1
                    continue
                    
                # save off other useful info from the node
                app_node_stats = app["node_stats"]
                node_stats = {}
                for k in NODE_STAT_KEYS:
                    node_stats[k] = rsp_json[k]
                app_node_stats[node_id] = node_stats
                # mark the last time we got a response from this node
                node["healthcheck"] = unixTimeToUTC(int(time.time()))
                node["failcount"] = 0 # rest
            except OSError as ose:
                log.warn("OSError for req: {}: {}".format(url, str(ose)))
                # node has gone away?
                node["failcount"] += 1
                if node["failcount"] >= HEALTH_CHECK_RETRY_COUNT:
                    log.warn("node {}:{} not responding".format(node["host"], node["port"]))
                    fail_count += 1
                
            except HTTPInternalServerError as hpe:
                log.warn("HTTPInternalServerError for req: {}: {}".format(url, str(hpe)))
                # node has gone away?
                node["failcount"] += 1
                if node["failcount"] >= HEALTH_CHECK_RETRY_COUNT:
                    log.warn("removing {}:{} from active list".format(node["host"], node["port"]))
                    fail_count += 1
            except TimeoutError as toe:
                log.warn("Timeout error for req: {}: {}".format(url, str(toe)))
                # node has gone away?
                node["failcount"] += 1
                if node["failcount"] >= HEALTH_CHECK_RETRY_COUNT:
                    log.warn("removing {}:{} from active list".format(node["host"], node["port"]))
                    fail_count += 1
        log.info("node health check fail_count: {}".format(fail_count))
        if fail_count > 0:
            if app["cluster_state"] == "READY":
                # go back to INITIALIZING state until another node is registered
                log.warn("Fail_count > 0, Setting cluster_state from READY to INITIALIZING")
                app["cluster_state"] = "INITIALIZING"
        elif fail_count == 0 and app["cluster_state"] != "READY":
            log.info("All nodes healthy, changing cluster state to READY")
            app["cluster_state"] = "READY"
Code Example #8
File: azureBlobClient.py Project: t20100/hsds
    async def put_object(self, key, data, bucket=None):
        """ Write data to given key.
            Returns client specific dict on success
        """
        if not bucket:
            log.error("put_object - bucket not set")
            raise HTTPInternalServerError()

        start_time = time.time()
        log.debug(
            f"azureBlobClient.put_object({bucket}/{key}) start: {start_time}")
        try:
            async with self._client.get_blob_client(container=bucket,
                                                    blob=key) as blob_client:
                blob_rsp = await blob_client.upload_blob(data,
                                                         blob_type='BlockBlob',
                                                         overwrite=True)

            finish_time = time.time()
            ETag = blob_rsp["etag"]
            lastModified = int(blob_rsp["last_modified"].timestamp())
            data_size = len(data)
            rsp = {
                "ETag": ETag,
                "size": data_size,
                "LastModified": lastModified
            }
            log.debug(f"put_object {key} returning: {rsp}")

            log.info(
                f"azureBlobClient.put_object({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
            )

        except CancelledError as cle:
            self._azure_stats_increment("error_count")
            msg = f"azureBlobClient.CancelledError for put_object {key}: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()
        except Exception as e:
            if isinstance(e, AzureError):
                if e.status_code == 404:
                    msg = f"azureBlobClient.key: {key} not found "
                    log.warn(msg)
                    raise HTTPNotFound()
                elif e.status_code in (401, 403):
                    msg = f"azureBlobClient.access denied for get key: {key}"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._azure_stats_increment("error_count")
                    log.error(
                        f"azureBlobClient.got unexpected AzureError for put_object {key}: {e.message}"
                    )
                    raise HTTPInternalServerError()
            else:
                log.error(
                    f"azureBlobClient.Unexpected exception for put_object {key}: {e}"
                )
                raise HTTPInternalServerError()

        if data and len(data) > 0:
            self._azure_stats_increment("bytes_out", inc=len(data))
        log.debug(f"azureBlobClient.put_object {key} complete, rsp: {rsp}")
        return rsp
Code Example #9
File: azureBlobClient.py Project: t20100/hsds
    async def list_keys(self,
                        prefix='',
                        deliminator='',
                        suffix='',
                        include_stats=False,
                        callback=None,
                        bucket=None,
                        limit=None):
        """ return keys matching the arguments
        """
        if not bucket:
            log.error("list_keys - bucket not set")
            raise HTTPInternalServerError()
        log.info(
            f"list_keys('{prefix}','{deliminator}','{suffix}', include_stats={include_stats})"
        )
        if include_stats:
            # use a dictionary to hold return values
            key_names = {}
        else:
            # just use a list
            key_names = []
        continuation_token = None
        page_result_count = 1000  # compatible with what S3 uses by default
        if prefix == '':
            prefix = None  # azure sdk expects None for no prefix
        try:
            async with self._client.get_container_client(
                    container=bucket) as container_client:
                while True:
                    log.info(
                        f"list_blobs: {prefix} continuation_token: {continuation_token}"
                    )
                    keyList = container_client.walk_blobs(
                        name_starts_with=prefix,
                        delimiter=deliminator,
                        results_per_page=page_result_count).by_page(
                            continuation_token)

                    async for key in await keyList.__anext__():
                        key_name = key["name"]
                        if include_stats:
                            ETag = key["etag"]
                            lastModified = int(
                                key["last_modified"].timestamp())
                            data_size = key["size"]
                            key_names[key_name] = {
                                "ETag": ETag,
                                "Size": data_size,
                                "LastModified": lastModified
                            }
                        else:
                            key_names.append(key_name)
                    if callback:
                        if iscoroutinefunction(callback):
                            await callback(self._app, key_names)
                        else:
                            callback(self._app, key_names)
                    if not keyList.continuation_token or (
                            limit and len(key_names) >= limit):
                        # got all the keys (or as many as requested)
                        break
                    else:
                        # keep going
                        continuation_token = keyList.continuation_token

        except CancelledError as cle:
            self._azure_stats_increment("error_count")
            msg = f"azureBlobClient.CancelledError for list_keys: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()
        except Exception as e:
            if isinstance(e, AzureError):
                if e.status_code == 404:
                    msg = f"azureBlobClient not found error for list_keys"
                    log.warn(msg)
                    raise HTTPNotFound()
                elif e.status_code in (401, 403):
                    msg = f"azureBlobClient.access denied for list_keys"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._azure_stats_increment("error_count")
                    log.error(
                        f"azureBlobClient.got unexpected AzureError for list_keys: {e.message}"
                    )
                    raise HTTPInternalServerError()
            else:
                log.error(
                    f"azureBlobClient.Unexpected exception for list_keys: {e}")
                raise HTTPInternalServerError()

        log.info(f"list_keys done, got {len(key_names)} keys")
        if limit and len(key_names) > limit:
            # return requested number of keys
            if include_stats:
                keys = list(key_names.keys())
                keys.sort()
                for k in keys[limit:]:
                    del key_names[k]
            else:
                key_names = key_names[:limit]

        return key_names
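
A hypothetical usage sketch (client construction is not shown in the excerpt): with include_stats=True the method returns a dict keyed by name, otherwise a plain list of key names.

async def show_keys(client):
    # client is assumed to be an initialized azureBlobClient instance
    keys = await client.list_keys(prefix="db/", include_stats=True,
                                  bucket="mybucket", limit=100)
    for name, info in keys.items():
        print(name, info["Size"], info["LastModified"])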
Code Example #10
File: link_sn.py Project: t20100/hsds
async def PUT_Link(request):
    """HTTP method to create a new link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = f"Invalid group id: {group_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_title = request.match_info.get('title')
    log.info(f"PUT Link_title: [{link_title}]")
    validateLinkName(link_title)


    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT Link with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()

    link_json = {}
    if "id" in body:
        if not isValidUuid(body["id"]):
            msg = "PUT Link with invalid id in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        link_json["id"] = body["id"]
        link_json["class"] = "H5L_TYPE_HARD"

    elif "h5path" in body:
        link_json["h5path"] = body["h5path"]
        # could be hard or soft link
        if "h5domain" in body:
            link_json["h5domain"] = body["h5domain"]
            link_json["class"] = "H5L_TYPE_EXTERNAL"
        else:
            # soft link
            link_json["class"] = "H5L_TYPE_SOFT"
    else:
        msg = "PUT Link with no id or h5path keys"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    await validateAction(app, domain, group_id, username, "create")


    # for hard links, verify that the referenced id exists and is in this domain
    if "id" in body:
        ref_id = body["id"]
        ref_json = await getObjectJson(app, ref_id, bucket=bucket)
        group_json = await getObjectJson(app, group_id, bucket=bucket)
        if ref_json["root"] != group_json["root"]:
            msg = "Hard link must reference an object in the same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    # ready to add link now
    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    log.debug("PUT link - getting group: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    put_rsp = await http_put(app, req, data=link_json, params=params)
    log.debug("PUT Link resp: " + str(put_rsp))

    hrefs = []  # TBD
    req_rsp = { "hrefs": hrefs }
    # link creation successful
    resp = await jsonResponse(request, req_rsp, status=201)
    log.response(request, resp=resp)
    return resp
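
PUT_Link accepts three body shapes, one per link class, per the branches above. Illustrative payloads (the id and paths are made-up examples):

# H5L_TYPE_HARD: references an object id in the same domain
hard_link = {"id": "g-12345678-1234-1234-1234-123456789012"}
# H5L_TYPE_SOFT: references an h5path, which need not exist yet
soft_link = {"h5path": "/g1/dset1"}
# H5L_TYPE_EXTERNAL: references an h5path in another domain
external_link = {"h5path": "/g1/dset1", "h5domain": "/shared/other"}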
Code Example #11
File: link_sn.py Project: t20100/hsds
async def GET_Links(request):
    """HTTP method to return JSON for link collection"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = f"Invalid group id: {group_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links"
    query_sep = '?'
    if limit is not None:
        req += query_sep + "Limit=" + str(limit)
        query_sep = '&'
    if marker is not None:
        req += query_sep + "Marker=" + marker

    log.debug("get LINKS: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    links_json = await http_get(app, req, params=params)
    log.debug(f"got links json from dn for group_id: {group_id}")
    links = links_json["links"]

    # mix in collection key, target and hrefs
    for link in links:
        if link["class"] == "H5L_TYPE_HARD":
            collection_name = getCollectionForId(link["id"])
            link["collection"] = collection_name
            target_uri = '/' + collection_name + '/' + link["id"]
            link["target"] = getHref(request, target_uri)
        link_uri = '/groups/' + group_id + '/links/' + link['title']
        link["href"] = getHref(request, link_uri)

    resp_json = {}
    resp_json["links"] = links
    hrefs = []
    group_uri = '/groups/'+group_id
    hrefs.append({'rel': 'self', 'href': getHref(request, group_uri+'/links')})
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
Code Example #12
File: link_sn.py Project: t20100/hsds
async def GET_Link(request):
    """HTTP method to return JSON for a group link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = f"Invalid group id: {group_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    log.debug("get LINK: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    link_json = await http_get(app, req, params=params)
    log.debug("got link_json: " + str(link_json))
    resp_link = {}
    resp_link["title"] = link_title
    link_class = link_json["class"]
    resp_link["class"] = link_class
    if link_class == "H5L_TYPE_HARD":
        resp_link["id"] = link_json["id"]
        resp_link["collection"] = getCollectionForId(link_json["id"])
    elif link_class == "H5L_TYPE_SOFT":
        resp_link["h5path"] = link_json["h5path"]
    elif link_class == "H5L_TYPE_EXTERNAL":
        resp_link["h5path"] = link_json["h5path"]
        resp_link["h5domain"] = link_json["h5domain"]
    else:
        log.warn(f"Unexpected link class: {link_class}")
    resp_json = {}
    resp_json["link"] = resp_link
    resp_json["created"] = link_json["created"]
    # links don't get modified, so use created timestamp as lastModified
    resp_json["lastModified"] = link_json["created"]

    hrefs = []
    group_uri = '/groups/'+group_id
    hrefs.append({'rel': 'self', 'href': getHref(request, group_uri+'/links/'+link_title)})
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    if link_json["class"] == "H5L_TYPE_HARD":
        target = '/' + resp_link["collection"] + '/' + resp_link["id"]
        hrefs.append({'rel': 'target', 'href': getHref(request, target)})

    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
Code Example #13
File: chunk_dn.py Project: pcrespov/hsds
async def getChunk(app,
                   chunk_id,
                   dset_json,
                   bucket=None,
                   s3path=None,
                   s3offset=0,
                   s3size=0,
                   chunk_init=False):
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(
            f"unable to initialize chunk {chunk_id} for reference layouts")
        raise HTTPInternalServerError()

    log.debug(
        f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} percent, dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}"
    )

    chunk_arr = None
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)
    # note - officially we should follow the order in which the filters are defined in the filter_list,
    # but since we currently have just deflate and shuffle we will always apply deflate then shuffle on read,
    # and shuffle then deflate on write
    # also note - get deflate and shuffle will update the deflate and shuffle map so that the s3sync will do the right thing
    deflate_level = getDeflate(app, dset_id, dset_json)
    shuffle = getShuffle(app, dset_id, dset_json)
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')  # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index + 1):]
        log.debug(f"Using s3path bucket: {bucket} and  s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)
        log.debug(f"getChunk chunkid: chunk_id bucket: {bucket}")
    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isStorObj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]

            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(
                    f"s3 read request for {chunk_id} was requested at: {read_start_time}"
                )
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(
                        f"s3 read for chunk {chunk_id} timed-out, initiating a new read"
                    )

            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug(f"Reading chunk {s3key} from S3")

                chunk_bytes = await getStorBytes(app,
                                                 s3key,
                                                 shuffle=shuffle,
                                                 deflate_level=deflate_level,
                                                 s3offset=s3offset,
                                                 s3size=s3size,
                                                 bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id]
                else:
                    log.warn(
                        f"expected to find {chunk_id} in pending_s3_read map")

                chunk_arr = np.frombuffer(
                    chunk_bytes, dtype=dt).copy()  # writable copy; np.fromstring is deprecated
                chunk_arr = chunk_arr.reshape(dims)

            log.debug(f"chunk size: {chunk_arr.size}")

        elif chunk_init:
            log.debug(f"Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")

        if chunk_arr is not None:
            # check that there's room in the cache before adding it
            if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                # no room in the cache, wait till space is freed by the s3sync task
                wait_start = time.time()
                while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                    log.warn(
                        f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, sleeping till items are flushed"
                    )
                    if time.time() - wait_start > MAX_WAIT_TIME:
                        log.error(
                            f"unable to save updated chunk {chunk_id} to cache returning 503 error"
                        )
                        raise HTTPServiceUnavailable()
                    await asyncio.sleep(1)

            chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
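
The pending_s3_read handling above is a read-deduplication pattern: the first task records a start time and performs the fetch, while concurrent tasks poll the cache instead of issuing a second read. A minimal self-contained sketch of the same idea, with fetch() and the timeout as stand-ins:

import asyncio
import time

pending, cache = {}, {}

async def fetch(key):
    await asyncio.sleep(0.1)  # stand-in for the storage read
    return b"data"

async def get_with_dedup(key):
    if key in cache:
        return cache[key]
    if key in pending:
        # another task is already reading; poll the cache for a while
        start = pending[key]
        while time.time() - start < 2.0:
            await asyncio.sleep(0.05)
            if key in cache:
                return cache[key]
    # no read in flight (or it timed out), so do the read here
    pending[key] = time.time()
    try:
        cache[key] = await fetch(key)
    finally:
        pending.pop(key, None)
    return cache[key]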
Code Example #14
File: chunk_dn.py Project: pcrespov/hsds
async def POST_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info(f"POST Chunk put points, num_points: {num_points}")

        put_points = True
    else:
        log.info("POST Chunk get points")
    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        if put_points:
            log.error("s3path can not be used with put points POST request")
            raise HTTPBadRequest()
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - using s3path: {s3path}")
        bucket = None
    elif "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"POST chunk_id: {chunk_id}")
    chunk_index = getChunkIndex(chunk_id)
    log.debug(f"chunk_index: {chunk_index}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug(f"request params: {list(params.keys())}")
    if "dset" in params:
        msg = "Unexpected dset in POST request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)
    log.debug(f"dset_json: {dset_json}")
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug(f"chunk_coord: {chunk_coord}")

    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = f"Unexpected content_type: {content_type}"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug(f"dtype: {dset_dtype}")

    dims = getChunkLayout(dset_json)
    log.debug(f"got dims: {dims}")
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request)
    if len(input_bytes) != request.content_length:
        msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               bucket=bucket,
                               s3path=s3path,
                               s3offset=s3offset,
                               s3size=s3size,
                               chunk_init=put_points)

    if chunk_arr is None:
        if put_points:
            log.error("no array returned for put_points")
            raise HTTPInternalServerError()
        else:
            # get points on a non-existent S3 object?
            log.warn("S3 object not found for get points")
            raise HTTPNotFound()

    log.debug(f"chunk_arr.shape: {chunk_arr.shape}")

    if put_points:
        # writing point data

        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = f"({rank},)uint64"
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)),
                               ("value", dset_dtype)])
        point_arr = np.frombuffer(
            input_bytes, dtype=comp_dtype).copy()  # writable copy; np.fromstring is deprecated

        if len(point_arr) != num_points:
            msg = f"Unexpected size of point array, got: {len(point_arr)} expected: {num_points}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        for i in range(num_points):
            elem = point_arr[i]
            log.debug(f"non-relative coordinate: {elem}")
            if rank == 1:
                coord = int(elem[0])
                coord = coord % chunk_layout[0]  # adjust to chunk relative

            else:
                coord = elem[0]  # index to update
                for dim in range(rank):
                    # adjust to chunk relative
                    coord[dim] = int(coord[dim]) % chunk_layout[dim]
                coord = tuple(coord)  # need to convert to a tuple
            log.debug(f"relative coordinate: {coord}")

            val = elem[1]  # value
            try:
                chunk_arr[coord] = val  # update the point
            except IndexError:
                msg = "Out of bounds point index for POST Chunk"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)
        log.info(f"set {chunk_id} to dirty")

    else:
        # reading point data
        point_dt = np.dtype('uint64')  # use unsigned long for point index
        point_arr = np.frombuffer(
            input_bytes, dtype=point_dt)  # read points as unsigned longs
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug(f"got {num_points} points")

        point_arr = point_arr.reshape((num_points, rank))
        output_arr = np.zeros((num_points, ), dtype=dset_dtype)

        for i in range(num_points):
            point = point_arr[i, :]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val

    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        finally:
            await resp.write_eof()

    return resp
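
For the put-points branch above, the request body is a packed array of (coord, value) records. A small sketch of building and reading back that wire format for a hypothetical rank-2 float32 dataset:

import numpy as np

rank = 2
dset_dtype = np.dtype("float32")
comp_dtype = np.dtype([("coord", f"({rank},)uint64"), ("value", dset_dtype)])
points = np.array([((0, 1), 3.5), ((2, 3), 7.0)], dtype=comp_dtype)
payload = points.tobytes()  # body of the put-points POST request
roundtrip = np.frombuffer(payload, dtype=comp_dtype)
print(roundtrip["coord"].tolist())  # -> [[0, 1], [2, 3]]
print(roundtrip["value"].tolist())  # -> [3.5, 7.0]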
Code Example #15
File: domain_sn.py Project: paulmueller/hsds
async def PUT_Domain(request):
    """HTTP method to create a new domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query
    # verify username, password
    username, pswd = getUserPasswordFromRequest(
        request)  # throws exception if user/password is not valid
    await validateUserPassword(app, username, pswd)

    # initial perms for owner and default
    owner_perm = {
        'create': True,
        'read': True,
        'update': True,
        'delete': True,
        'readACL': True,
        'updateACL': True
    }
    default_perm = {
        'create': False,
        'read': True,
        'update': False,
        'delete': False,
        'readACL': False,
        'updateACL': False
    }

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    log.info("PUT domain: {}, username: {}".format(domain, username))

    body = None
    if request.has_body:
        body = await request.json()
        log.debug("PUT domain with body: {}".format(body))

    if ("flush" in params and params["flush"]) or (body and "flush" in body
                                                   and body["flush"]):
        # flush domain - update existing domain rather than create a new resource
        domain_json = await getDomainJson(app, domain, reload=True)
        log.debug("got domain_json: {}".format(domain_json))

        if domain_json is None:
            log.warn("domain: {} not found".format(domain))
            raise HTTPNotFound()

        if 'owner' not in domain_json:
            log.error("No owner key found in domain")
            raise HTTPInternalServerError()

        if 'acls' not in domain_json:
            log.error("No acls key found in domain")
            raise HTTPInternalServerError()

        aclCheck(domain_json, "update",
                 username)  # throws exception if not allowed
        if "root" in domain_json:
            # folder objects have no root group, so there is nothing to flush for them
            await doFlush(app, domain_json["root"])
        # flush successful
        resp = await jsonResponse(request, None, status=204)
        log.response(request, resp=resp)
        return resp

    is_folder = False
    owner = username
    linked_domain = None
    root_id = None

    if body and "folder" in body:
        if body["folder"]:
            is_folder = True
    if body and "owner" in body:
        owner = body["owner"]
    if body and "linked_domain" in body:
        if is_folder:
            msg = "Folder domains can not be used for links"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        linked_domain = body["linked_domain"]
        log.info(f"linking to domain: {linked_domain}")

    if owner != username and username != "admin":
        log.warn("Only admin users are allowed to set owner for new domains")
        raise HTTPForbidden()

    parent_domain = getParentDomain(domain)
    log.debug("Parent domain: [{}]".format(parent_domain))

    if (not parent_domain or parent_domain == '/') and not is_folder:
        msg = "Only folder domains can be created at the top-level"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if (not parent_domain or parent_domain == '/') and username != "admin":
        msg = "creation of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    parent_json = None
    if parent_domain and parent_domain != '/':
        try:
            parent_json = await getDomainJson(app, parent_domain, reload=True)
        except ClientResponseError as ce:
            if ce.code == 404:
                msg = "Parent domain: {} not found".format(parent_domain)
                log.warn(msg)
                raise HTTPNotFound()
            elif ce.code == 410:
                msg = "Parent domain: {} removed".format(parent_domain)
                log.warn(msg)
                raise HTTPGone()
            else:
                log.error(f"Unexpected error: {ce.code}")
                raise HTTPInternalServerError()

        log.debug("parent_json {}: {}".format(parent_domain, parent_json))
        if "root" in parent_json and parent_json["root"]:
            msg = "Parent domain must be a folder"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if parent_json:
        aclCheck(parent_json, "create",
                 username)  # throws exception if not allowed

    if linked_domain:
        linked_json = await getDomainJson(app, linked_domain, reload=True)
        log.debug(f"got linked json: {linked_json}")
        if "root" not in linked_json:
            msg = "Folder domains cannot ber used as link target"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        root_id = linked_json["root"]
        aclCheck(linked_json, "read", username)
        aclCheck(linked_json, "delete", username)
    else:
        linked_json = None

    if not is_folder and not linked_json:
        # create a root group for the new domain
        root_id = createObjId("roots")
        log.debug("new root group id: {}".format(root_id))
        group_json = {"id": root_id, "root": root_id, "domain": domain}
        log.debug("create group for domain, body: " + json.dumps(group_json))

        # create root group
        req = getDataNodeUrl(app, root_id) + "/groups"
        try:
            group_json = await http_post(app, req, data=group_json)
        except ClientResponseError as ce:
            msg = "Error creating root group for domain -- " + str(ce)
            log.error(msg)
            raise HTTPInternalServerError()
    else:
        log.debug("no root group, creating folder")

    domain_json = {}

    domain_acls = {}
    # owner gets full control
    domain_acls[owner] = owner_perm
    if config.get("default_public") or is_folder:
        # this will make the domain public readable
        log.debug("adding default perm for domain: {}".format(domain))
        domain_acls["default"] = default_perm

    # construct dn request to create new domain
    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"owner": owner, "domain": domain}
    body["acls"] = domain_acls

    if root_id:
        body["root"] = root_id

    log.debug("creating domain: {} with body: {}".format(domain, body))
    try:
        domain_json = await http_put(app, req, data=body)
    except ClientResponseError as ce:
        msg = "Error creating domain state -- " + str(ce)
        log.error(msg)
        raise HTTPInternalServerError()

    # domain creation successful
    # mixin limits
    domain_json["limits"] = getLimits()
    domain_json["version"] = getVersion()
    resp = await jsonResponse(request, domain_json, status=201)
    log.response(request, resp=resp)
    return resp
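
The owner_perm/default_perm dictionaries above are per-user permission maps keyed by action name, and aclCheck presumably resolves the requesting user's entry with a fall-back to the "default" entry. A hypothetical sketch of that lookup (is_allowed is a made-up stand-in, not the hsds aclCheck):

def is_allowed(domain_json, action, username):
    """Return True if username may perform action on the domain (sketch)."""
    acls = domain_json.get("acls", {})
    # prefer the user's own ACL entry, fall back to "default"
    acl = acls.get(username, acls.get("default", {}))
    return bool(acl.get(action))

domain_json = {
    "owner": "admin",
    "acls": {
        "admin": {"create": True, "read": True, "update": True,
                  "delete": True, "readACL": True, "updateACL": True},
        "default": {"read": True},
    },
}
assert is_allowed(domain_json, "read", "someuser")
assert not is_allowed(domain_json, "delete", "someuser")
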
Code example #16
async def save_metadata_obj(app, obj_id, obj_json, notify=False, flush=False):
    """ Persist the given object """
    log.info(f"save_metadata_obj {obj_id} notify={notify} flush={flush}")
    if notify and not flush:
        log.error("notify not valid when flush is false")
        raise HTTPInternalServerError()

    if not obj_id.startswith('/') and not isValidUuid(obj_id):
        msg = "Invalid obj id: {}".format(obj_id)
        log.error(msg)
        raise HTTPInternalServerError()
    if not isinstance(obj_json, dict):
        log.error("Passed non-dict obj to save_metadata_obj")
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    dirty_ids = app["dirty_ids"]
    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        if isValidUuid(obj_id):
            # domain objects may be re-created, but shouldn't see repeats of
            # deleted uuids
            log.warn("{} has been deleted".format(obj_id))
            raise HTTPInternalServerError()
        else:
            deleted_ids.remove(obj_id)  # un-gone the domain id

    # update meta cache
    meta_cache = app['meta_cache']
    log.debug("save: {} to cache".format(obj_id))
    meta_cache[obj_id] = obj_json

    meta_cache.setDirty(obj_id)
    now = int(time.time())

    if flush:
        # write to S3 immediately
        if isValidChunkId(obj_id):
            log.warn("flush not supported for save_metadata_obj with chunks")
            raise HTTPBadRequest()
        try:
            await write_s3_obj(app, obj_id)
        except KeyError as ke:
            log.error(f"s3 sync got key error: {ke}")
            raise HTTPInternalServerError()
        except HTTPInternalServerError:
            log.warn(f" failed to write {obj_id}")
            raise  # re-throw
        if obj_id in dirty_ids:
            log.warn(
                f"save_metadata_obj flush - object {obj_id} is still dirty")
    else:
        # flag to write to S3
        dirty_ids[obj_id] = now

    # message AN immediately if notify flag is set
    # otherwise AN will be notified at next S3 sync
    if notify:
        an_url = getAsyncNodeUrl(app)

        if obj_id.startswith("/"):
            # domain update
            req = an_url + "/domain"
            params = {"domain": obj_id}
            if "root" in obj_json:
                params["root"] = obj_json["root"]
            if "owner" in obj_json:
                params["owner"] = obj_json["owner"]
            try:
                log.info("ASync PUT notify: {} params: {}".format(req, params))
                await http_put(app, req, params=params)
            except HTTPInternalServerError as hpe:
                log.error(f"got error notifying async node: {hpe}")
                log.error(msg)

        else:
            req = an_url + "/object/" + obj_id
            try:
                log.info("ASync PUT notify: {}".format(req))
                await http_put(app, req)
            except HTTPInternalServerError:
                log.error(f"got error notifying async node")
Code example #17
File: domain_sn.py Project: paulmueller/hsds
async def DELETE_Domain(request):
    """HTTP method to delete a domain resource"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    domain = None
    meta_only = False  # if True, just delete the meta cache value
    keep_root = False
    if request.has_body:
        body = await request.json()
        if "domain" in body:
            domain = body["domain"]
        else:
            msg = "No domain in request body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        if "meta_only" in body:
            meta_only = body["meta_only"]
        if "keep_root" in body:
            keep_root = body["keep_root"]

    else:
        # get domain from request uri
        try:
            domain = getDomainFromRequest(request)
        except ValueError:
            msg = "Invalid domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if "keep_root" in params:
            keep_root = params["keep_root"]

    log.info("meta_only domain delete: {}".format(meta_only))
    if meta_only:
        # remove from domain cache if present
        domain_cache = app["domain_cache"]
        if domain in domain_cache:
            log.info("deleting {} from domain_cache".format(domain))
            del domain_cache[domain]
        resp = await jsonResponse(request, {})
        return resp

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    parent_domain = getParentDomain(domain)
    if (not parent_domain or parent_domain == '/') and username != "admin":
        msg = "Deletion of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code == 404:
            log.warn("domain not found")
            raise HTTPNotFound()
        elif ce.code == 410:
            log.warn("domain has been removed")
            raise HTTPGone()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    aclCheck(domain_json, "delete",
             username)  # throws exception if not allowed

    # check for sub-objects if this is a folder
    if "root" not in domain_json:
        s3prefix = domain[1:] + '/'
        log.info(f"checking kets with prefix: {s3prefix} ")
        s3keys = await getS3Keys(app,
                                 include_stats=False,
                                 prefix=s3prefix,
                                 deliminator='/')
        for s3key in s3keys:
            if s3key.endswith("/"):
                log.warn(f"attempt to delete folder {domain} with sub-items")
                log.debug(f"got prefix: {s3keys[0]}")
                raise HTTPConflict(reason="folder has sub-items")

    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"domain": domain}

    rsp_json = await http_delete(app, req, data=body)

    if "root" in domain_json and not keep_root:
        # delete the root group
        root_id = domain_json["root"]
        req = getDataNodeUrl(app, root_id)
        req += "/groups/" + root_id
        await http_delete(app, req)

    # remove from domain cache if present
    domain_cache = app["domain_cache"]
    if domain in domain_cache:
        del domain_cache[domain]

    # delete domain cache from other sn_urls
    sn_urls = app["sn_urls"]
    body["meta_only"] = True
    for node_no in sn_urls:
        if node_no == app["node_number"]:
            continue  # don't send to ourselves
        sn_url = sn_urls[node_no]
        req = sn_url + "/"
        log.info("sending sn request: {}".format(req))
        try:
            sn_rsp = await http_delete(app, req, data=body)
            log.info("{} response: {}".format(req, sn_rsp))
        except ClientResponseError as ce:
            log.warn("got error for sn_delete: {}".format(ce))

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Code example #18
async def delete_metadata_obj(app, obj_id, notify=True, root_id=None):
    """ Delete the given object """
    meta_cache = app['meta_cache']
    dirty_ids = app["dirty_ids"]
    log.info("delete_meta_data_obj: {} notify: {}".format(obj_id, notify))
    if not isValidDomain(obj_id) and not isValidUuid(obj_id):
        msg = "Invalid obj id: {}".format(obj_id)
        log.error(msg)
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        log.warn("{} has already been deleted".format(obj_id))
    else:
        deleted_ids.add(obj_id)

    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    if obj_id in dirty_ids:
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)

    if await isS3Obj(app, s3key):
        await deleteS3Obj(app, s3key)
    else:
        log.info(
            f"delete_metadata_obj - key {s3key} not found (never written)?")

    if notify:
        an_url = getAsyncNodeUrl(app)
        if obj_id.startswith("/"):
            # domain delete
            req = an_url + "/domain"
            params = {"domain": obj_id}

            try:
                log.info("ASync DELETE notify: {} params: {}".format(
                    req, params))
                await http_delete(app, req, params=params)
            except ClientError as ce:
                log.error(f"got error notifying async node: {ce}")
            except HTTPInternalServerError as hse:
                log.error(f"got HTTPInternalServerError: {hse}")
        else:
            req = an_url + "/object/" + obj_id
            try:
                log.info(f"ASync DELETE notify: {req}")
                await http_delete(app, req)
            except ClientError as ce:
                log.error(f"got ClientError notifying async node: {ce}")
            except HTTPInternalServerError as ise:
                log.error(
                    f"got HTTPInternalServerError notifying async node: {ise}")
    log.debug(f"delete_metadata_obj for {obj_id} done")
Code example #19
File: domain_sn.py Project: paulmueller/hsds
async def GET_ACLs(request):
    """HTTP method to return JSON for domain/ACLs"""
    log.request(request)
    app = request.app

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError:
        log.warn("domain not found")
        log.warn(msg)
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "readACL",
             username)  # throws exception if not authorized

    acl_list = []
    acl_usernames = list(acls.keys())
    acl_usernames.sort()
    for acl_username in acl_usernames:
        entry = {"userName": acl_username}
        acl = acls[acl_username]

        for k in acl.keys():
            entry[k] = acl[k]
        acl_list.append(entry)
    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acls"] = acl_list

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
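
The response-shaping step in GET_ACLs, turning the acls dict into a sorted list of per-user entries, is easy to verify in isolation; a minimal sketch with made-up users:

acls = {
    "bob": {"read": True, "update": False},
    "alice": {"read": True, "update": True},
}

acl_list = []
for acl_username in sorted(acls):
    entry = {"userName": acl_username}
    entry.update(acls[acl_username])  # copy each permission flag into the entry
    acl_list.append(entry)

# -> [{'userName': 'alice', 'read': True, 'update': True},
#     {'userName': 'bob', 'read': True, 'update': False}]
print(acl_list)
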
Code example #20
async def write_s3_obj(app, obj_id):
    """ writes the given object to s3 """
    s3key = getS3Key(obj_id)
    log.info(f"write_s3_obj for obj_id: {obj_id} / s3_key: {s3key}")
    pending_s3_write = app["pending_s3_write"]
    pending_s3_write_tasks = app["pending_s3_write_tasks"]
    dirty_ids = app["dirty_ids"]
    chunk_cache = app['chunk_cache']
    meta_cache = app['meta_cache']
    deflate_map = app['deflate_map']
    notify_objs = app["an_notify_objs"]
    deleted_ids = app['deleted_ids']
    success = False

    if s3key in pending_s3_write:
        msg = f"write_s3_key - not expected for key {s3key} to be in pending_s3_write map"
        log.error(msg)
        raise KeyError(msg)

    if obj_id not in pending_s3_write_tasks:
        # this write wasn't initiated by the s3sync task
        log.debug(f"write_s3_obj for {obj_id} is not an s3sync task")

    if obj_id in deleted_ids and isValidUuid(obj_id):
        # if this obj id has been deleted (and it's unique, since this is not a domain id),
        # cancel any pending task and return
        log.warn(f"Canceling write for {obj_id} since it has been deleted")
        if obj_id in pending_s3_write_tasks:
            log.info(f"removing pending s3 write task for {obj_id}")
            task = pending_s3_write_tasks[obj_id]
            task.cancel()
            del pending_s3_write_tasks[obj_id]
        return None

    now = time.time()

    last_update_time = now
    if obj_id in dirty_ids:
        last_update_time = dirty_ids[obj_id]
    if last_update_time > now:
        msg = f"last_update time {last_update_time} is in the future for obj_id: {obj_id}"
        log.error(msg)
        raise ValueError(msg)

    pending_s3_write[s3key] = now
    # do the following in the try block so we can always remove the pending_s3_write at the end

    try:
        if isValidChunkId(obj_id):
            if obj_id not in chunk_cache:
                log.error("expected to find obj_id: {} in chunk cache".format(
                    obj_id))
                raise KeyError(f"{obj_id} not found in chunk cache")
            if not chunk_cache.isDirty(obj_id):
                log.error(f"expected chunk cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            chunk_arr = chunk_cache[obj_id]
            chunk_bytes = arrayToBytes(chunk_arr)
            dset_id = getDatasetId(obj_id)
            deflate_level = None
            if dset_id in deflate_map:
                deflate_level = deflate_map[dset_id]
                log.debug("got deflate_level: {} for dset: {}".format(
                    deflate_level, dset_id))

            await putS3Bytes(app,
                             s3key,
                             chunk_bytes,
                             deflate_level=deflate_level)
            success = True

            # if chunk has been evicted from cache something has gone wrong
            if obj_id not in chunk_cache:
                msg = f"expected to find {obj_id} in chunk_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id] > last_update_time:
                log.info(
                    f"write_s3_obj {obj_id} got updated while s3 write was in progress"
                )
            else:
                # no new write, can clear dirty
                chunk_cache.clearDirty(obj_id)  # allow eviction from cache
                log.debug(
                    "putS3Bytes Chunk cache utilization: {} per, dirty_count: {}"
                    .format(chunk_cache.cacheUtilizationPercent,
                            chunk_cache.dirtyCount))
        else:
            # meta data update
            # check for object in meta cache
            if obj_id not in meta_cache:
                log.error(
                    "expected to find obj_id: {} in meta cache".format(obj_id))
                raise KeyError(f"{obj_id} not found in chunk cache")
            if not meta_cache.isDirty(obj_id):
                log.error(f"expected meta cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            obj_json = meta_cache[obj_id]

            await putS3JSONObj(app, s3key, obj_json)
            success = True
            # should still be in meta_cache...
            if obj_id not in meta_cache:
                msg = f"expected to find {obj_id} in meta_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id] > last_update_time:
                log.info(
                    f"write_s3_obj {obj_id} got updated while s3 write was in progress"
                )
            else:
                meta_cache.clearDirty(obj_id)  # allow eviction from cache
    finally:
        # clear pending_s3_write item
        log.debug(f"write_s3_obj finally block, success={success}")
        if s3key not in pending_s3_write:
            msg = f"write s3 obj: Expected to find {s3key} in pending_s3_write map"
            log.error(msg)
        else:
            if pending_s3_write[s3key] != now:
                msg = f"pending_s3_write timestamp got updated unexpectedly for {s3key}"
                log.error(msg)
            del pending_s3_write[s3key]
        # clear task
        if obj_id not in pending_s3_write_tasks:
            log.debug(f"no pending s3 write task for {obj_id}")
        else:
            log.debug(f"removing pending s3 write task for {obj_id}")
            del pending_s3_write_tasks[obj_id]
        # clear dirty flag
        if obj_id in dirty_ids and dirty_ids[obj_id] == last_update_time:
            log.debug(f"clearing dirty flag for {obj_id}")
            del dirty_ids[obj_id]

    # add to set so that AN can be notified about changed objects
    notify_objs.add(obj_id)

    # calculate time to do the write
    elapsed_time = time.time() - now
    log.info(f"s3 write for {s3key} took {elapsed_time:.3f}s")
    return obj_id
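
The timestamp comparison in write_s3_obj is what makes concurrent updates safe: the dirty entry is only cleared when no write landed after the snapshot taken at the start of the S3 write. The same pattern in isolation (a sketch, with a plain dict standing in for dirty_ids):

def finish_write(obj_id, dirty_ids, last_update_time):
    """Clear the dirty entry only if the object wasn't touched mid-write."""
    if obj_id in dirty_ids and dirty_ids[obj_id] == last_update_time:
        del dirty_ids[obj_id]   # no newer update: safe to clear
        return True
    return False                # updated while writing: stays dirty

dirty_ids = {"c-1": 100.0}
snapshot = dirty_ids["c-1"]
dirty_ids["c-1"] = 101.0        # concurrent update during the S3 write
assert not finish_write("c-1", dirty_ids, snapshot)  # still dirty
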
Code example #21
File: link_dn.py Project: t20100/hsds
async def PUT_Link(request):
    """ Handler creating a new link"""
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"PUT link: {group_id}")
    if not isValidUuid(group_id, obj_class="group"):
        log.error(f"Unexpected group_id: {group_id}")
        raise HTTPInternalServerError()

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    log.info(f"link_title: {link_title}")

    if not request.has_body:
        msg = "PUT Link with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()

    if "class" not in body:
        msg = "PUT Link with no class key body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_class = body["class"]

    link_json = {}
    link_json["class"] = link_class

    if "id" in body:
        link_json["id"] = body["id"]
    if "h5path" in body:
        link_json["h5path"] = body["h5path"]
    if "h5domain" in body:
        link_json["h5domain"] = body["h5domain"]

    if "bucket" in params:
        bucket = params["bucket"]
    elif "bucket" in body:
        bucket = params["bucket"]
    else:
        bucket = None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)
    if "links" not in group_json:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    links = group_json["links"]
    if link_title in links:
        msg = f"Link name {link_title} already found in group: {group_id}"
        log.warn(msg)
        raise HTTPConflict()

    now = time.time()
    link_json["created"] = now

    # add the link
    links[link_title] = link_json

    # update the group lastModified
    group_json["lastModified"] = now

    # write back to S3, save to metadata cache
    await save_metadata_obj(app, group_id, group_json, bucket=bucket)

    resp_json = {}

    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
Code example #22
async def s3sync(app):
    """ Periodic method that writes dirty objects in the metadata cache to S3"""
    MAX_PENDING_WRITE_REQUESTS = 20
    dirty_ids = app["dirty_ids"]
    pending_s3_write = app["pending_s3_write"]
    pending_s3_write_tasks = app["pending_s3_write_tasks"]
    s3_sync_interval = config.get("s3_sync_interval")
    dirty_count = len(dirty_ids)
    if not dirty_count:
        log.info("s3sync nothing to update")
        return 0

    log.info(
        f"s3sync update - dirtyid count: {dirty_count}, active write tasks: {len(pending_s3_write_tasks)}/{MAX_PENDING_WRITE_REQUESTS}"
    )
    log.debug(f"s3sync dirty_ids: {dirty_ids}")
    log.debug(f"sesync pending write s3keys: {list(pending_s3_write.keys())}")
    log.debug(f"s3sync write tasks: {list(pending_s3_write_tasks.keys())}")

    def callback(future):
        try:
            obj_id = future.result()  # returns an obj id
            log.info(f"write_s3_obj callback result: {obj_id}")
        except HTTPInternalServerError as hse:
            log.error(f"write_s3_obj callback got 500: {hse}")
        except Exception as e:
            log.error(f"write_s3_obj callback unexpected exception: {e}")

    update_count = 0
    s3sync_start = time.time()

    for obj_id in dirty_ids:
        s3key = getS3Key(obj_id)
        log.debug(f"s3sync dirty id: {obj_id}, s3key: {s3key}")
        create_task = True
        if s3key in pending_s3_write:
            log.debug(
                f"key {s3key} has been pending for {s3sync_start - pending_s3_write[s3key]}"
            )
            if s3sync_start - pending_s3_write[s3key] > s3_sync_interval * 2:
                log.warn(
                    f"obj {obj_id} has been in pending_s3_write for {s3sync_start - pending_s3_write[s3key]} seconds, restarting"
                )
                del pending_s3_write[s3key]
                if obj_id not in pending_s3_write_tasks:
                    log.error(f"Expected to find write task for {obj_id}")
                else:
                    task = pending_s3_write_tasks[obj_id]
                    task.cancel()
                    del pending_s3_write_tasks[obj_id]
            else:
                log.debug(f"key {s3key} has a pending write task")
                create_task = False
                if obj_id not in pending_s3_write_tasks:
                    log.error(
                        f"expected to find {obj_id} in pending_s3_write_tasks")
        if create_task and len(
                pending_s3_write_tasks) < MAX_PENDING_WRITE_REQUESTS:
            # create a task to write this object
            log.debug(f"s3sync - ensure future for {obj_id}")
            task = asyncio.ensure_future(write_s3_obj(app, obj_id))
            task.add_done_callback(callback)
            pending_s3_write_tasks[obj_id] = task
            update_count += 1

    # notify AN of key updates
    an_url = getAsyncNodeUrl(app)

    notify_objs = app["an_notify_objs"]
    if len(notify_objs) > 0:
        log.info(f"Notifying AN for {len(notify_objs)} S3 Updates")
        body = {"objs": list(notify_objs)}
        notify_objs.clear()

        req = an_url + "/objects"
        try:
            log.info("ASync PUT notify: {} body: {}".format(req, body))
            await http_put(app, req, data=body)
        except HTTPInternalServerError as hpe:
            msg = "got error notifying async node: {}".format(hpe)
            log.error(msg)

    # return number of objects written
    return update_count
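
The task fan-out shape used by s3sync, creating background tasks with asyncio.ensure_future and attaching a done-callback that surfaces exceptions, can be exercised on its own. A small runnable sketch (fake_write is a made-up stand-in for write_s3_obj):

import asyncio

async def fake_write(obj_id):
    await asyncio.sleep(0)      # stand-in for the actual S3 write
    return obj_id

def callback(future):
    try:
        print("wrote:", future.result())
    except Exception as e:      # surfaces errors from the background task
        print("write failed:", e)

async def main():
    tasks = {}
    for obj_id in ("g-1", "g-2"):
        task = asyncio.ensure_future(fake_write(obj_id))
        task.add_done_callback(callback)
        tasks[obj_id] = task
    await asyncio.gather(*tasks.values())

asyncio.run(main())
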
Code example #23
async def oio_register(app):
    """ register with oio conscience
    """
    log.info("oio_register")

    oio_proxy = app["oio_proxy"]
    host_ip = app["host_ip"]
    if not host_ip:
        log.error("host ip not set")
        return
    node_type = app["node_type"]
    if node_type not in ("sn", "dn"):
        log.error("unexpected node type")
        return
    service_name = "hdf" + node_type
    req = oio_proxy + "/v3.0/OPENIO/conscience/register"

    body = {
        "addr": host_ip + ":" + str(app["node_port"]),
        "tags": {
            "stat.cpu": 100,
            "tag.up": True
        },
        "type": service_name
    }
    log.debug(f"conscience register: body: {body}")
    try:
        await http_post(app, req, data=body)
    except ClientError as client_exception:
        log.error(
            f"got ClientError registering with oio_proxy: {client_exception}")
        return
    except CancelledError as cancelled_exception:
        log.error(
            f"got CanceeledError registering with oio_proxy: {cancelled_exception}"
        )
        return
    log.info("oio registration successful")

    # get list of DN containers
    req = oio_proxy + "/v3.0/OPENIO/conscience/list?type=hdfdn"
    try:
        dn_node_list = await http_get(app, req)
    except ClientError as client_exception:
        log.error(
            f"got ClientError listing dn nodes with oio_proxy: {client_exception}"
        )
        return
    except CancelledError as cancelled_exception:
        log.error(
            f"got CanceeledError listing dn nodes with oio_proxy: {cancelled_exception}"
        )
        return
    log.info(f"got {len(dn_node_list)} conscience list items")
    # create map keyed by dn addr
    dn_node_map = {}
    for dn_node in dn_node_list:
        log.debug(f"checking dn conscience list item: {dn_node}")
        if "addr" not in dn_node:
            log.warn(f"conscience list item with no addr: {dn_node}")
            continue
        addr = dn_node["addr"]
        if "score" not in dn_node:
            log.warn(f'conscience list item with no score key: {dn_node}')
            continue
        if dn_node["score"] <= 0:
            log.debug(f"zero score - skipping conscience list addr: {addr}")
            continue
        if addr in dn_node_map:
            # shouldn't ever get this?
            log.warn(f"duplicate entry for node: {dn_node}")
            continue
        # send an info request to the node
        info_rsp = await get_info(app, "http://" + addr)
        if not info_rsp:
            # timeout or other failure
            continue
        if "node" not in info_rsp:
            log.error("expecteed to find node key in info resp")
            continue
        info_node = info_rsp["node"]
        log.debug(f"got info resp: {info_node}")
        for key in ("type", "id", "node_number", "node_count"):
            if key not in info_node:
                log.error(
                    f"unexpected node type in node state, expected to find key: {key}"
                )
                continue
        if info_node["type"] != "dn":
            log.error(f"expecteed node_type to be dn")
            continue
        # mix in node id, node number, node_count to the conscience info
        dn_node["node_id"] = info_node["id"]
        dn_node["node_number"] = info_node["node_number"]
        dn_node["node_count"] = info_node["node_count"]

        dn_node_map[addr] = dn_node

    log.info(f"done with dn_node_list, got: {len(dn_node_map)} active nodes")
    if len(dn_node_map) == 0:
        if app["node_state"] != "INITIALIZING":
            log.info(
                "no active DN nodes, setting cluster state to INITIALIZING")
            app["node_state"] = "INITIALIZING"
        return

    # sort map by address
    addrs = list(dn_node_map.keys())
    addrs.sort()

    # check that node number is set and is the expected value for each node key
    invalid_count = 0
    node_index = 0
    node_count = len(addrs)
    dn_urls = {}
    this_node_found = False
    this_node_id = app["id"]
    for addr in addrs:
        dn_node = dn_node_map[addr]
        log.debug(f"dn_node for index {node_index}: {dn_node}")
        node_id = dn_node["node_id"]
        if node_id == this_node_id:
            this_node_found = True
        node_number = dn_node["node_number"]
        dn_urls[node_number] = "http://" + dn_node["addr"]
        if node_index != node_number or dn_node["node_count"] != node_count:
            if node_number == -1:
                log.info(f"node {node_index} not yet initialized")
            elif node_index != node_number:
                log.warn(
                    f"node_id {node_id}, expected node_number of {node_index} but found {node_number}"
                )
            invalid_count += 1
            if node_id == app["id"]:
                # this is us, update our node_number, node_count
                if app["node_number"] != node_index:
                    # TBD - clean cache items
                    log.info(
                        f"setting node_number for this node to: {node_index}")
                    app["node_number"] = node_index
                if app["node_count"] != node_count:
                    # TBD - clean cache items
                    log.info(
                        f"setting node_count for this node to: {node_count}")
                    app["node_count"] = node_count
        else:
            log.debug(f"node {node_id} node number is correct")
        node_index += 1

    if invalid_count == 0:
        log.debug("no invalid nodes!")
        if app["node_state"] != "READY":
            if app["node_type"] == "dn" and not this_node_found:
                # don't go to READY unless this node shows up
                log.info(
                    f"node {this_node_id} not yet showing in proxy list, stay in INITIALIZING"
                )
            else:
                log.info("setting node state to READY")
                app["node_state"] = "READY"
                if app["node_type"] == "sn" and app["node_number"] == -1:
                    # node number shouldn't matter for SN nodes, so set to 1
                    app["node_number"] = 1
        if app["node_count"] != node_count:
            log.info(f"setting node_count to: {node_count}")
            app["node_count"] = node_count
        app["dn_urls"] = dn_urls
    else:
        log.debug(f"number invalid nodes: {invalid_count}")
        if app["node_state"] == "READY":
            log.warn("invalid nodes found, setting node state to INITIALIZING")
            app["node_state"] = "INITIALIZING"

    log.info("oio_register done")
Code example #24
async def get_metadata_obj(app, obj_id):
    """ Get object from metadata cache (if present).
        Otherwise fetch from S3 and add to cache
    """
    log.info("get_metadata_obj: {}".format(obj_id))
    if not isValidDomain(obj_id) and not isValidUuid(obj_id):
        msg = "Invalid obj id: {}".format(obj_id)
        log.error(msg)
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        msg = "{} has been deleted".format(obj_id)
        log.warn(msg)
        raise HTTPGone()

    meta_cache = app['meta_cache']
    obj_json = None
    if obj_id in meta_cache:
        log.debug("{} found in meta cache".format(obj_id))
        obj_json = meta_cache[obj_id]
    else:
        s3_key = getS3Key(obj_id)
        pending_s3_read = app["pending_s3_read"]
        if obj_id in pending_s3_read:
            # already a read in progress, wait for it to complete
            read_start_time = pending_s3_read[obj_id]
            log.info(
                f"s3 read request for {s3_key} was requested at: {read_start_time}"
            )
            while time.time() - read_start_time < 2.0:
                log.debug("waiting for pending s3 read, sleeping")
                await asyncio.sleep(1)  # TBD: use a sub-second sleep?
                if obj_id in meta_cache:
                    log.info(f"object {obj_id} has arrived!")
                    obj_json = meta_cache[obj_id]
                    break
            if not obj_json:
                log.warn(
                    f"s3 read for object {s3_key} timed-out, initiaiting a new read"
                )

        # invoke S3 read unless the object has just come in from pending read
        if not obj_json:
            log.debug("getS3JSONObj({})".format(s3_key))
            if obj_id not in pending_s3_read:
                pending_s3_read[obj_id] = time.time()
            # read S3 object as JSON
            obj_json = await getS3JSONObj(app, s3_key)
            if obj_id in pending_s3_read:
                # read complete - remove from pending map
                elapsed_time = time.time() - pending_s3_read[obj_id]
                log.info(f"s3 read for {s3_key} took {elapsed_time}")
                del pending_s3_read[obj_id]
            meta_cache[obj_id] = obj_json  # add to cache
    return obj_json
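
The pending_s3_read map above implements a simple single-flight pattern: the first caller records a start time and performs the S3 fetch, while later callers poll the cache until the object arrives or a timeout passes. A compact asyncio sketch of the same idea (slow_fetch, the poll interval, and the 2-second window are stand-ins):

import asyncio, time

cache = {}
pending = {}

async def slow_fetch(key):
    await asyncio.sleep(0.1)    # stand-in for the S3 read
    return {"id": key}

async def get_obj(key):
    if key in cache:
        return cache[key]
    if key in pending:
        start = pending[key]
        while time.time() - start < 2.0:   # wait for the in-flight read
            await asyncio.sleep(0.05)
            if key in cache:
                return cache[key]
    pending[key] = time.time()
    obj = await slow_fetch(key)
    pending.pop(key, None)      # read complete - remove from pending map
    cache[key] = obj
    return obj

async def main():
    a, b = await asyncio.gather(get_obj("d-1"), get_obj("d-1"))
    assert a == b               # second caller got the cached result

asyncio.run(main())
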
Code example #25
async def POST_Group(request):
    """HTTP method to create new Group object"""
    log.request(request)
    app = request.app

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    domain_json = await getDomainJson(app, domain, reload=True)

    aclCheck(domain_json, "create",
             username)  # throws exception if not allowed

    if "root" not in domain_json:
        msg = f"Expected root key for domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    link_id = None
    link_title = None
    if request.has_body:
        body = await request.json()
        log.info(f"POST Group body: {body}")
        if body:
            if "link" in body:
                link_body = body["link"]
                log.debug(f"link_body: {link_body}")
                if "id" in link_body:
                    link_id = link_body["id"]
                if "name" in link_body:
                    link_title = link_body["name"]
                if link_id and link_title:
                    log.debug(f"link id: {link_id}")
                    # verify that the referenced id exists and is in this domain
                    # and that the requestor has permissions to create a link
                    await validateAction(app, domain, link_id, username,
                                         "create")
            if not link_id or not link_title:
                log.warn(f"POST Group body with no link: {body}")

    domain_json = await getDomainJson(
        app, domain)  # get again in case cache was invalidated

    root_id = domain_json["root"]
    group_id = createObjId("groups", rootid=root_id)
    log.info(f"new  group id: {group_id}")
    group_json = {"id": group_id, "root": root_id}
    log.debug("create group, body: " + json.dumps(group_json))
    req = getDataNodeUrl(app, group_id) + "/groups"
    params = {}
    if bucket:
        params["bucket"] = bucket

    group_json = await http_post(app, req, data=group_json, params=params)

    # create link if requested
    if link_id and link_title:
        link_json = {}
        link_json["id"] = group_id
        link_json["class"] = "H5L_TYPE_HARD"
        link_req = getDataNodeUrl(app, link_id)
        link_req += "/groups/" + link_id + "/links/" + link_title
        log.debug("PUT link - : " + link_req)
        put_json_rsp = await http_put(app,
                                      link_req,
                                      data=link_json,
                                      params=params)
        log.debug(f"PUT Link resp: {put_json_rsp}")
    log.debug("returning resp")
    # group creation successful
    resp = await jsonResponse(request, group_json, status=201)
    log.response(request, resp=resp)
    return resp
Code example #26
File: domain_sn.py Project: paulmueller/hsds
async def get_domains(request):
    """ This method is called by GET_Domains and GET_Domain """
    app = request.app

    # if there is no domain passed in, get a list of top level domains
    if "domain" not in request.rel_url.query:
        prefix = '/'
    else:
        prefix = request.rel_url.query["domain"]
    log.info(f"get_domains for: {prefix}")

    if not prefix.startswith('/'):
        msg = "Prefix must start with '/'"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # always use "verbose" to pull extra info
    if "verbose" in request.rel_url.query and request.rel_url.query["verbose"]:
        verbose = True
    else:
        verbose = False

    limit = None
    if "Limit" in request.rel_url.query:
        try:
            limit = int(request.rel_url.query["Limit"])
            log.debug(f"GET_Domains - using Limit: {limit}")
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    marker = None
    if "Marker" in request.rel_url.query:
        marker = request.rel_url.query["Marker"]
        log.debug(f"got Marker request param: {marker}")

    # list the S3 keys for this prefix
    domainNames = []
    if prefix == "/":
        # search for domains from the top_level domains config
        domainNames = config.get("top_level_domains")
    else:
        s3prefix = prefix[1:]
        log.debug(f"listing S3 keys for {s3prefix}")
        s3keys = await getS3Keys(app,
                                 include_stats=False,
                                 prefix=s3prefix,
                                 deliminator='/')
        log.debug(f"getS3Keys returned: {len(s3keys)} keys")
        log.debug(f"s3keys {s3keys}")

        for s3key in s3keys:
            if s3key[-1] != '/':
                log.debug(f"ignoring key: {s3key}")
                continue
            log.debug(f"got s3key: {s3key}")
            domain = "/" + s3key[:-1]
            if marker:
                if marker == domain:
                    marker = None
                    continue

            log.debug(f"adding domain: {domain} to domain list")
            domainNames.append(domain)

            if limit and len(domainNames) == limit:
                # got to requested limit
                break

    # get domain info for each domain
    domains = []
    for domain in domainNames:
        try:
            # query DN's for domain json
            # TBD - multicast to DN nodes
            log.debug(f"getDomainJson for {domain}")
            domain_json = await getDomainJson(app, domain, reload=True)
            if domain_json:
                domain_rsp = await get_domain_response(app,
                                                       domain_json,
                                                       verbose=verbose)
                # mixin domain name
                domain_rsp["name"] = domain
                domains.append(domain_rsp)
        except HTTPNotFound:
            # One of the domains was not found, but continue through the list
            log.debug(f"not found error for: {domain}")

    return domains
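
The Marker/Limit handling above is keyset-style pagination: skip keys up to and including the marker, then take at most limit items. The same logic as a pure function (a sketch with made-up domain names):

def page_domains(domains, marker=None, limit=None):
    """Return domains after `marker`, at most `limit` of them (sketch)."""
    result = []
    skipping = marker is not None
    for domain in domains:
        if skipping:
            if domain == marker:
                skipping = False   # start returning after the marker
            continue
        result.append(domain)
        if limit and len(result) == limit:
            break                  # got to requested limit
    return result

names = ["/home/a", "/home/b", "/home/c", "/home/d"]
assert page_domains(names, marker="/home/b", limit=2) == ["/home/c", "/home/d"]
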
Code example #27
File: import_ghcn_file.py Project: whigg/hsds
async def import_line(line):
    domain = globals["domain"]
    params = {"host": domain}
    headers = getRequestHeaders()
    client = globals["client"]
    globals["lines_read"] += 1
    task_log = {}
    task_log["line"] = line
    task_log["start"] = time.time()
    
    fields = line.split(',')
    if len(fields) != 8:
        log.warn("unexpected number of fields in line: [()]".format(line))
        return
    station = fields[0]
    if len(station) != 11:
        log.warn("unexpected station length line: [()]".format(line))
        return
    date = fields[1]
    if len(date) != 8:
        log.warn("unexpected station length line: [()]".format(line))
        return
    obstype = fields[2]
    if len(obstype) != 4:
        log.warn("unexpected obstype length line: [()]".format(line))
        return
    value = 0
    try:
        value = int(fields[3])
    except ValueError:
        log.warn("unexpected value in line: [()]".format(line))
        return
    # TBD - do something with other fields
    log.info("data: {} {} {} {}".format(station, obstype, date, value))
    h5path = "/data/" + station + "/" + obstype
    task_log["h5path"] = h5path
    task_log["state"] = "INPROGRESS"
    globals["tasks"].append(task_log)  # add before the first await
    try:
        grp_id = await verifyGroupPath(h5path)
    except HttpProcessingError as hpe:
        log.error("failed to verifyGroupPath: {}, err: {}".format(h5path, str(hpe)))
        globals["failed_line_updates"] += 1
        task_log["state"] = "COMPLETE"
        return

    # create the attribute
    data = {'type': 'H5T_STD_I32LE', 'value': value}
    req = getEndpoint() + "/groups/" + grp_id + "/attributes/" + date
    log.info("PUT " + req)
    globals["request_count"] += 1
    task_log["req"] = req
    timeout = config.get("timeout")
    async with client.put(req, headers=headers, data=json.dumps(data), params=params, timeout=timeout) as rsp:
        task_log["stop"] = time.time()
        task_log["state"] = "COMPLETE"
        task_log["status"] = rsp.status
        if rsp.status == 409:
            log.warn("409 for req: " + req)
        elif rsp.status != 201:
            log.error("got status: {} for req: {}".format(rsp.status, req))
            globals["failed_line_updates"] += 1
        else:
            globals["attribute_count"] += 1
Code example #28
File: domain_sn.py Project: paulmueller/hsds
async def GET_Domain(request):
    """HTTP method to return JSON for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = None
    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        log.warn("Invalid domain")
        raise HTTPBadRequest(reason="Invalid domain name")

    verbose = False
    if "verbose" in params and params["verbose"]:
        verbose = True

    if not domain:
        log.info("no domain passed in, returning all top-level domains")
        # no domain passed in, return top-level domains for this request
        domains = await get_domains(request)
        rsp_json = {"domains": domains}
        rsp_json["hrefs"] = []
        resp = await jsonResponse(request, rsp_json)
        log.response(request, resp=resp)
        return resp

    log.info("got domain: {}".format(domain))

    domain_json = await getDomainJson(app, domain, reload=True)

    if domain_json is None:
        log.warn("domain: {} not found".format(domain))
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    if "h5path" in params:
        # if h5path is passed in, return object info for that path
        #   (if exists)
        h5path = params["h5path"]
        root_id = domain_json["root"]
        obj_id = await getObjectIdByPath(app, root_id,
                                         h5path)  # throws 404 if not found
        log.info("get obj_id: {} from h5path: {}".format(obj_id, h5path))
        # get authoritative state for object from DN (even if it's in the meta_cache).
        obj_json = await getObjectJson(app, obj_id, refresh=True)
        obj_json["domain"] = domain
        # Not bothering with hrefs for h5path lookups...
        resp = await jsonResponse(request, obj_json)
        log.response(request, resp=resp)
        return resp

    # return just the keys as per the REST API
    rsp_json = await get_domain_response(app, domain_json, verbose=verbose)

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'database',
            'href': getHref(request, '/datasets')
        })
        hrefs.append({'rel': 'groupbase', 'href': getHref(request, '/groups')})
        hrefs.append({
            'rel': 'typebase',
            'href': getHref(request, '/datatypes')
        })
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })

    hrefs.append({'rel': 'acls', 'href': getHref(request, '/acls')})
    parent_domain = getParentDomain(domain)
    log.debug("href parent domain: {}".format(parent_domain))
    if parent_domain:
        hrefs.append({
            'rel': 'parent',
            'href': getHref(request, '/', domain=parent_domain)
        })

    rsp_json["hrefs"] = hrefs
    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Code example #29
async def GET_Groups(request):
    """HTTP method to return groups collection for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code == 404:
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"Unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    # get the groups collection list
    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    obj_ids = []
    if "root" in domain_json or domain_json["root"]:
        # get the groups collection list
        collections = await get_collections(app, domain_json["root"])
        objs = collections["groups"]
        obj_ids = getIdList(objs, marker=marker, limit=limit)

    # create hrefs
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/groups')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})

    # return obj ids and hrefs
    rsp_json = {}
    rsp_json["groups"] = obj_ids
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Code example #30
File: chunk_dn.py Project: pcrespov/hsds
async def PUT_Chunk(request):
    log.request(request)
    app = request.app
    params = request.rel_url.query
    query = None
    if "query" in params:
        query = params["query"]
        log.info(f"PUT_Chunk query: {query}")
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if "bucket" in params:
        bucket = params["bucket"]
        log.debug(f"PUT_Chunk using bucket: {bucket}")
    else:
        bucket = None

    if query:
        expected_content_type = "text/plain; charset=utf-8"
    else:
        expected_content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
        if content_type != expected_content_type:
            msg = f"Unexpected content_type: {content_type}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    if "dset" in params:
        msg = "Unexpected param dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    log.debug(f"PUT_Chunk - id: {chunk_id}")

    dset_id = getDatasetId(chunk_id)

    dset_json = await get_metadata_obj(app, dset_id, bucket=bucket)

    log.debug(f"dset_json: {dset_json}")

    dims = getChunkLayout(dset_json)

    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)

    rank = len(dims)

    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)
    selection = tuple(selection)
    log.debug(f"got selection: {selection}")

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug(f"dtype: {dt}")

    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug(f"selection[{i}]: {s}")

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent

    resp = {}
    query_update = None
    limit = 0
    chunk_init = True
    input_arr = None
    if query:
        if not dt.fields:
            log.error("expected compound dtype for PUT query")
            raise HTTPInternalServerError()
        query_update = await request.json()
        log.debug(f"query_update: {query_update}")
        if "Limit" in params:
            limit = int(params["Limit"])
        chunk_init = False
    else:
        # regular chunk update

        # check that the content_length is what we expect
        if itemsize != 'H5T_VARIABLE':
            log.debug(f"expect content_length: {num_elements*itemsize}")
        log.debug(f"actual content_length: {request.content_length}")

        if itemsize != 'H5T_VARIABLE' and (num_elements *
                                           itemsize) != request.content_length:
            msg = f"Expected content_length of: {num_elements*itemsize}, but got: {request.content_length}"
            log.error(msg)
            raise HTTPBadRequest(reason=msg)

        # create a numpy array for incoming data
        input_bytes = await request_read(
            request
        )  # TBD - will it cause problems when failures are raised before reading data?
        if len(input_bytes) != request.content_length:
            msg = f"Read {len(input_bytes)} bytes, expecting: {request.content_length}"
            log.error(msg)
            raise HTTPInternalServerError()

        input_arr = bytesToArray(input_bytes, dt, mshape)

    # TBD: Skip read if the input shape is the entire chunk?
    chunk_arr = await getChunk(app,
                               chunk_id,
                               dset_json,
                               chunk_init=chunk_init,
                               bucket=bucket)
    is_dirty = False
    if query:
        values = []
        indices = []
        if chunk_arr is not None:
            # do query selection
            limit = 0
            if "Limit" in params:
                limit = int(params["Limit"])

            field_names = list(dt.fields.keys())
            replace_mask = [None] * len(field_names)
            for i in range(len(field_names)):
                field_name = field_names[i]
                if field_name in query_update:
                    replace_mask[i] = query_update[field_name]
            log.debug(f"replace_mask: {replace_mask}")

            x = chunk_arr[selection]
            log.debug(f"put_query - x: {x}")
            eval_str = getEvalStr(query, "x", field_names)
            log.debug(f"put_query - eval_str: {eval_str}")
            where_result = np.where(eval(eval_str))
            log.debug(f"put_query - where_result: {where_result}")
            where_result_index = where_result[0]
            log.debug(f"put_query - whare_result index: {where_result_index}")
            log.debug(
                f"put_query - boolean selection: {x[where_result_index]}")
            s = selection[0]
            count = 0
            for index in where_result_index:
                log.debug(f"put_query - index: {index}")
                value = x[index]
                log.debug(f"put_query - original value: {value}")
                for i in range(len(field_names)):
                    if replace_mask[i] is not None:
                        value[i] = replace_mask[i]
                log.debug(f"put_query - modified value: {value}")
                x[index] = value

                json_val = bytesArrayToList(value)
                log.debug(f"put_query - json_value: {json_val}")
                json_index = index.tolist() * s.step + s.start  # adjust for selection
                indices.append(json_index)
                values.append(json_val)
                count += 1
                is_dirty = True
                if limit > 0 and count >= limit:
                    log.info("put_query - got limit items")
                    break

        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # update chunk array
        chunk_arr[selection] = input_arr
        is_dirty = True
        resp = json_response({}, status=201)

    if is_dirty:
        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)
        log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

        # async write to S3
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = (now, bucket)

    # chunk update successful
    log.response(request, resp=resp)
    return resp
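
Finally, the query-update path in PUT_Chunk evaluates the query expression against the selected rows of a compound-dtype array and rewrites matching rows field by field. A self-contained numpy sketch of that flow, with a hard-coded condition standing in for the getEvalStr/eval step:

import numpy as np

dt = np.dtype([("symbol", "S4"), ("price", "f4")])
x = np.array([(b"AAPL", 10.0), (b"MSFT", 20.0), (b"AAPL", 30.0)], dtype=dt)

query_update = {"price": 0.0}          # fields to overwrite on matching rows
field_names = list(dt.fields.keys())
replace_mask = [query_update.get(name) for name in field_names]

where_index = np.where(x["symbol"] == b"AAPL")[0]  # stands in for eval(eval_str)
for index in where_index:
    value = x[index]                   # structured scalar view into the array
    for i, replacement in enumerate(replace_mask):
        if replacement is not None:
            value[i] = replacement     # overwrite only the masked fields
    x[index] = value

print(x)  # AAPL rows now have price 0.0
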