Example #1
File: headnode.py Project: PjEdwards/hsds
    app.router.add_get('/nodestate/{nodetype}/{nodenumber}', nodestate)
    app.router.add_get('/nodeinfo', nodeinfo)
    app.router.add_get('/nodeinfo/{statkey}', nodeinfo)
    app.router.add_get('/info', info)
    app.router.add_post('/register', register)
    
    return app

#
# Main
#

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    app = loop.run_until_complete(init(loop))   

    # create a client Session here so that all client requests 
    #   will share the same connection pool
    max_tcp_connections = int(config.get("max_tcp_connections"))
    app['client'] = ClientSession(loop=loop, connector=TCPConnector(limit=max_tcp_connections))
     
    session = aiobotocore.get_session(loop=loop)
    app["session"] = session
    app["loop"] = loop
      
    asyncio.ensure_future(healthCheck(app), loop=loop)
    head_port = config.get("head_port")
    log.info("Starting service on port: {}".format(head_port))
    log.debug("debug test")
    run_app(app, port=int(head_port))
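
The startup code above stores a single pooled ClientSession on the app so that all outgoing requests share one connection pool. A minimal sketch of how a coroutine might reuse it (fetch_info is a hypothetical helper; the /info path mirrors the route registered above):

async def fetch_info(app, node_url):
    # reuse the pooled ClientSession created at startup
    client = app['client']
    async with client.get(node_url + '/info') as rsp:
        if rsp.status != 200:
            return None
        return await rsp.json()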
Example #2
File: link_sn.py Project: paulmueller/hsds
async def GET_Link(request):
    """HTTP method to return JSON for a group link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    log.debug("get LINK: " + req)
    link_json = await http_get(app, req)
    log.debug("got link_json: " + str(link_json))
    resp_link = {}
    resp_link["title"] = link_title
    resp_link["class"] = link_json["class"]
    if link_json["class"] == "H5L_TYPE_HARD":
        resp_link["id"] = link_json["id"]
        resp_link["collection"] = getCollectionForId(link_json["id"])
    elif link_json["class"] == "H5L_TYPE_SOFT":
        resp_link["h5path"] = link_json["h5path"]
    elif link_json["class"] == "H5L_TYPE_EXTERNAL":
        resp_link["h5path"] = link_json["h5path"]
        resp_link["h5domain"] = link_json["h5domain"]
    else:
        log.warn("Unexpected link class: {}".format(link_json["class"]))
    resp_json = {}
    resp_json["link"] = resp_link
    resp_json["created"] = link_json["created"]
    # links don't get modified, so use created timestamp as lastModified
    resp_json["lastModified"] = link_json["created"]

    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({
        'rel': 'self',
        'href': getHref(request, group_uri + '/links/' + link_title)
    })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    if link_json["class"] == "H5L_TYPE_HARD":
        target = '/' + resp_link["collection"] + '/' + resp_link["id"]
        hrefs.append({'rel': 'target', 'href': getHref(request, target)})

    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
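
For reference, a hedged client-side sketch of calling this endpoint with aiohttp (base_url, the group id, and the link title are placeholders; the JSON shape is the one assembled above):

import aiohttp

async def get_link(base_url, group_id, title, auth=None):
    # GET /groups/{id}/links/{title} against the handler above
    async with aiohttp.ClientSession(auth=auth) as session:
        url = f"{base_url}/groups/{group_id}/links/{title}"
        async with session.get(url) as rsp:
            rsp.raise_for_status()
            return await rsp.json()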
Example #3
File: link_sn.py Project: paulmueller/hsds
async def GET_Links(request):
    """HTTP method to return JSON for link collection"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    await validateAction(app, domain, group_id, username, "read")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links"
    query_sep = '?'
    if limit is not None:
        req += query_sep + "Limit=" + str(limit)
        query_sep = '&'
    if marker is not None:
        req += query_sep + "Marker=" + marker

    log.debug("get LINKS: " + req)
    links_json = await http_get(app, req)
    log.debug("got links json from dn for group_id: {}".format(group_id))
    links = links_json["links"]

    # mix in collection key, target and hrefs
    for link in links:
        if link["class"] == "H5L_TYPE_HARD":
            collection_name = getCollectionForId(link["id"])
            link["collection"] = collection_name
            target_uri = '/' + collection_name + '/' + link["id"]
            link["target"] = getHref(request, target_uri)
        link_uri = '/groups/' + group_id + '/links/' + link['title']
        link["href"] = getHref(request, link_uri)

    resp_json = {}
    resp_json["links"] = links
    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({
        'rel': 'self',
        'href': getHref(request, group_uri + '/links')
    })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, group_uri)})
    resp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
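
The Limit/Marker query parameters implement simple cursor pagination: Marker names the last link title seen, and both values are forwarded to the DN. A sketch of how a client could page through all links, assuming a hypothetical fetch_json helper that returns the parsed response:

async def iter_links(fetch_json, group_id, page_size=100):
    # page through GET /groups/{id}/links using Limit/Marker
    marker = None
    while True:
        params = {"Limit": page_size}
        if marker:
            params["Marker"] = marker
        rsp = await fetch_json(f"/groups/{group_id}/links", params=params)
        links = rsp["links"]
        for link in links:
            yield link
        if len(links) < page_size:
            break  # a short page means we reached the end
        marker = links[-1]["title"]  # resume after the last title seen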
Example #4
async def PUT_Chunk(request):
    log.request(request)
    app = request.app 
    params = request.rel_url.query
  
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
  
    if not request.has_body:
        msg = "PUT Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = "Unexpected content_type: {}".format(content_type)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug("dset_json: {}".format(dset_json))

    dims = getChunkLayout(dset_json)
   
    if "root" not in dset_json:
        msg = "expected root key in dset_json"
        log.error(msg)
        raise KeyError(msg)
    
    rank = len(dims)  
     
    # get chunk selection from query params
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)   
    selection = tuple(selection)  
    log.debug("got selection: {}".format(selection))

    type_json = dset_json["type"]
    itemsize = 'H5T_VARIABLE'  
    if "size" in type_json:
        itemsize = type_json["size"]
    dt = createDataType(type_json)
    log.debug("dtype: {}".format(dt))
    
    if rank == 0:
        msg = "No dimension passed to PUT chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug("selection[{}]: {}".format(i, s))

    mshape = getSelectionShape(selection)
    log.debug(f"mshape: {mshape}")
    num_elements = 1
    for extent in mshape:
        num_elements *= extent
        
    # check that the content_length is what we expect
    if itemsize != 'H5T_VARIABLE':
        log.debug("expect content_length: {}".format(num_elements*itemsize))
    log.debug("actual content_length: {}".format(request.content_length))

    if itemsize != 'H5T_VARIABLE' and (num_elements * itemsize) != request.content_length:
        msg = "Expected content_length of: {}, but got: {}".format(num_elements*itemsize, request.content_length)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming data
    input_bytes = await request_read(request)  # TBD - will it cause problems when failures are raised before reading data?
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes), request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()
        
    input_arr = bytesToArray(input_bytes, dt, mshape)

    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=True)

    # update chunk array
    chunk_arr[selection] = input_arr
    chunk_cache = app["chunk_cache"]
    chunk_cache.setDirty(chunk_id)
    log.info(f"PUT_Chunk dirty cache count: {chunk_cache.dirtyCount}")

    # async write to S3   
    dirty_ids = app["dirty_ids"]
    now = int(time.time())
    dirty_ids[chunk_id] = now
    
    # chunk update successful     
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
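
The content-length check above multiplies the element count of the selection by the fixed item size. The same arithmetic in isolation (a sketch; names are illustrative):

import numpy as np

def expected_body_size(selection, dt):
    # elements covered by the slice selection times bytes per element
    num_elements = 1
    for s in selection:
        num_elements *= len(range(s.start, s.stop, s.step or 1))
    return num_elements * dt.itemsize

# e.g. a 10x20 block of int32 values is 10 * 20 * 4 = 800 bytes
assert expected_body_size((slice(0, 10, 1), slice(0, 20, 1)), np.dtype('int32')) == 800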
Example #5
async def POST_Chunk(request):
    log.request(request)
    app = request.app 
    params = request.rel_url.query

    put_points = False
    num_points = 0
    if "count" in params:
        num_points = int(params["count"])

    if "action" in params and params["action"] == "put":
        log.info("POST Chunk put points, num_points: {}".format(num_points))

        put_points = True
    else:
        log.info("POST Chunk get points")

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info("POST chunk_id: {}".format(chunk_id))
    chunk_index = getChunkIndex(chunk_id)
    log.debug("chunk_index: {}".format(chunk_index))
    
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    log.debug("dset_json: {}".format(dset_json))
    chunk_layout = getChunkLayout(dset_json)
    chunk_coord = getChunkCoordinate(chunk_id, chunk_layout)
    log.debug("chunk_coord: {}".format(chunk_coord))
    
    if not request.has_body:
        msg = "POST Value with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    content_type = "application/octet-stream"
    if "Content-Type" in request.headers:
        # client should use "application/octet-stream" for binary transfer
        content_type = request.headers["Content-Type"]
    if content_type != "application/octet-stream":
        msg = "Unexpected content_type: {}".format(content_type)
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
     
    type_json = dset_json["type"]
    dset_dtype = createDataType(type_json)
    log.debug("dtype: {}".format(dset_dtype))

    dims = getChunkLayout(dset_json)
    log.debug("got dims: {}".format(dims))
    rank = len(dims)
    if rank == 0:
        msg = "POST chunk request with no dimensions"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    # create a numpy array for incoming points
    input_bytes = await request_read(request) 
    if len(input_bytes) != request.content_length:
        msg = "Read {} bytes, expecting: {}".format(len(input_bytes), request.content_length)
        log.error(msg)
        raise HTTPInternalServerError()

    # get chunk from cache/s3.  If not found init a new chunk if this is a write request
    chunk_arr = await getChunk(app, chunk_id, dset_json, chunk_init=put_points)

    if put_points:
        # writing point data

        # create a numpy array with the following type:
        #       (coord1, coord2, ...) | dset_dtype
        if rank == 1:
            coord_type_str = "uint64"
        else:
            coord_type_str = "({},)uint64".format(rank)
        comp_dtype = np.dtype([("coord", np.dtype(coord_type_str)), ("value", dset_dtype)])
        point_arr = np.frombuffer(input_bytes, dtype=comp_dtype)  # fromstring is deprecated for binary data
        if len(point_arr) != num_points:
            msg = "Unexpected size of point array, got: {} expected: {}".format(len(point_arr), num_points)
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        for i in range(num_points):
            elem = point_arr[i]
            if rank == 1:
                coord = int(elem[0])
            else:
                coord = tuple(elem[0]) # index to update
            val = elem[1]   # value 
            chunk_arr[coord] = val # update the point

        chunk_cache = app["chunk_cache"]
        chunk_cache.setDirty(chunk_id)

        # async write to S3   
        dirty_ids = app["dirty_ids"]
        now = int(time.time())
        dirty_ids[chunk_id] = now
        log.info("set {} to dirty".format(chunk_id))
    
    else:
        # reading point data  
        point_dt = np.dtype('uint64')  # use unsigned long for point index  
        point_arr = np.frombuffer(input_bytes, dtype=point_dt)  # read points as unsigned longs
        if len(point_arr) % rank != 0:
            msg = "Unexpected size of point array"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        num_points = len(point_arr) // rank
        log.debug("got {} points".format(num_points))

        point_arr = point_arr.reshape((num_points, rank))    
        output_arr = np.zeros((num_points,), dtype=dset_dtype)
    
        for i in range(num_points):
            point = point_arr[i,:]
            tr_point = getChunkRelativePoint(chunk_coord, point)
            val = chunk_arr[tuple(tr_point)]
            output_arr[i] = val
     
    if put_points:
        # write empty response
        resp = json_response({})
    else:
        # get data
        output_data = output_arr.tobytes()

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()
        # only finalize the stream once the write has succeeded
        await resp.write_eof()

        
    return resp
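
The put-points branch expects the request body to be packed (coordinate, value) records. A hedged sketch of how a client could build such a buffer for a rank-2 dataset of int32 values (shapes and values are illustrative):

import numpy as np

rank = 2
dset_dtype = np.dtype('int32')
comp_dtype = np.dtype([("coord", np.dtype(f"({rank},)uint64")), ("value", dset_dtype)])

points = np.zeros((3,), dtype=comp_dtype)
points["coord"] = [(0, 1), (2, 3), (4, 5)]  # chunk-relative indices to write
points["value"] = [10, 20, 30]              # values for those indices
body = points.tobytes()  # bytes for the POST request body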
Example #6
async def k8s_register(app):
    log.info("k8s_register")
    # TBD - find more elegant way to avoid this warning
    import urllib3
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    # get the config from within the cluster and set it as the default
    # config for all new clients
    k8s_config.load_incluster_config()
    c = k8s_client.Configuration()  # get a copy of the default config
    c.verify_ssl = False  # set verify_ssl to False in that config
    k8s_client.Configuration.set_default(c)  # make that config the default for all new clients
    v1 = k8s_client.CoreV1Api()
    # TBD - use the async version
    ret = v1.list_pod_for_all_namespaces(watch=False)
    pod_ips = []
    sn_urls = {}
    dn_urls = {}
    for i in ret.items:
        pod_ip = i.status.pod_ip
        if not pod_ip:
            continue
        labels = i.metadata.labels
        if "app" in labels and labels["app"] == "hsds":
            log.info(f"hsds pod - ip: {pod_ip}")
            pod_ips.append(pod_ip)
    if not pod_ips:
        log.error("Expected to find at least one hsds pod")
        return
    pod_ips.sort()  # for assigning node numbers
    node_count = len(pod_ips)
    ready_count = 0
    this_node_id = app["id"]
    sn_port = config.get("sn_port")
    dn_port = config.get("dn_port")
    for node_number in range(node_count):
        for port in (sn_port, dn_port):
            # send an info request to the node
            pod_ip = pod_ips[node_number]
            url = f"http://{pod_ip}:{port}"
            if port == sn_port:
                sn_urls[node_number] = url
            else:
                dn_urls[node_number] = url

            info_rsp = await get_info(app, url)
            if not info_rsp:
                # timeout or other failure
                continue
            if "node" not in info_rsp:
                log.error("expected to find node key in info resp")
                continue

            node_rsp = info_rsp["node"]
            log.debug(f"got info resp: {node_rsp}")
            for key in ("type", "id", "node_number", "node_count"):
                if key not in node_rsp:
                    log.error(f"missing key in node state: {key}")
                    continue
            if node_rsp["type"] not in ("sn", "dn"):
                log.error(f"expected node_type to be sn or dn")
                continue
            node_id = node_rsp["id"]
            if node_id == this_node_id:
                # set node_number and node_count
                log.debug("got info_rsp for this node")
                if app["node_number"] != node_number:
                    old_number = app["node_number"]
                    log.info(
                        f"node_number has changed - old value was {old_number} new number is {node_number}"
                    )
                    if app["node_type"] == "dn":
                        meta_cache = app["meta_cache"]
                        chunk_cache = app["chunk_cache"]
                        if meta_cache.dirtyCount > 0 or chunk_cache.dirtyCount > 0:
                            # set the node state to waiting until the caches have been flushed
                            if app["node_state"] == "READY":
                                log.info(
                                    "setting node_state to waiting while cache is flushing"
                                )
                                app["node_state"] = "WAITING"
                        else:
                            meta_cache.clearCache()
                            chunk_cache.clearCache()
                            log.info(
                                f"node number was: {old_number} setting to: {node_number}"
                            )
                            app["node_number"] = node_number
                            app['register_time'] = time.time()
                    else:
                        # SN nodes can update node_number immediately
                        log.info(
                            f"node number was: {old_number} setting to: {node_number}"
                        )
                        app["node_number"] = node_number
                        app['register_time'] = time.time()
                if app["node_count"] != node_count:
                    old_count = app["node_count"]
                    log.info(
                        f"node count was: {old_count} setting to: {node_count}"
                    )
                    app["node_count"] = node_count
            if node_number == node_rsp["node_number"] and node_count == node_rsp["node_count"]:
                ready_count += 1
                log.debug(f"incremented ready_count to {ready_count}")
            else:
                log.info(f"differing node_number/node_count for url: {url}")
                log.info(
                    f"expected node_number: {node_number} actual: {node_rsp['node_number']}"
                )
                log.info(
                    f"expected node_count: {node_count} actual: {node_rsp['node_count']}"
                )

    if ready_count == node_count * 2:
        if app["node_state"] != "READY":
            log.info("setting node state to READY")
            app["node_state"] = "READY"
        app["node_count"] = node_count
        app["sn_urls"] = sn_urls
        app["dn_urls"] = dn_urls
    else:
        log.info(
            f"not all pods ready - ready_count: {ready_count}/{node_count*2}")
        if app["node_state"] == "READY":
            log.info("setting node state to SCALING")
            app["node_state"] = "SCALING"
Example #7
async def info(request):
    """HTTP Method to retun node state to caller"""
    log.debug("info request")
    app = request.app
    answer = {}
    # copy relevant entries from state dictionary to response
    node = {}
    node['id'] = request.app['id']
    node['type'] = request.app['node_type']
    node['start_time'] = app["start_time"]  #unixTimeToUTC(app['start_time'])
    node['state'] = app['node_state']
    node['node_number'] = app['node_number']
    node['node_count'] = app['node_count']

    answer["node"] = node
    # psutil info
    # see: http://pythonhosted.org/psutil/ for description of different fields
    cpu = {}
    cpu["percent"] = psutil.cpu_percent()
    cpu["cores"] = psutil.cpu_count()
    answer["cpu"] = cpu
    diskio = psutil.disk_io_counters()
    disk_stats = {}
    disk_stats["read_count"] = diskio.read_count
    disk_stats["read_time"] = diskio.read_time
    disk_stats["read_bytes"] = diskio.read_bytes
    disk_stats["write_count"] = diskio.write_count
    disk_stats["write_time"] = diskio.write_time
    disk_stats["write_bytes"] = diskio.write_bytes
    answer["diskio"] = disk_stats
    netio = psutil.net_io_counters()
    net_stats = {}
    net_stats["bytes_sent"] = netio.bytes_sent
    net_stats["bytes_sent"] = netio.bytes_recv
    net_stats["packets_sent"] = netio.packets_sent
    net_stats["packets_recv"] = netio.packets_recv
    net_stats["errin"] = netio.errin
    net_stats["errout"] = netio.errout
    net_stats["dropin"] = netio.dropin
    net_stats["dropout"] = netio.dropout
    answer["netio"] = net_stats
    mem_stats = {}
    svmem = psutil.virtual_memory()
    mem_stats["phys_total"] = svmem.total
    mem_stats["phys_available"] = svmem.available
    sswap = psutil.swap_memory()
    mem_stats["swap_total"] = sswap.total
    mem_stats["swap_used"] = sswap.used
    mem_stats["swap_free"] = sswap.free
    mem_stats["percent"] = sswap.percent
    answer["memory"] = mem_stats
    disk_stats = {}
    sdiskusage = psutil.disk_usage('/')
    disk_stats["total"] = sdiskusage.total
    disk_stats["used"] = sdiskusage.used
    disk_stats["free"] = sdiskusage.free
    disk_stats["percent"] = sdiskusage.percent
    answer["disk"] = disk_stats
    answer["log_stats"] = app["log_count"]
    answer["req_count"] = app["req_count"]
    if "s3_stats" in app:
        answer["s3_stats"] = app["s3_stats"]
    mc_stats = {}
    if "meta_cache" in app:
        mc = app["meta_cache"]  # only DN nodes have this
        mc_stats["count"] = len(mc)
        mc_stats["dirty_count"] = mc.dirtyCount
        mc_stats["utililization_per"] = mc.cacheUtilizationPercent
        mc_stats["mem_used"] = mc.memUsed
        mc_stats["mem_target"] = mc.memTarget
    answer["meta_cache_stats"] = mc_stats
    cc_stats = {}
    if "chunk_cache" in app:
        cc = app["chunk_cache"]  # only DN nodes have this
        cc_stats["count"] = len(cc)
        cc_stats["dirty_count"] = cc.dirtyCount
        cc_stats["utililization_per"] = cc.cacheUtilizationPercent
        cc_stats["mem_used"] = cc.memUsed
        cc_stats["mem_target"] = cc.memTarget
    answer["chunk_cache_stats"] = cc_stats
    dc_stats = {}
    if "domain_cache" in app:
        dc = app["domain_cache"]  # only DN nodes have this
        dc_stats["count"] = len(dc)
        dc_stats["dirty_count"] = dc.dirtyCount
        dc_stats["utililization_per"] = dc.cacheUtilizationPercent
        dc_stats["mem_used"] = dc.memUsed
        dc_stats["mem_target"] = dc.memTarget
    answer["domain_cache_stats"] = dc_stats

    resp = await jsonResponse(request, answer)
    log.response(request, resp=resp)
    return resp
Example #8
async def GET_Domain(request):
    """HTTP method to return JSON for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = None
    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        log.warn("Invalid domain")
        raise HTTPBadRequest(reason="Invalid domain name")

    verbose = False
    if "verbose" in params and params["verbose"]:
        verbose = True

    if not domain:
        log.info("no domain passed in, returning all top-level domains")
        # no domain passed in, return top-level domains for this request
        domains = await get_domains(request)
        rsp_json = {"domains": domains}
        rsp_json["hrefs"] = []
        resp = await jsonResponse(request, rsp_json)
        log.response(request, resp=resp)
        return resp

    log.info("got domain: {}".format(domain))

    domain_json = await getDomainJson(app, domain, reload=True)

    if domain_json is None:
        log.warn("domain: {} not found".format(domain))
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    if "h5path" in params:
        # if h5path is passed in, return object info for that path
        #   (if exists)
        h5path = params["h5path"]
        root_id = domain_json["root"]
        obj_id = await getObjectIdByPath(app, root_id,
                                         h5path)  # throws 404 if not found
        log.info("get obj_id: {} from h5path: {}".format(obj_id, h5path))
        # get authoritative state for object from DN (even if it's in the meta_cache).
        obj_json = await getObjectJson(app, obj_id, refresh=True)
        obj_json["domain"] = domain
        # Not bothering with hrefs for h5path lookups...
        resp = await jsonResponse(request, obj_json)
        log.response(request, resp=resp)
        return resp

    # return just the keys as per the REST API
    rsp_json = await get_domain_response(app, domain_json, verbose=verbose)

    # include domain objects if requested
    if "getobjs" in params and params["getobjs"] and "root" in domain_json:
        root_id = domain_json["root"]
        include_attrs = False
        if "include_attrs" in params and params["include_attrs"]:
            include_attrs = True
        domain_objs = await getDomainObjects(app,
                                             root_id,
                                             include_attrs=include_attrs)
        rsp_json["domain_objs"] = domain_objs

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'database',
            'href': getHref(request, '/datasets')
        })
        hrefs.append({'rel': 'groupbase', 'href': getHref(request, '/groups')})
        hrefs.append({
            'rel': 'typebase',
            'href': getHref(request, '/datatypes')
        })
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })

    hrefs.append({'rel': 'acls', 'href': getHref(request, '/acls')})
    parent_domain = getParentDomain(domain)
    log.debug("href parent domain: {}".format(parent_domain))
    if parent_domain:
        hrefs.append({
            'rel': 'parent',
            'href': getHref(request, '/', domain=parent_domain)
        })

    rsp_json["hrefs"] = hrefs
    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
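
When h5path is supplied, the handler resolves the object along that path and returns its JSON directly, skipping the usual hrefs. A hedged client sketch (base_url, domain, and path are placeholders):

import aiohttp

async def lookup_by_path(base_url, domain, h5path):
    # GET / with domain and h5path query params, as handled above
    params = {"domain": domain, "h5path": h5path}
    async with aiohttp.ClientSession() as session:
        async with session.get(base_url + "/", params=params) as rsp:
            rsp.raise_for_status()
            return await rsp.json()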
Example #9
async def PUT_Domain(request):
    """HTTP method to create a new domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query
    # verify username, password
    username, pswd = getUserPasswordFromRequest(
        request)  # throws exception if user/password is not valid
    await validateUserPassword(app, username, pswd)

    # initial perms for owner and default
    owner_perm = {
        'create': True,
        'read': True,
        'update': True,
        'delete': True,
        'readACL': True,
        'updateACL': True
    }
    default_perm = {
        'create': False,
        'read': True,
        'update': False,
        'delete': False,
        'readACL': False,
        'updateACL': False
    }

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    log.info("PUT domain: {}, username: {}".format(domain, username))

    body = None
    if request.has_body:
        body = await request.json()
        log.debug("PUT domain with body: {}".format(body))

    if ("flush" in params and params["flush"]) or (body and "flush" in body
                                                   and body["flush"]):
        # flush domain - update existing domain rather than create a new resource
        domain_json = await getDomainJson(app, domain, reload=True)
        log.debug("got domain_json: {}".format(domain_json))

        if domain_json is None:
            log.warn("domain: {} not found".format(domain))
            raise HTTPNotFound()

        if 'owner' not in domain_json:
            log.error("No owner key found in domain")
            raise HTTPInternalServerError()

        if 'acls' not in domain_json:
            log.error("No acls key found in domain")
            raise HTTPInternalServerError()

        aclCheck(domain_json, "update",
                 username)  # throws exception if not allowed
        if "root" in domain_json:
            # nothing to do for folder objects
            await doFlush(app, domain_json["root"])
        # flush  successful
        resp = await jsonResponse(request, None, status=204)
        log.response(request, resp=resp)
        return resp

    is_folder = False
    owner = username
    linked_domain = None
    root_id = None

    if body and "folder" in body:
        if body["folder"]:
            is_folder = True
    if body and "owner" in body:
        owner = body["owner"]
    if body and "linked_domain" in body:
        if is_folder:
            msg = "Folder domains can not be used for links"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        linked_domain = body["linked_domain"]
        log.info(f"linking to domain: {linked_domain}")

    if owner != username and username != "admin":
        log.warn("Only admin users are allowed to set owner for new domains")
        raise HTTPForbidden()

    parent_domain = getParentDomain(domain)
    log.debug("Parent domain: [{}]".format(parent_domain))

    if (not parent_domain or parent_domain == '/') and not is_folder:
        msg = "Only folder domains can be created at the top-level"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if (not parent_domain or parent_domain == '/') and username != "admin":
        msg = "creation of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    parent_json = None
    if parent_domain and parent_domain != '/':
        try:
            parent_json = await getDomainJson(app, parent_domain, reload=True)
        except ClientResponseError as ce:
            if ce.code == 404:
                msg = "Parent domain: {} not found".format(parent_domain)
                log.warn(msg)
                raise HTTPNotFound()
            elif ce.code == 410:
                msg = "Parent domain: {} removed".format(parent_domain)
                log.warn(msg)
                raise HTTPGone()
            else:
                log.error(f"Unexpected error: {ce.code}")
                raise HTTPInternalServerError()

        log.debug("parent_json {}: {}".format(parent_domain, parent_json))
        if "root" in parent_json and parent_json["root"]:
            msg = "Parent domain must be a folder"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    if parent_json:
        aclCheck(parent_json, "create",
                 username)  # throws exception if not allowed

    if linked_domain:
        linked_json = await getDomainJson(app, linked_domain, reload=True)
        log.debug(f"got linked json: {linked_json}")
        if "root" not in linked_json:
            msg = "Folder domains cannot ber used as link target"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        root_id = linked_json["root"]
        aclCheck(linked_json, "read", username)
        aclCheck(linked_json, "delete", username)
    else:
        linked_json = None

    if not is_folder and not linked_json:
        # create a root group for the new domain
        root_id = createObjId("roots")
        log.debug("new root group id: {}".format(root_id))
        group_json = {"id": root_id, "root": root_id, "domain": domain}
        log.debug("create group for domain, body: " + json.dumps(group_json))

        # create root group
        req = getDataNodeUrl(app, root_id) + "/groups"
        try:
            group_json = await http_post(app, req, data=group_json)
        except ClientResponseError as ce:
            msg = "Error creating root group for domain -- " + str(ce)
            log.error(msg)
            raise HTTPInternalServerError()
    else:
        log.debug("no root group, creating folder")

    domain_json = {}

    domain_acls = {}
    # owner gets full control
    domain_acls[owner] = owner_perm
    if config.get("default_public") or is_folder:
        # this will make the domain public readable
        log.debug("adding default perm for domain: {}".format(domain))
        domain_acls["default"] = default_perm

    # construct dn request to create new domain
    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"owner": owner, "domain": domain}
    body["acls"] = domain_acls

    if root_id:
        body["root"] = root_id

    log.debug("creating domain: {} with body: {}".format(domain, body))
    try:
        domain_json = await http_put(app, req, data=body)
    except ClientResponseError as ce:
        msg = "Error creating domain state -- " + str(ce)
        log.error(msg)
        raise HTTPInternalServerError()

    # domain creation successful
    # mix in limits
    domain_json["limits"] = getLimits()
    domain_json["version"] = getVersion()
    resp = await jsonResponse(request, domain_json, status=201)
    log.response(request, resp=resp)
    return resp
Example #10
async def GET_Datatypes(request):
    """HTTP method to return datatype collection for given domain"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code in (404, 410):
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"Unexpected Error: {ce.code})")
            raise HTTPInternalServerError()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "read",
             username)  # throws exception if not authorized

    limit = None
    if "Limit" in params:
        try:
            limit = int(params["Limit"])
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
    marker = None
    if "Marker" in params:
        marker = params["Marker"]

    # get the datatype collection list
    obj_ids = []
    if "root" in domain_json or domain_json["root"]:
        # get the groups collection list
        collections = await get_collections(app, domain_json["root"])
        objs = collections["datatypes"]
        obj_ids = getIdList(objs, marker=marker, limit=limit)

    # create hrefs
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/datatypes')})
    if "root" in domain_json:
        root_uuid = domain_json["root"]
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + root_uuid)
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})

    # return obj ids and hrefs
    rsp_json = {}
    rsp_json["datatypes"] = obj_ids
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Example #11
async def get_domains(request):
    """ This method is called by GET_Domains and GET_Domain """
    app = request.app

    # if there is no domain passed in, get a list of top level domains
    if "domain" not in request.rel_url.query:
        prefix = '/'
    else:
        prefix = request.rel_url.query["domain"]
    log.info(f"get_domains for: {prefix}")

    if not prefix.startswith('/'):
        msg = "Prefix must start with '/'"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # always use "verbose" to pull extra info
    if "verbose" in request.rel_url.query and request.rel_url.query["verbose"]:
        verbose = True
    else:
        verbose = False

    limit = None
    if "Limit" in request.rel_url.query:
        try:
            limit = int(request.rel_url.query["Limit"])
            log.debug(f"GET_Domains - using Limit: {limit}")
        except ValueError:
            msg = "Bad Request: Expected int type for limit"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    marker = None
    if "Marker" in request.rel_url.query:
        marker = request.rel_url.query["Marker"]
        log.debug(f"got Marker request param: {marker}")

    # list the S3 keys for this prefix
    domainNames = []
    if prefix == "/":
        # search for domains from the top_level domains config
        domainNames = config.get("top_level_domains")
    else:
        s3prefix = prefix[1:]
        log.debug(f"listing S3 keys for {s3prefix}")
        s3keys = await getS3Keys(app,
                                 include_stats=False,
                                 prefix=s3prefix,
                                 deliminator='/')
        log.debug(f"getS3Keys returned: {len(s3keys)} keys")
        log.debug(f"s3keys {s3keys}")

        for s3key in s3keys:
            if s3key[-1] != '/':
                log.debug(f"ignoring key: {s3key}")
                continue
            log.debug(f"got s3key: {s3key}")
            domain = "/" + s3key[:-1]
            if marker:
                if marker == domain:
                    marker = None
                    continue

            log.debug(f"adding domain: {domain} to domain list")
            domainNames.append(domain)

            if limit and len(domainNames) == limit:
                # got to requested limit
                break

    # get domain info for each domain
    domains = []
    for domain in domainNames:
        try:
            # query DN's for domain json
            # TBD - multicast to DN nodes
            log.debug(f"getDomainJson for {domain}")
            domain_json = await getDomainJson(app, domain, reload=True)
            if domain_json:
                domain_rsp = await get_domain_response(app,
                                                       domain_json,
                                                       verbose=verbose)
                # mix in domain name
                domain_rsp["name"] = domain
                domains.append(domain_rsp)
        except HTTPNotFound:
            # One of the domains was not found, but continue through the list
            log.debug(f"not found error for: {domain}")

    return domains
Example #12
async def GET_Group(request):
    """HTTP method to return JSON for group"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    h5path = None
    getAlias = False
    include_links = False
    include_attrs = False
    group_id = request.match_info.get('id')
    if not group_id and "h5path" not in params:
        # no id, or path provided, so bad request
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if group_id:
        log.info(f"GET_Group, id: {group_id}")
        # is the id a group id and not something else?
        if not isValidUuid(group_id, "Group"):
            msg = f"Invalid group id: {group_id}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if "getalias" in params:
            if params["getalias"]:
                getAlias = True
    if "h5path" in params:
        h5path = params["h5path"]
        if not group_id and h5path[0] != '/':
            msg = "h5paths must be absolute if no parent id is provided"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        log.info(f"GET_Group, h5path: {h5path}")
    if "include_links" in params and params["include_links"]:
        include_links = True
    if "include_attrs" in params and params["include_attrs"]:
        include_attrs = True

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    if h5path and h5path[0] == '/':
        # ignore the request path id (if given) and start
        # from root group for absolute paths

        domain_json = await getDomainJson(app, domain)
        if "root" not in domain_json:
            msg = f"Expected root key for domain: {domain}"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        group_id = domain_json["root"]

    if h5path:
        group_id = await getObjectIdByPath(app,
                                           group_id,
                                           h5path,
                                           bucket=bucket
                                           )  # throws 404 if not found
        if not isValidUuid(group_id, "Group"):
            msg = f"No group exist with the path: {h5path}"
            log.warn(msg)
            raise HTTPNotFound()
        log.info(f"get group_id: {group_id} from h5path: {h5path}")

    # verify authorization to read the group
    await validateAction(app, domain, group_id, username, "read")

    # get authoritative state for group from DN (even if it's in the meta_cache).
    group_json = await getObjectJson(app,
                                     group_id,
                                     refresh=True,
                                     include_links=include_links,
                                     include_attrs=include_attrs,
                                     bucket=bucket)
    log.debug(f"domain from request: {domain}")
    group_json["domain"] = getPathForDomain(domain)
    if bucket:
        group_json["bucket"] = bucket

    if getAlias:
        root_id = group_json["root"]
        alias = []
        if group_id == root_id:
            alias.append('/')
        else:
            idpath_map = {root_id: '/'}
            h5path = await getPathForObjectId(app,
                                              root_id,
                                              idpath_map,
                                              tgt_id=group_id,
                                              bucket=bucket)
            if h5path:
                alias.append(h5path)
        group_json["alias"] = alias

    hrefs = []
    group_uri = '/groups/' + group_id
    hrefs.append({'rel': 'self', 'href': getHref(request, group_uri)})
    hrefs.append({
        'rel': 'links',
        'href': getHref(request, group_uri + '/links')
    })
    root_uri = '/groups/' + group_json["root"]
    hrefs.append({'rel': 'root', 'href': getHref(request, root_uri)})
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({
        'rel': 'attributes',
        'href': getHref(request, group_uri + '/attributes')
    })
    group_json["hrefs"] = hrefs

    resp = await jsonResponse(request, group_json)
    log.response(request, resp=resp)
    return resp
Example #13
async def POST_Group(request):
    """HTTP method to create new Group object"""
    log.request(request)
    app = request.app

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    domain_json = await getDomainJson(app, domain, reload=True)

    aclCheck(domain_json, "create",
             username)  # throws exception if not allowed

    if "root" not in domain_json:
        msg = f"Expected root key for domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    link_id = None
    link_title = None
    if request.has_body:
        body = await request.json()
        log.info(f"POST Group body: {body}")
        if body:
            if "link" in body:
                link_body = body["link"]
                log.debug(f"link_body: {link_body}")
                if "id" in link_body:
                    link_id = link_body["id"]
                if "name" in link_body:
                    link_title = link_body["name"]
                if link_id and link_title:
                    log.debug(f"link id: {link_id}")
                    # verify that the referenced id exists and is in this domain
                    # and that the requestor has permissions to create a link
                    await validateAction(app, domain, link_id, username,
                                         "create")
            if not link_id or not link_title:
                log.warn(f"POST Group body with no link: {body}")

    domain_json = await getDomainJson(
        app, domain)  # get again in case cache was invalidated

    root_id = domain_json["root"]
    group_id = createObjId("groups", rootid=root_id)
    log.info(f"new  group id: {group_id}")
    group_json = {"id": group_id, "root": root_id}
    log.debug("create group, body: " + json.dumps(group_json))
    req = getDataNodeUrl(app, group_id) + "/groups"
    params = {}
    if bucket:
        params["bucket"] = bucket

    group_json = await http_post(app, req, data=group_json, params=params)

    # create link if requested
    if link_id and link_title:
        link_json = {}
        link_json["id"] = group_id
        link_json["class"] = "H5L_TYPE_HARD"
        link_req = getDataNodeUrl(app, link_id)
        link_req += "/groups/" + link_id + "/links/" + link_title
        log.debug("PUT link - : " + link_req)
        put_json_rsp = await http_put(app,
                                      link_req,
                                      data=link_json,
                                      params=params)
        log.debug(f"PUT Link resp: {put_json_rsp}")
    log.debug("returning resp")
    # group creation successful
    resp = await jsonResponse(request, group_json, status=201)
    log.response(request, resp=resp)
    return resp
Example #14
File: async_lib.py Project: t20100/hsds
def scanRootCallback(app, s3keys):
    log.debug(f"scanRootCallback, {len(s3keys)} items")
    if isinstance(s3keys, list):
        log.error("got list result for s3keys callback")
        raise ValueError("unexpected callback format")

    results = app["scanRoot_results"]
    if results:
        log.debug(f"previous scanRoot_results:".format(results))
    for s3key in s3keys.keys():

        if not isS3ObjKey(s3key):
            log.info(f"not s3obj key, ignoring: {s3key}")
            continue
        objid = getObjId(s3key)
        etag = None
        obj_size = None
        lastModified = None
        item = s3keys[s3key]
        if "ETag" in item:
            etag = item["ETag"]
        if "Size" in item:
            obj_size = item["Size"]
        if "LastModified" in item:
            lastModified = item["LastModified"]
        log.debug(f"{objid}: {etag} {obj_size} {lastModified}")

        if lastModified > results["lastModified"]:
            log.debug(f"changing lastModified from: {results['lastModified']} to {lastModified}")
            results["lastModified"] = lastModified
        is_chunk = False
        if isValidChunkId(objid):
            is_chunk = True
            results["num_chunks"] += 1
            results["allocated_bytes"] += obj_size
        else:
            results["metadata_bytes"] += obj_size


        if is_chunk or getCollectionForId(objid) == "datasets":
            if is_chunk:
                dsetid = getDatasetId(objid)
            else:
                dsetid = objid
            datasets = results["datasets"]
            if dsetid not in datasets:
                dataset_info = {}
                dataset_info["lastModified"] = 0
                dataset_info["num_chunks"] = 0
                dataset_info["allocated_bytes"] = 0
                datasets[dsetid] = dataset_info
            dataset_info = datasets[dsetid]
            if lastModified > dataset_info["lastModified"]:
                dataset_info["lastModified"] = lastModified
            # count chunks regardless of whether lastModified changed,
            # matching the root-level accounting above
            if is_chunk:
                dataset_info["num_chunks"] += 1
                dataset_info["allocated_bytes"] += obj_size
        elif getCollectionForId(objid) == "groups":
            results["num_groups"] += 1
        elif getCollectionForId(objid) == "datatypes":
            results["num_datatypes"] += 1
        else:
            log.error(f"Unexpected collection type for id: {objid}")
Example #15
    async def put_object(self, key, data, bucket=None):
        """ Write data to given key.
            Returns client specific dict on success
        """
        if not bucket:
            log.error("put_object - bucket not set")
            raise HTTPInternalServerError()

        start_time = time.time()
        log.debug(
            f"azureBlobClient.put_object({bucket}/{key}) start: {start_time}")
        try:
            async with self._client.get_blob_client(container=bucket,
                                                    blob=key) as blob_client:
                blob_rsp = await blob_client.upload_blob(data,
                                                         blob_type='BlockBlob',
                                                         overwrite=True)

            finish_time = time.time()
            ETag = blob_rsp["etag"]
            lastModified = int(blob_rsp["last_modified"].timestamp())
            data_size = len(data)
            rsp = {
                "ETag": ETag,
                "size": data_size,
                "LastModified": lastModified
            }
            log.debug(f"put_object {key} returning: {rsp}")

            log.info(
                f"azureBlobClient.put_object({key} bucket={bucket}) start={start_time:.4f} finish={finish_time:.4f} elapsed={finish_time-start_time:.4f} bytes={len(data)}"
            )

        except CancelledError as cle:
            self._azure_stats_increment("error_count")
            msg = f"azureBlobClient.CancelledError for put_object {key}: {cle}"
            log.error(msg)
            raise HTTPInternalServerError()
        except Exception as e:
            if isinstance(e, AzureError):
                if e.status_code == 404:
                    msg = f"azureBlobClient.key: {key} not found "
                    log.warn(msg)
                    raise HTTPNotFound()
                elif e.status_code in (401, 403):
                    msg = f"azureBlobClient.access denied for put key: {key}"
                    log.info(msg)
                    raise HTTPForbidden()
                else:
                    self._azure_stats_increment("error_count")
                    log.error(
                        f"azureBlobClient.got unexpected AzureError for put_object {key}: {e.message}"
                    )
                    raise HTTPInternalServerError()
            else:
                log.error(
                    f"azureBlobClient.Unexpected exception for put_object {key}: {e}"
                )
                raise HTTPInternalServerError()

        if data and len(data) > 0:
            self._azure_stats_increment("bytes_out", inc=len(data))
        log.debug(f"azureBlobClient.put_object {key} complete, rsp: {rsp}")
        return rsp
Example #16
    async def fetch(self, obj_id):
        log.debug(f"DomainCrawler - fetch for obj_id: {obj_id}")
        obj_json = await getObjectJson(self._app,
                                       obj_id,
                                       include_links=True,
                                       include_attrs=self._include_attrs)
        log.debug(f"DomainCrawler - for {obj_id} got json: {obj_json}")

        # including links, so don't need link count
        if "link_count" in obj_json:
            del obj_json["link_count"]
        self._obj_dict[obj_id] = obj_json
        if self._include_attrs:
            del obj_json["attributeCount"]

        # if this is a group, iterate through all the hard links and
        # add to the lookup ids set
        if getCollectionForId(obj_id) == "groups":
            links = obj_json["links"]
            log.debug(f"DomainCrawler links: {links}")
            for title in links:
                log.debug(f"DomainCrawler - got link: {title}")
                link_obj = links[title]
                if link_obj["class"] != 'H5L_TYPE_HARD':
                    continue
                link_id = link_obj["id"]
                if link_id not in self._obj_dict:
                    # haven't seen this object yet, get obj json
                    log.debug(f"DomainCrawler - adding link_id: {link_id}")
                    self._obj_dict[link_id] = {}  # placeholder for obj id
                    self._q.put_nowait(link_id)
        log.debug(f"DomainCrawler - fetch conplete obj_id: {obj_id}")
Example #17
0
async def oio_register(app):
    """ register with oio conscience
    """
    log.info("oio_register")

    oio_proxy = app["oio_proxy"]
    host_ip = app["host_ip"]
    if not host_ip:
        log.error("host ip not set")
        return
    node_type = app["node_type"]
    if node_type not in ("sn", "dn"):
        log.error("unexpected node type")
        return
    service_name = "hdf" + node_type
    req = oio_proxy + "/v3.0/OPENIO/conscience/register"

    body = {
        "addr": host_ip + ":" + str(app["node_port"]),
        "tags": {
            "stat.cpu": 100,
            "tag.up": True
        },
        "type": service_name
    }
    log.debug(f"conscience register: body: {body}")
    try:
        await http_post(app, req, data=body)
    except ClientError as client_exception:
        log.error(
            f"got ClientError registering with oio_proxy: {client_exception}")
        return
    except CancelledError as cancelled_exception:
        log.error(
            f"got CanceeledError registering with oio_proxy: {cancelled_exception}"
        )
        return
    log.info("oio registration successful")

    # get list of DN containers
    req = oio_proxy + "/v3.0/OPENIO/conscience/list?type=hdfdn"
    try:
        dn_node_list = await http_get(app, req)
    except ClientError as client_exception:
        log.error(
            f"got ClientError listing dn nodes with oio_proxy: {client_exception}"
        )
        return
    except CancelledError as cancelled_exception:
        log.error(
            f"got CanceeledError listing dn nodes with oio_proxy: {cancelled_exception}"
        )
        return
    log.info(f"got {len(dn_node_list)} conscience list items")
    # create map keyed by dn addr
    dn_node_map = {}
    for dn_node in dn_node_list:
        log.debug(f"checking dn conscience list item: {dn_node}")
        if "addr" not in dn_node:
            log.warn(f"conscience list item with no addr: {dn_node}")
            continue
        addr = dn_node["addr"]
        if "score" not in dn_node:
            log.warn(f'conscience list item with no score key: {dn_node}')
            continue
        if dn_node["score"] <= 0:
            log.debug(f"zero score - skipping conscience list addr: {addr}")
            continue
        if addr in dn_node_map:
            # shouldn't ever get this?
            log.warn(f"duplicate entry for node: {dn_node}")
            continue
        # send an info request to the node
        info_rsp = await get_info(app, "http://" + addr)
        if not info_rsp:
            # timeout or other failure
            continue
        if "node" not in info_rsp:
            log.error("expecteed to find node key in info resp")
            continue
        info_node = info_rsp["node"]
        log.debug(f"got info resp: {info_node}")
        for key in ("type", "id", "node_number", "node_count"):
            if key not in info_node:
                log.error(
                    f"unexpected node type in node state, expected to find key: {key}"
                )
                continue
        if info_node["type"] != "dn":
            log.error(f"expecteed node_type to be dn")
            continue
        # mix in node id, node number, node_count to the conscience info
        dn_node["node_id"] = info_node["id"]
        dn_node["node_number"] = info_node["node_number"]
        dn_node["node_count"] = info_node["node_count"]

        dn_node_map[addr] = dn_node

    log.info(f"done with dn_node_list, got: {len(dn_node_map)} active nodes")
    if len(dn_node_map) == 0:
        if app["node_state"] != "INITIALIZING":
            log.info(
                "no active DN nodes, setting cluster state to INITIALIZING")
            app["node_state"] = "INITIALIZING"
        return

    # sort map by address
    addrs = list(dn_node_map.keys())
    addrs.sort()

    # check that node number is set and is the expected value for each node key
    invalid_count = 0
    node_index = 0
    node_count = len(addrs)
    dn_urls = {}
    this_node_found = False
    this_node_id = app["id"]
    for addr in addrs:
        dn_node = dn_node_map[addr]
        log.debug(f"dn_node for index {node_index}: {dn_node}")
        node_id = dn_node["node_id"]
        if node_id == this_node_id:
            this_node_found = True
        node_number = dn_node["node_number"]
        dn_urls[node_number] = "http://" + dn_node["addr"]
        if node_index != node_number or dn_node["node_count"] != node_count:
            if node_number == -1:
                log.info(f"node {node_index} not yet initialized")
            elif node_index != node_number:
                log.warn(
                    f"node_id {node_id}, expected node_number of {node_index} but found {node_number}"
                )
            invalid_count += 1
            if node_id == app["id"]:
                # this is us, update our node_number, node_count
                if app["node_number"] != node_index:
                    # TBD - clean cache items
                    log.info(
                        f"setting node_number for this node to: {node_index}")
                    app["node_number"] = node_index
                if app["node_count"] != node_count:
                    # TBD - clean cache items
                    log.info(
                        f"setting node_count for this node to: {node_count}")
                    app["node_count"] = node_count
            invalid_count += 1
        else:
            log.debug(f"node {node_id} node number is correct")
        node_index += 1

    if invalid_count == 0:
        log.debug("no invalid nodes!")
        if app["node_state"] != "READY":
            if app["node_type"] == "dn" and not this_node_found:
                # don't go to READY unless this node shows up
                log.info(
                    f"node {this_node_id} not yet showing in proxy list, stay in INITIALIZING"
                )
            else:
                log.info("setting node state to READY")
                app["node_state"] = "READY"
                if app["node_type"] == "sn" and app["node_number"] == -1:
                    # node number shouldn't matter for SN nodes, so set to 1
                    app["node_number"] = 1
        if app["node_count"] != node_count:
            log.info(f"setting node_count to: {node_count}")
            app["node_count"] = node_count
        app["dn_urls"] = dn_urls
    else:
        log.debug(f"number invalid nodes: {invalid_count}")
        if app["node_state"] == "READY":
            log.warn("invalid nodes found, setting node state to INITIALIZING")
            app["node_state"] = "INITIALIZING"

    log.info("oio_register done")
Example #18
0
async def DELETE_Domain(request):
    """HTTP method to delete a domain resource"""
    log.request(request)
    app = request.app
    params = request.rel_url.query

    domain = None
    meta_only = False  # if True, just delete the meta cache value
    keep_root = False
    if request.has_body:
        body = await request.json()
        if "domain" in body:
            domain = body["domain"]
        else:
            msg = "No domain in request body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        if "meta_only" in body:
            meta_only = body["meta_only"]
        if "keep_root" in body:
            keep_root = body["keep_root"]

    else:
        # get domain from request uri
        try:
            domain = getDomainFromRequest(request)
        except ValueError:
            msg = "Invalid domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if "keep_root" in params:
            keep_root = params["keep_root"]

    log.info("meta_only domain delete: {}".format(meta_only))
    if meta_only:
        # remove from domain cache if present
        domain_cache = app["domain_cache"]
        if domain in domain_cache:
            log.info("deleting {} from domain_cache".format(domain))
            del domain_cache[domain]
        resp = await jsonResponse(request, {})
        return resp

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    parent_domain = getParentDomain(domain)
    if (not parent_domain or parent_domain == '/') and username != "admin":
        msg = "Deletion of top-level domains is only supported by admin users"
        log.warn(msg)
        raise HTTPForbidden()

    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code == 404:
            log.warn("domain not found")
            raise HTTPNotFound()
        elif ce.code == 410:
            log.warn("domain has been removed")
            raise HTTPGone()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    aclCheck(domain_json, "delete",
             username)  # throws exception if not allowed

    # check for sub-objects if this is a folder
    if "root" not in domain_json:
        s3prefix = domain[1:] + '/'
        log.info(f"checking kets with prefix: {s3prefix} ")
        s3keys = await getS3Keys(app,
                                 include_stats=False,
                                 prefix=s3prefix,
                                 deliminator='/')
        for s3key in s3keys:
            if s3key.endswith("/"):
                log.warn(f"attempt to delete folder {domain} with sub-items")
                log.debug(f"got prefix: {s3keys[0]}")
                raise HTTPConflict(reason="folder has sub-items")

    req = getDataNodeUrl(app, domain)
    req += "/domains"
    body = {"domain": domain}

    rsp_json = await http_delete(app, req, data=body)

    if "root" in domain_json and not keep_root:
        # delete the root group
        root_id = domain_json["root"]
        req = getDataNodeUrl(app, root_id)
        req += "/groups/" + root_id
        await http_delete(app, req)

    # remove from domain cache if present
    domain_cache = app["domain_cache"]
    if domain in domain_cache:
        del domain_cache[domain]

    # delete domain cache from other sn_urls
    sn_urls = app["sn_urls"]
    body["meta_only"] = True
    for node_no in sn_urls:
        if node_no == app["node_number"]:
            continue  # don't send to ourselves
        sn_url = sn_urls[node_no]
        req = sn_url + "/"
        log.info("sending sn request: {}".format(req))
        try:
            sn_rsp = await http_delete(app, req, data=body)
            log.info("{} response: {}".format(req, sn_rsp))
        except ClientResponseError as ce:
            log.warn("got error for sn_delete: {}".format(ce))

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Example #19
0
async def healthCheck(app):
    """ Periodic method that either registers with headnode (if state in INITIALIZING) or
    calls headnode to verify vitals about this node (otherwise)"""

    # let the server event loop startup before starting the health check
    await asyncio.sleep(1)
    log.info("health check start")
    sleep_secs = config.get("node_sleep_time")

    while True:
        print("node_state:", app["node_state"])
        if "oio_proxy" in app:
            # for OIO, post a registration request each interval
            await oio_register(app)
        elif "is_k8s" in app:
            await k8s_register(app)

        elif app["node_state"] == "INITIALIZING" or (
                app["node_state"] == "WAITING" and app["node_number"] < 0):
            # startup docker registration
            await register(app)
        else:
            # check in with the head node and make sure we are still active
            head_url = getHeadUrl(app)
            req_node = "{}/nodestate".format(head_url)
            log.debug("health check req {}".format(req_node))
            try:
                rsp_json = await http_get(app, req_node)
                if rsp_json is None or not isinstance(rsp_json, dict):
                    log.warn(
                        "invalid health check response: type: {} text: {}".
                        format(type(rsp_json), rsp_json))
                else:
                    log.debug("cluster_state: {}".format(
                        rsp_json["cluster_state"]))
                    if rsp_json["cluster_state"] != "READY" and app[
                            "node_state"] == "READY":
                        log.info("changing node_state to WAITING")
                        app["node_state"] = "WAITING"

                    #print("rsp_json: ", rsp_json)
                    # save the url's to each of the active nodes'
                    sn_urls = {}
                    dn_urls = {}
                    #  or rsp_json["host"] is None or rsp_json["id"] != app["id"]
                    this_node = None
                    for node in rsp_json["nodes"]:
                        if node["node_type"] == app["node_type"] and node[
                                "node_number"] == app["node_number"]:
                            # this should be this node
                            if node["id"] != app["id"]:
                                # mis-matched ids - flag to re-register
                                log.warn(
                                    "mis-matched node ids, app: {} vs head: {} - re-initializing"
                                    .format(app["id"], node["id"]))
                                app["node_state"] = "INITIALIZING"
                                app["node_number"] = -1
                                break
                            if not node["host"]:
                                # host not set - flag to re-register
                                log.warn(
                                    "host not set for this node - re-initializing")
                                app["node_state"] = "INITIALIZING"
                                app["node_number"] = -1
                                break
                            this_node = copy(node)
                        if not node["host"]:
                            continue  # not online
                        url = "http://" + node["host"] + ":" + str(
                            node["port"])
                        node_number = node["node_number"]
                        if node["node_type"] == "dn":
                            dn_urls[node_number] = url
                        elif node["node_type"] == "sn":
                            sn_urls[node_number] = url
                        else:
                            log.error(
                                "Unexpected node_type for node: {}".format(
                                    node))
                    app["sn_urls"] = sn_urls
                    log.debug(f"sn_urls: {sn_urls}")
                    app["dn_urls"] = dn_urls
                    log.debug(f"dn_urls: {dn_urls}")

                    if this_node is None and rsp_json[
                            "cluster_state"] != "READY":
                        log.warn("this node not found, re-initialize")
                        app["node_state"] == "INITIALIZING"
                        app["node_number"] = -1

                    if app["node_state"] == "WAITING" and rsp_json[
                            "cluster_state"] == "READY" and app[
                                "node_number"] >= 0:
                        log.info(
                            "setting node_state to READY, node_number: {}".
                            format(app["node_number"]))
                        app["node_state"] = "READY"
                    log.info("health check ok")
            except ClientError as ce:
                log.warn(f"ClientError: {ce} for health check")
            except HTTPInternalServerError as he:
                log.warn(
                    f"HTTPInternalServiceError <{he.code}> for health check")
            except HTTPNotFound as hnf:
                log.warn(f"HTTPNotFound <{hnf.code}> for health check")
            except HTTPGone as hg:
                log.warn(f"HTTPGone <{hg.code}> for health heck")

        svmem = psutil.virtual_memory()
        all_tasks = asyncio.Task.all_tasks()
        num_tasks = len(all_tasks)
        active_tasks = len([task for task in all_tasks if not task.done()])
        log.debug(
            f"health check sleep: {sleep_secs}, vm: {svmem.percent} num tasks: {num_tasks} active tasks: {active_tasks}"
        )
        await asyncio.sleep(sleep_secs)
Example #20
0
async def GET_ACL(request):
    """HTTP method to return JSON for given domain/ACL"""
    log.request(request)
    app = request.app

    acl_username = request.match_info.get('username')
    if not acl_username:
        msg = "Missing username for ACL"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError as ce:
        if ce.code in (404, 410):
            msg = "domain not found"
            log.warn(msg)
            raise HTTPNotFound()
        else:
            log.error(f"unexpected error: {ce.code}")
            raise HTTPInternalServerError()

    # validate that the requesting user has permission to read ACLs in this domain
    if acl_username in (username, "default"):
        # allow read access for the user's own ACL entry, or the default entry
        aclCheck(domain_json, "read",
                 username)  # throws exception if not authorized
    else:
        aclCheck(domain_json, "readACL",
                 username)  # throws exception if not authorized

    if 'owner' not in domain_json:
        log.warn("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.warn("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug("got domain_json: {}".format(domain_json))

    if acl_username not in acls:
        msg = "acl for username: [{}] not found".format(acl_username)
        log.warn(msg)
        raise HTTPNotFound()

    acl = acls[acl_username]
    acl_rsp = {}
    for k in acl.keys():
        acl_rsp[k] = acl[k]
    acl_rsp["userName"] = acl_username

    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acl"] = acl_rsp
    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
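For reference, the assembled response body has roughly the shape below. The userName key and hrefs list follow from the code above; the specific permission flags are illustrative of typical hsds ACL entries rather than guaranteed by this snippet.

# illustrative response shape
rsp_json = {
    "acl": {
        "userName": "joe",
        "read": True,
        "readACL": False,
        # ...remaining flags copied from the stored ACL entry
    },
    "hrefs": [{"rel": "self", "href": "http://hsds.example.com/acls"}],
}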
Example #21
0
async def PUT_Group(request):
    """ Handler for PUT /groups"""
    """ Used to flush all objects under a root group to S3 """

    FLUSH_TIME_OUT = 10.0  # TBD make config
    FLUSH_SLEEP_INTERVAL = 0.1  # TBD make config
    log.request(request)
    app = request.app

    root_id = request.match_info.get('id')
    log.info("PUT group: {}  (flush)".format(root_id))

    if not isValidUuid(root_id, obj_class="group"):
        log.error(f"Unexpected group_id: {root_id}")
        raise HTTPInternalServerError()

    schema2 = isSchema2Id(root_id)

    if schema2 and not isRootObjId(root_id):
        log.error(f"Expected root id for flush but got: {root_id}")
        raise HTTPInternalServerError()

    flush_start = time.time()
    flush_set = set()
    dirty_ids = app["dirty_ids"]

    for obj_id in dirty_ids:
        if schema2:
            if isValidUuid(obj_id) and getRootObjId(obj_id) == root_id:
                flush_set.add(obj_id)
        else:
            # for schema1 not easy to determine if a given id is in a domain,
            # so just wait on all of them
            flush_set.add(obj_id)

    log.debug(f"flushop - waiting on {len(flush_set)} items")
    while time.time() - flush_start < FLUSH_TIME_OUT:
        # check to see if the items in our flush set are still there

        remaining_set = set()
        for obj_id in flush_set:
            if obj_id not in dirty_ids:
                log.debug(f"flush - {obj_id} has been written")
            elif dirty_ids[obj_id] > flush_start:
                log.debug(
                    f"flush - {obj_id} has been updated after flush start")
            else:
                log.debug(f"flush - {obj_id} still pending")
                remaining_set.add(obj_id)
        flush_set = remaining_set
        if len(flush_set) == 0:
            log.debug("flush op - all objects have been written")
            break
        log.debug(
            f"flushop - {len(flush_set)} item remaining, sleeping for {FLUSH_SLEEP_INTERVAL}"
        )
        await asyncio.sleep(FLUSH_SLEEP_INTERVAL)

    if len(flush_set) > 0:
        log.warn(
            f"flushop - {len(flush_set)} items not updated after {FLUSH_TIME_OUT}"
        )
        raise HTTPServiceUnavailable()

    resp = json_response(None, status=204)  # NO Content response
    log.response(request, resp=resp)
    return resp
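The flush loop above implies that dirty_ids maps each object id to the time it was last marked dirty (hence the dirty_ids[obj_id] > flush_start comparison). A minimal sketch of that bookkeeping, assuming a separate s3sync task removes entries once they are written:

import time

dirty_ids = {}

def mark_dirty(obj_id):
    # record when the object was last modified
    dirty_ids[obj_id] = time.time()

def mark_written(obj_id):
    # the s3sync task would call this after a successful S3 write
    dirty_ids.pop(obj_id, None)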
Example #22
0
async def GET_ACLs(request):
    """HTTP method to return JSON for domain/ACLs"""
    log.request(request)
    app = request.app

    (username, pswd) = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    try:
        domain = getDomainFromRequest(request)
    except ValueError:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # use reload to get authoritative domain json
    try:
        domain_json = await getDomainJson(app, domain, reload=True)
    except ClientResponseError:
        msg = "domain not found"
        log.warn(msg)
        raise HTTPNotFound()

    if 'owner' not in domain_json:
        log.error("No owner key found in domain")
        raise HTTPInternalServerError()

    if 'acls' not in domain_json:
        log.error("No acls key found in domain")
        raise HTTPInternalServerError()

    acls = domain_json["acls"]

    log.debug("got domain_json: {}".format(domain_json))
    # validate that the requesting user has permission to read this domain
    aclCheck(domain_json, "readACL",
             username)  # throws exception if not authorized

    acl_list = []
    acl_usernames = list(acls.keys())
    acl_usernames.sort()
    for acl_username in acl_usernames:
        entry = {"userName": acl_username}
        acl = acls[acl_username]

        for k in acl.keys():
            entry[k] = acl[k]
        acl_list.append(entry)
    # return just the keys as per the REST API
    rsp_json = {}
    rsp_json["acls"] = acl_list

    hrefs = []
    hrefs.append({'rel': 'self', 'href': getHref(request, '/acls')})
    if "root" in domain_json:
        hrefs.append({
            'rel': 'root',
            'href': getHref(request, '/groups/' + domain_json["root"])
        })
    hrefs.append({'rel': 'home', 'href': getHref(request, '/')})
    hrefs.append({'rel': 'owner', 'href': getHref(request, '/')})
    rsp_json["hrefs"] = hrefs

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
Example #23
0
async def GET_Chunk(request):
    log.request(request)
    app = request.app 
    params = request.rel_url.query

    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    
    validateInPartition(app, chunk_id)
    log.debug("request params: {}".format(list(params.keys())))
    if "dset" not in params:
        msg = "Missing dset in GET request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    dset_json = json.loads(params["dset"])
    
    log.debug("dset_json: {}".format(dset_json)) 
    type_json = dset_json["type"]
     
    dims = getChunkLayout(dset_json)
    log.debug("got dims: {}".format(dims))
    rank = len(dims)  
         
    # get chunk selection from query params
    if "select" in params:
        log.debug("select: {}".format(params["select"]))
    selection = []
    for i in range(rank):
        dim_slice = getSliceQueryParam(request, i, dims[i])
        selection.append(dim_slice)   
    selection = tuple(selection)  
    log.debug("got selection: {}".format(selection))

    dt = createDataType(type_json)
    log.debug("dtype: {}".format(dt))

    if rank == 0:
        msg = "No dimension passed to GET chunk request"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    if len(selection) != rank:
        msg = "Selection rank does not match shape rank"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    for i in range(rank):
        s = selection[i]
        log.debug("selection[{}]: {}".format(i, s))

    s3path = None
    s3offset = 0
    s3size = 0
    if "s3path" in params:
        s3path = params["s3path"]
        log.debug(f"GET_Chunk - useing s3path: {s3path}")
    if "s3offset" in params:
        try:
            s3offset = int(params["s3offset"])
        except ValueError:
            log.error(f"invalid s3offset params: {params['s3offset']}")
            raise HTTPBadRequest()
    if "s3size" in params:
        try:
            s3size = int(params["s3size"])
        except ValueError:
            log.error(f"invalid s3size params: {params['s3sieze']}")
            raise HTTPBadRequest()

    chunk_arr = await getChunk(app, chunk_id, dset_json, s3path=s3path, s3offset=s3offset, s3size=s3size)

    if chunk_arr is None:
        # return a 404
        msg = "Chunk {} does not exist".format(chunk_id)
        log.info(msg)
        raise HTTPNotFound()
     
    resp = None
    
    if "query" in params:
        # do query selection
        query = params["query"]
        log.info("query: {}".format(query))
        if rank != 1:
            msg = "Query selection only supported for one dimensional arrays"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        limit = 0
        if "Limit" in params:
            limit = int(params["Limit"])

        values = []
        indices = []
        field_names = [] 
        if dt.fields:
            field_names = list(dt.fields.keys())

        x = chunk_arr[selection]
        log.debug("x: {}".format(x))
        eval_str = getEvalStr(query, "x", field_names)
        log.debug("eval_str: {}".format(eval_str))
        where_result = np.where(eval(eval_str))
        log.debug("where_result: {}".format(where_result))
        where_result_index = where_result[0]
        log.debug("whare_result index: {}".format(where_result_index))
        log.debug("boolean selection: {}".format(x[where_result_index]))
        s = selection[0]
        count = 0
        for index in where_result_index:
            log.debug("index: {}".format(index))
            value = x[index].tolist()
            log.debug("value: {}".format(value))
            json_val = bytesArrayToList(value)
            log.debug("json_value: {}".format(json_val))
            json_index = index.tolist() * s.step + s.start  # adjust for selection
            indices.append(json_index)
            values.append(json_val)
            count += 1
            if limit > 0 and count >= limit:
                log.info("got limit items")
                break
         
        query_result = {}
        query_result["index"] = indices
        query_result["value"] = values
        log.info(f"query_result retiurning: {len(indices)} rows")
        log.debug(f"query_result: {query_result}")
        resp = json_response(query_result)
    else:
        # get requested data
        output_arr = chunk_arr[selection]
        output_data = arrayToBytes(output_arr)

        # write response
        try:
            resp = StreamResponse()
            resp.headers['Content-Type'] = "application/octet-stream"
            resp.content_length = len(output_data)
            await resp.prepare(request)
            await resp.write(output_data)
        except Exception as e:
            log.error(f"Exception during binary data write: {e}")
            raise HTTPInternalServerError()

        finally:
            await resp.write_eof()

    return resp
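To illustrate the query path above: for a compound dtype with a "temp" field, getEvalStr presumably rewrites a query such as "temp > 32" against the local variable x, and np.where reduces the result to matching indices. A standalone sketch (the rewritten string is an assumption):

import numpy as np

x = np.array([(10,), (40,), (55,)], dtype=[("temp", "i4")])
eval_str = "x['temp'] > 32"            # assumed output of getEvalStr
where_result = np.where(eval(eval_str))
print(where_result[0])                 # -> [1 2]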
Example #24
0
async def PUT_DatasetShape(request):
    """HTTP method to update dataset's shape"""
    log.request(request)
    app = request.app
    dset_id = request.match_info.get('id')

    if not isValidUuid(dset_id, obj_class="dataset"):
        log.error("Unexpected type_id: {}".format(dset_id))
        raise HTTPInternalServerError()

    body = await request.json()

    log.info("PUT datasetshape: {}, body: {}".format(dset_id, body))

    if "shape" not in body and "extend" not in body:
        log.error("Expected shape or extend keys")
        raise HTTPInternalServerError()

    dset_json = await get_metadata_obj(app, dset_id)

    shape_orig = dset_json["shape"]
    log.debug("shape_orig: {}".format(shape_orig))

    if "maxdims" not in shape_orig:
        log.error("expected maxdims in dataset json")
        raise HTTPInternalServerError()

    dims = shape_orig["dims"]
    maxdims = shape_orig["maxdims"]

    resp_json = {}

    if "extend" in body:
        # extend the shape by the given value and return the
        # newly extended area
        extension = body["extend"]
        extend_dim = 0

        if "extend_dim" in body:
            extend_dim = body["extend_dim"]
        log.info(f"datashape extend: {extension} dim: {extend_dim}")

        selection = "["
        for i in range(len(dims)):
            if i == extend_dim:
                lb = dims[i]
                ub = lb + extension
                if maxdims[extend_dim] != 0 and ub > maxdims[extend_dim]:
                    msg = "maximum extent exceeded"
                    log.warn(msg)
                    raise HTTPConflict()

                selection += f"{lb}:{ub}"
                dims[i] = ub
            else:
                if dims[i] == 0:
                    dims[i] = 1  # each dimension must be non-zero
                selection += ":"
            if i < len(dims) - 1:
                selection += ","
        selection += "]"
        resp_json["selection"] = selection

    else:
        # verify that the shape update is still valid
        # e.g. another client may have extended the shape since the SN
        # verified it
        shape_update = body["shape"]
        log.debug("shape_update: {}".format(shape_update))

        for i in range(len(dims)):
            if shape_update[i] < dims[i]:
                msg = "Dataspace can not be made smaller"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)

        # Update the shape!
        for i in range(len(dims)):
            dims[i] = shape_update[i]

    # write back to S3, save to metadata cache
    log.info(f"Updated dimensions: {dims}")
    await save_metadata_obj(app, dset_id, dset_json)

    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
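A worked example of the "extend" branch above, with illustrative values: for a dataset with dims [100, 20] and maxdims [0, 20], the request body below grows dimension 0 by 10, so dims becomes [110, 20] and the selection string covers the newly added region.

# hypothetical request body for the extend branch
body = {"extend": 10, "extend_dim": 0}
# per the selection-building loop above, the handler then returns
# resp_json["selection"] == "[100:110,:]"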
Example #25
0
async def getChunk(app, chunk_id, dset_json, s3path=None, s3offset=0, s3size=0, chunk_init=False):
    # if the chunk cache has too many dirty items, wait till items get flushed to S3
    MAX_WAIT_TIME = 10.0  # TBD - make this a config
    chunk_cache = app['chunk_cache']
    if chunk_init and s3offset > 0:
        log.error(f"unable to initiale chunk {chunk_id} for reference layouts ")
        raise  HTTPInternalServerError()

    log.debug(f"getChunk cache utilization: {chunk_cache.cacheUtilizationPercent} per, dirty_count: {chunk_cache.dirtyCount}, mem_dirty: {chunk_cache.memDirty}")

    chunk_arr = None 
    dset_id = getDatasetId(chunk_id)
    dims = getChunkLayout(dset_json)
    type_json = dset_json["type"]
    dt = createDataType(type_json)

    bucket = None
    s3key = None

    if s3path:
        if not s3path.startswith("s3://"):
            # TBD - verify these at dataset creation time?
            log.error(f"unexpected s3path for getChunk: {s3path}")
            raise HTTPInternalServerError()
        path = s3path[5:]
        index = path.find('/')   # split bucket and key
        if index < 1:
            log.error(f"s3path is invalid: {s3path}")
            raise HTTPInternalServerError()
        bucket = path[:index]
        s3key = path[(index+1):]
        log.debug(f"using bucket: {bucket} and s3key: {s3key}")
    else:
        s3key = getS3Key(chunk_id)

    log.debug("getChunk s3key: {}".format(s3key))
    if chunk_id in chunk_cache:
        chunk_arr = chunk_cache[chunk_id]
    else:
        if s3path and s3size == 0:
            obj_exists = False
        else:
            obj_exists = await isS3Obj(app, s3key, bucket=bucket)
        # TBD - potential race condition?
        if obj_exists:
            pending_s3_read = app["pending_s3_read"]
            if chunk_id in pending_s3_read:
                # already a read in progress, wait for it to complete
                read_start_time = pending_s3_read[chunk_id]
                log.info(f"s3 read request for {chunk_id} was requested at: {read_start_time}")
                while time.time() - read_start_time < 2.0:
                    log.debug("waiting for pending s3 read, sleeping")
                    await asyncio.sleep(1)  # sleep for sub-second?
                    if chunk_id in chunk_cache:
                        log.info(f"Chunk {chunk_id} has arrived!")
                        chunk_arr = chunk_cache[chunk_id]
                        break
                if chunk_arr is None:
                    log.warn(f"s3 read for chunk {chunk_id} timed-out, initiaiting a new read")
            
            if chunk_arr is None:
                if chunk_id not in pending_s3_read:
                    pending_s3_read[chunk_id] = time.time()
                log.debug("Reading chunk {} from S3".format(s3key))
                deflate_level = getDeflate(app, dset_id, dset_json)
                chunk_bytes = await getS3Bytes(app, s3key, deflate_level=deflate_level, s3offset=s3offset, s3size=s3size, bucket=bucket)
                if chunk_id in pending_s3_read:
                    # read complete - remove from pending map
                    elapsed_time = time.time() - pending_s3_read[chunk_id]
                    log.info(f"s3 read for {s3key} took {elapsed_time}")
                    del pending_s3_read[chunk_id] 
                else:
                    log.warn(f"expected to find {chunk_id} in pending_s3_read map")

            
                # np.fromstring is deprecated; frombuffer + copy gives a
                # writable array from the downloaded bytes
                chunk_arr = np.frombuffer(chunk_bytes, dtype=dt).copy()
                chunk_arr = chunk_arr.reshape(dims)

            log.debug("chunk size: {}".format(chunk_arr.size))
           
        elif chunk_init:
            log.debug("Initializing chunk {chunk_id}")
            fill_value = getFillValue(dset_json)
            if fill_value:
                # need to convert list to tuples for numpy broadcast
                if isinstance(fill_value, list):
                    fill_value = tuple(fill_value)
                chunk_arr = np.empty(dims, dtype=dt, order='C')
                chunk_arr[...] = fill_value
            else:
                chunk_arr = np.zeros(dims, dtype=dt, order='C')
        else:
            log.debug(f"Chunk {chunk_id} not found")
            
        if chunk_arr is not None:
            # check that there's room in the cache before adding it
            if chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                # no room in the cache, wait till space is freed by the s3sync task
                wait_start = time.time()
                while chunk_cache.memTarget - chunk_cache.memDirty < chunk_arr.size:
                    log.warn(f"getChunk, cache utilization: {chunk_cache.cacheUtilizationPercent}, sleeping till items are flushed")
                    if time.time() - wait_start > MAX_WAIT_TIME:
                        log.error(f"unable to save updated chunk {chunk_id} to cache returning 503 error")
                        raise HTTPServiceUnavailable()
                    await asyncio.sleep(1)
                 
            chunk_cache[chunk_id] = chunk_arr  # store in cache
    return chunk_arr
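The chunk_init branch above broadcasts a fill value across a fresh array, converting lists to tuples first so numpy treats compound fill values as scalars. A standalone illustration:

import numpy as np

dims = (4, 4)
dt = np.dtype([("a", "i4"), ("b", "f4")])
fill_value = tuple([1, 2.5])        # list converted to tuple for broadcast
chunk_arr = np.empty(dims, dtype=dt, order='C')
chunk_arr[...] = fill_value         # every element becomes (1, 2.5)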
Example #26
0
async def POST_Dataset(request):
    """ Handler for POST /datasets"""
    log.request(request)
    app = request.app

    if not request.has_body:
        msg = "POST_Dataset with no body"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()
    log.info("POST_Dataset, body: {}".format(body))

    dset_id = get_obj_id(request, body=body)
    if not isValidUuid(dset_id, obj_class="dataset"):
        log.error("Unexpected dataset_id: {}".format(dset_id))
        raise HTTPInternalServerError()

    # verify the id doesn't already exist
    obj_found = await check_metadata_obj(app, dset_id)
    if obj_found:
        log.error("Post with existing dset_id: {}".format(dset_id))
        raise HTTPInternalServerError()

    if "root" not in body:
        msg = "POST_Dataset with no root"
        log.error(msg)
        raise HTTPInternalServerError()
    root_id = body["root"]
    try:
        validateUuid(root_id, "group")
    except ValueError:
        msg = "Invalid root_id: " + root_id
        log.error(msg)
        raise HTTPInternalServerError()

    if "type" not in body:
        msg = "POST_Dataset with no type"
        log.error(msg)
        raise HTTPInternalServerError()
    type_json = body["type"]
    if "shape" not in body:
        msg = "POST_Dataset with no shape"
        log.error(msg)
        raise HTTPInternalServerError()
    shape_json = body["shape"]

    layout = None
    if "layout" in body:
        layout = body["layout"]  # client specified chunk layout

    # ok - all set, create dataset obj
    now = int(time.time())

    log.debug("POST_dataset typejson: {}, shapejson: {}".format(
        type_json, shape_json))

    dset_json = {
        "id": dset_id,
        "root": root_id,
        "created": now,
        "lastModified": now,
        "type": type_json,
        "shape": shape_json,
        "attributes": {}
    }
    if "creationProperties" in body:
        dset_json["creationProperties"] = body["creationProperties"]
    if layout is not None:
        dset_json["layout"] = layout

    await save_metadata_obj(app, dset_id, dset_json, notify=True, flush=True)

    resp_json = {}
    resp_json["id"] = dset_id
    resp_json["root"] = root_id
    resp_json["created"] = dset_json["created"]
    resp_json["type"] = type_json
    resp_json["shape"] = shape_json
    resp_json["lastModified"] = dset_json["lastModified"]
    resp_json["attributeCount"] = 0

    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
Example #27
0
File: link_sn.py Project: paulmueller/hsds
async def PUT_Link(request):
    """HTTP method to create a new link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    link_title = request.match_info.get('title')
    log.info("PUT Link_title: [{}]".format(link_title))
    validateLinkName(link_title)

    username, pswd = getUserPasswordFromRequest(request)
    # write actions need auth
    await validateUserPassword(app, username, pswd)

    if not request.has_body:
        msg = "PUT Link with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    body = await request.json()

    link_json = {}
    if "id" in body:
        if not isValidUuid(body["id"]):
            msg = "PUT Link with invalid id in body"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        link_json["id"] = body["id"]
        link_json["class"] = "H5L_TYPE_HARD"

    elif "h5path" in body:
        link_json["h5path"] = body["h5path"]
        # could be hard or soft link
        if "h5domain" in body:
            link_json["h5domain"] = body["h5domain"]
            link_json["class"] = "H5L_TYPE_EXTERNAL"
        else:
            # soft link
            link_json["class"] = "H5L_TYPE_SOFT"
    else:
        msg = "PUT Link with no id or h5path keys"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    await validateAction(app, domain, group_id, username, "create")

    # for hard links, verify that the referenced id exists and is in this domain
    if "id" in body:
        ref_id = body["id"]
        ref_json = await getObjectJson(app, ref_id)
        group_json = await getObjectJson(app, group_id)
        if ref_json["root"] != group_json["root"]:
            msg = "Hard link must reference an object in the same domain"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

    # ready to add link now
    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    log.debug("PUT link - getting group: " + req)

    put_rsp = await http_put(app, req, data=link_json)
    log.debug("PUT Link resp: " + str(put_rsp))

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    # link creation successful
    resp = await jsonResponse(request, req_rsp, status=201)
    log.response(request, resp=resp)
    return resp
Example #28
0
def getSliceQueryParam(request, dim, extent, body=None):
    # Get optional query parameters for given dim
    log.debug("getSliceQueryParam: " + str(dim) + ", " + str(extent))
    params = request.rel_url.query

    start = 0
    stop = extent
    step = 1

    if body and "start" in body:
        # look for start params in body JSON
        start_val = body["start"]
        if isinstance(start_val, (list, tuple)):
            if len(start_val) < dim:
                msg = "Not enough dimensions supplied to body start key"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            start = start_val[dim]
        else:
            start = start_val

    if body and "stop" in body:
        stop_val = body["stop"]
        if isinstance(stop_val, (list, tuple)):
            if len(stop_val) < dim:
                msg = "Not enough dimensions supplied to body stop key"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            stop = stop_val[dim]
        else:
            stop = stop_val
    if body and "step" in body:
        step_val = body["step"]
        if isinstance(step_val, (list, tuple)):
            if len(step_val) < dim:
                msg = "Not enough dimensions supplied to body step key"
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            step = step_val[dim]
        else:
            step = step_val

    if "select" in params:
        query = params["select"]
        log.debug("select query value:" + query)

        if not query.startswith('['):
            msg = "Bad Request: selection query missing start bracket"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        if not query.endswith(']'):
            msg = "Bad Request: selection query missing end bracket"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)

        # now strip out brackets
        query = query[1:-1]

        query_array = query.split(',')
        if dim >= len(query_array):
            msg = "Not enough dimensions supplied to query argument"
            log.warn(msg)
            raise HTTPBadRequest(reason=msg)
        dim_query = query_array[dim].strip()

        if dim_query.find(':') < 0:
            # just a number - return start = stop for this value
            try:
                start = int(dim_query)
            except ValueError:
                msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(
                    dim)
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            stop = start
        elif dim_query == ':':
            # select everything
            pass
        else:
            fields = dim_query.split(":")
            log.debug("got fields: {}".format(fields))
            if len(fields) > 3:
                msg = "Bad Request: Too many ':' seperators for dimension: " + str(
                    dim)
                log.warn(msg)
                raise HTTPBadRequest(reason=msg)
            try:
                if fields[0]:
                    start = int(fields[0])
                if fields[1]:
                    stop = int(fields[1])
                if len(fields) > 2 and fields[2]:
                    step = int(fields[2])
            except ValueError:
                msg = "Bad Request: invalid selection parameter (can't convert to int) for dimension: " + str(
                    dim)
                log.info(msg)
                raise HTTPBadRequest(reason=msg)
    log.debug("start: {}, stop: {}, step: {}".format(start, stop, step))
    # now, validate whatever start/stop/step values we got
    if start < 0 or start > extent:
        msg = "Bad Request: Invalid selection start parameter for dimension: " + str(
            dim)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if stop > extent:
        msg = "Bad Request: Invalid selection stop parameter for dimension: " + str(
            dim)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if step <= 0:
        msg = "Bad Request: invalid selection step parameter for dimension: " + str(
            dim)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    s = slice(start, stop, step)
    log.debug("dim query[" + str(dim) + "] returning: start: " + str(start) +
              " stop: " + str(stop) + " step: " + str(step))
    return s
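For clarity, a standalone re-implementation of the per-dimension selection grammar parsed above (illustration only; this is not the hsds function itself):

def parse_dim(dim_query, extent):
    if ':' not in dim_query:
        v = int(dim_query)          # bare index: start == stop
        return slice(v, v, 1)
    fields = dim_query.split(':')
    start = int(fields[0]) if fields[0] else 0
    stop = int(fields[1]) if len(fields) > 1 and fields[1] else extent
    step = int(fields[2]) if len(fields) > 2 and fields[2] else 1
    return slice(start, stop, step)

assert parse_dim("0:50:2", 100) == slice(0, 50, 2)
assert parse_dim(":", 100) == slice(0, 100, 1)
assert parse_dim("7", 100) == slice(7, 7, 1)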
Example #29
0
async def get_metadata_obj(app, obj_id, bucket=None):
    """ Get object from metadata cache (if present).
        Otherwise fetch from S3 and add to cache
    """
    log.info(f"get_metadata_obj: {obj_id} bucket: {bucket}")
    validateObjId(obj_id, bucket)  # throws internal server error if invalid
    if isValidDomain(obj_id):
        bucket = getBucketForDomain(obj_id)
    """
    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError() 
    """

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        msg = f"{obj_id} has been deleted"
        log.warn(msg)
        raise HTTPGone()

    meta_cache = app['meta_cache']
    obj_json = None
    if obj_id in meta_cache:
        log.debug(f"{obj_id} found in meta cache")
        obj_json = meta_cache[obj_id]
    else:
        s3_key = getS3Key(obj_id)
        pending_s3_read = app["pending_s3_read"]
        if obj_id in pending_s3_read:
            # already a read in progress, wait for it to complete
            read_start_time = pending_s3_read[obj_id]
            log.info(
                f"s3 read request for {s3_key} was requested at: {read_start_time}"
            )
            while time.time() - read_start_time < 2.0:
                log.debug("waiting for pending s3 read, sleeping")
                await asyncio.sleep(1)  # sleep for sub-second?
                if obj_id in meta_cache:
                    log.info(f"object {obj_id} has arrived!")
                    obj_json = meta_cache[obj_id]
                    break
            if not obj_json:
                log.warn(
                    f"s3 read for object {s3_key} timed-out, initiaiting a new read"
                )

        # invoke S3 read unless the object has just come in from pending read
        if not obj_json:
            log.debug(f"getS3JSONObj({s3_key}, bucket={bucket})")
            if obj_id not in pending_s3_read:
                pending_s3_read[obj_id] = time.time()
            # read S3 object as JSON
            obj_json = await getS3JSONObj(app, s3_key, bucket=bucket)
            if obj_id in pending_s3_read:
                # read complete - remove from pending map
                elapsed_time = time.time() - pending_s3_read[obj_id]
                log.info(f"s3 read for {s3_key} took {elapsed_time}")
                del pending_s3_read[obj_id]
            meta_cache[obj_id] = obj_json  # add to cache
    return obj_json
Example #30
0
File: s3Util.py Project: paulmueller/hsds
def getS3Client(app):
    """ Return s3client handle
    """

    if "session" not in app:
        # app startup should have set this
        raise KeyError("Session not initialized")
    session = app["session"]

    if "s3" in app:
        if "token_expiration" in app:
            # check that our token is not about to expire
            expiration = app["token_expiration"]
            now = datetime.datetime.now()
            delta = expiration - now
            if delta.total_seconds() > 10:
                return app["s3"]
            # otherwise, fall through and get a new token
            log.info("S3 access token has expired - renewing")
        else:
            return app["s3"]
    
    # first time setup of s3 client or limited time token has expired
    aws_region = config.get("aws_region")
    log.info(f"aws_region {aws_region}")
    aws_secret_access_key = None
    aws_access_key_id = None 
    aws_session_token = None
    aws_iam_role = config.get("aws_iam_role")
    aws_secret_access_key = config.get("aws_secret_access_key")
    aws_access_key_id = config.get("aws_access_key_id")
    if not aws_secret_access_key or aws_secret_access_key == 'xxx':
        log.info("aws secret access key not set")
        aws_secret_access_key = None
    if not aws_access_key_id or aws_access_key_id == 'xxx':
        log.info("aws access key id not set")
        aws_access_key_id = None
  
    if aws_iam_role and not aws_secret_access_key:
        log.info("using iam role: {}".format(aws_iam_role))
        log.info("getting EC2 IAM role credentials")
        # Use EC2 IAM role to get credentials
        # See: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html?icmpid=docs_ec2_console
        curl_cmd = ["curl", "http://169.254.169.254/latest/meta-data/iam/security-credentials/{}".format(aws_iam_role)]
        p = subprocess.run(curl_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if p.returncode != 0:
            msg = "Error getting IAM role credentials: {}".format(p.stderr)
            log.error(msg)
        else:
            stdout = p.stdout.decode("utf-8")
            try:
                cred = json.loads(stdout)
                aws_secret_access_key = cred["SecretAccessKey"]
                aws_access_key_id = cred["AccessKeyId"]
                log.info("Got ACCESS_KEY_ID: {} from EC2 metadata".format(aws_access_key_id))     
                aws_session_token = cred["Token"]
                log.info("Got Expiration of: {}".format(cred["Expiration"]))
                expiration_str = cred["Expiration"][:-1] + "UTC" # trim off 'Z' and add 'UTC'
                # save the expiration
                app["token_expiration"] = datetime.datetime.strptime(expiration_str, "%Y-%m-%dT%H:%M:%S%Z")
            except json.JSONDecodeError:
                msg = "Unexpected error decoding EC2 meta-data response"
                log.error(msg)
            except KeyError:
                msg = "Missing expected key from EC2 meta-data response"
                log.error(msg)
       
    s3_gateway = config.get('aws_s3_gateway')
    if not s3_gateway:
        msg="Invalid aws s3 gateway"
        log.error(msg)
        raise ValueError(msg)
    if s3_gateway[0] == '[' and s3_gateway[-1] == ']':
        # convert string to a comma separated list
        items = s3_gateway[1:-1].split(',')
        s3_gateway = []
        for item in items:
            s3_gateway.append(item.strip())
    if isinstance(s3_gateway, list):
        # use the node number to select an item from the list
        node_number = 0
        if "node_number" in app:
            node_number = app["node_number"]
        item = s3_gateway[node_number % len(s3_gateway)]
        log.debug(f"selecting: {item} from s3_gateway list: {s3_gateway}")
        s3_gateway = item
    log.info(f"Using S3Gateway: {s3_gateway}")
    use_ssl = False
    if s3_gateway.startswith("https"):
        use_ssl = True
    max_pool_connections = config.get('aio_max_pool_connections')
    aio_config = AioConfig(max_pool_connections=max_pool_connections)
    s3 = session.create_client('s3', region_name=aws_region,
                               aws_secret_access_key=aws_secret_access_key,
                               aws_access_key_id=aws_access_key_id,
                               aws_session_token=aws_session_token,
                               endpoint_url=s3_gateway,
                               use_ssl=use_ssl,
                               config=aio_config)

    app['s3'] = s3  # save so same client can be returned in subsequent calls

    return s3
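A hypothetical usage sketch: the value cached in app['s3'] is an aiobotocore S3 client, so the usual S3 operations are available as coroutines.

async def fetch_object_size(app, bucket, key):
    # assumes app was initialized with a session as in the snippet above
    s3 = getS3Client(app)
    rsp = await s3.head_object(Bucket=bucket, Key=key)
    return rsp["ContentLength"]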