Beispiel #1
0
def main():
    """Command-line entry point: run ``run_delete`` for the given root id.

    The root id is read from the first command-line argument.  Usage is
    printed when no argument (or ``-h`` / ``--help``) is supplied.  The
    process exits with status 1 when the id is not a valid UUID or is not
    a schema v2 id.
    """
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        # Never fall through to sys.argv[1] below, even if printUsage()
        # itself does not terminate the process.
        sys.exit(1)

    rootid = sys.argv[1]

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)

    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(loop=loop),
    }
    try:
        loop.run_until_complete(run_delete(app, rootid))
    finally:
        # release the loop even when the delete coroutine raises
        loop.close()

    print("done!")
Beispiel #2
0
async def PUT_Object(request):
    """HTTP method to notify creation/update of an object id.

    For schema v2 ids the root id of the object is added to the
    ``app["pending"]`` set.  Responds with an empty JSON body and status 201.

    Raises:
        HTTPBadRequest: the request carries no id, or the id is not a
            valid UUID.
    """
    log.request(request)
    pending_roots = request.app["pending"]
    obj_id = request.match_info.get('id')
    if not obj_id:
        log.error("PUT_Object with no id")
        raise HTTPBadRequest()

    log.info(f"PUT_Object/{obj_id}")

    if not isValidUuid(obj_id):
        log.warn(f"Invalid id: {obj_id}, ignoring")
        raise HTTPBadRequest()

    # only schema v2 ids are tracked through their root
    if isSchema2Id(obj_id):
        root_id = getRootObjId(obj_id)
        log.debug(f"adding root: {root_id} to pending queue for objid: {obj_id}")
        pending_roots.add(root_id)

    response = json_response({}, status=201)
    log.response(request, resp=response)
    return response
Beispiel #3
0
async def GET_Group(request):
    """HTTP GET method to return JSON for a group.

    Query parameters read: ``bucket`` (passed on to get_metadata_obj),
    ``include_links`` and ``include_attrs`` (when present and non-empty the
    group's links / attributes are added to the response).

    Raises:
        HTTPInternalServerError: the id is not a valid group UUID.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    group_id = get_obj_id(request)
    bucket = query["bucket"] if "bucket" in query else None
    log.info(f"GET group: {group_id} bucket: {bucket}")

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)

    # summary fields are always returned
    resp_json = {
        "id": group_json["id"],
        "root": group_json["root"],
        "created": group_json["created"],
        "lastModified": group_json["lastModified"],
        "linkCount": len(group_json["links"]),
        "attributeCount": len(group_json["attributes"]),
    }

    # full collections are returned only on request
    optional_sections = (("include_links", "links"),
                         ("include_attrs", "attributes"))
    for flag, section in optional_sections:
        if flag in query and query[flag]:
            resp_json[section] = group_json[section]

    response = json_response(resp_json)
    log.response(request, resp=response)
    return response
Beispiel #4
0
async def DELETE_Dataset(request):
    """HTTP DELETE method for a dataset.

    Verifies the dataset exists (check_metadata_obj) and then removes it
    with delete_metadata_obj.  Responds with an empty JSON body.

    Raises:
        HTTPInternalServerError: the id is not a valid dataset UUID.
        HTTPNotFound: check_metadata_obj reports the dataset is missing.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    dset_id = request.match_info.get('id')
    log.info("DELETE dataset: {}".format(dset_id))

    if not isValidUuid(dset_id, obj_class="dataset"):
        log.error("Unexpected dataset id: {}".format(dset_id))
        raise HTTPInternalServerError()

    # the object has to exist before it can be deleted
    if not await check_metadata_obj(app, dset_id):
        raise HTTPNotFound()

    log.debug("deleting dataset: {}".format(dset_id))

    # notification stays on unless a "Notify" query parameter is present
    # with an empty value
    suppress_notify = "Notify" in query and not query["Notify"]
    await delete_metadata_obj(app, dset_id, notify=not suppress_notify)

    response = json_response({})
    log.response(request, resp=response)
    return response
Beispiel #5
0
async def GET_Datatype(request):
    """HTTP GET method to return JSON for a committed datatype.

    Query parameters read: ``bucket`` (passed on to get_metadata_obj) and
    ``include_attrs`` (when present and non-empty the attributes are added
    to the response).

    Raises:
        HTTPInternalServerError: the id is not a valid type UUID.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    ctype_id = get_obj_id(request)

    if not isValidUuid(ctype_id, obj_class="type"):
        log.error(f"Unexpected type_id: {ctype_id}")
        raise HTTPInternalServerError()

    bucket = query["bucket"] if "bucket" in query else None

    ctype_json = await get_metadata_obj(app, ctype_id, bucket=bucket)

    resp_json = {
        "id": ctype_json["id"],
        "root": ctype_json["root"],
        "created": ctype_json["created"],
        "lastModified": ctype_json["lastModified"],
        "type": ctype_json["type"],
        "attributeCount": len(ctype_json["attributes"]),
    }
    # the attribute collection itself is returned only on request
    if "include_attrs" in query and query["include_attrs"]:
        resp_json["attributes"] = ctype_json["attributes"]

    response = json_response(resp_json)
    log.response(request, resp=response)
    return response
Beispiel #6
0
async def check_metadata_obj(app, obj_id):
    """Return False if the object does not exist, True otherwise.

    An id listed in ``app['deleted_ids']`` is reported as missing, an id
    present in ``app['meta_cache']`` as found; for anything else the answer
    comes from isS3Obj on the id's S3 key.

    Raises:
        HTTPInternalServerError: the id is neither a valid domain nor a
            valid UUID, or validateInPartition raised KeyError.
    """
    if not (isValidDomain(obj_id) or isValidUuid(obj_id)):
        log.error("Invalid obj id: {}".format(obj_id))
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    if obj_id in app['deleted_ids']:
        log.info("{} has been deleted".format(obj_id))
        return False

    if obj_id in app['meta_cache']:
        return True

    # Not in cache, check whether the S3 object exists
    s3_key = getS3Key(obj_id)
    log.debug("check_metadata_obj({})".format(s3_key))
    return await isS3Obj(app, s3_key)
Beispiel #7
0
async def GET_Dataset(request):
    """HTTP GET method to return JSON for a dataset.

    The response always carries id, root, created, lastModified, type,
    shape and attributeCount; ``creationProperties`` and ``layout`` are
    copied over only when the stored dataset JSON has them.

    Raises:
        HTTPInternalServerError: the id is not a valid dataset UUID.
    """
    log.request(request)
    app = request.app
    dset_id = get_obj_id(request)

    if not isValidUuid(dset_id, obj_class="dataset"):
        # message names the dataset id (it previously said "type_id")
        log.error("Unexpected dataset id: {}".format(dset_id))
        raise HTTPInternalServerError()

    dset_json = await get_metadata_obj(app, dset_id)

    resp_json = {
        "id": dset_json["id"],
        "root": dset_json["root"],
        "created": dset_json["created"],
        "lastModified": dset_json["lastModified"],
        "type": dset_json["type"],
        "shape": dset_json["shape"],
        "attributeCount": len(dset_json["attributes"]),
    }
    # optional keys are passed through only when stored
    for optional_key in ("creationProperties", "layout"):
        if optional_key in dset_json:
            resp_json[optional_key] = dset_json[optional_key]

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
Beispiel #8
0
async def DELETE_Group(request):
    """HTTP DELETE method for a group.

    Verifies the group exists (check_metadata_obj) and then removes it
    with delete_metadata_obj.  Responds with an empty JSON body.

    Raises:
        HTTPInternalServerError: the id is not a valid group UUID.
        HTTPNotFound: check_metadata_obj reports the group is missing.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    group_id = get_obj_id(request)
    log.info("DELETE group: {}".format(group_id))

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    # the object has to exist before it can be deleted
    if not await check_metadata_obj(app, group_id):
        log.debug(f"delete called on non-exsistet obj: {group_id}")
        raise HTTPNotFound()

    log.debug("deleting group: {}".format(group_id))

    # notification stays on unless a "Notify" query parameter is present
    # with an empty value
    suppress_notify = "Notify" in query and not query["Notify"]
    await delete_metadata_obj(app, group_id, notify=not suppress_notify)

    response = json_response({})
    log.response(request, resp=response)
    return response
Beispiel #9
0
async def GET_Group(request):
    """HTTP GET method to return summary JSON for a group.

    The response carries id, root, created, lastModified plus the number
    of links and attributes stored for the group.

    Raises:
        HTTPInternalServerError: the id is not a valid group UUID.
    """
    log.request(request)
    app = request.app
    group_id = get_obj_id(request)
    log.info("GET group: {}".format(group_id))

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    group_json = await get_metadata_obj(app, group_id)

    resp_json = {
        "id": group_json["id"],
        "root": group_json["root"],
        "created": group_json["created"],
        "lastModified": group_json["lastModified"],
        "linkCount": len(group_json["links"]),
        "attributeCount": len(group_json["attributes"]),
    }

    response = json_response(resp_json)
    log.response(request, resp=response)
    return response
Beispiel #10
0
async def DELETE_Link(request):
    """HTTP method to delete a link.

    Validates the group id, link title, credentials and domain, checks the
    "delete" action is permitted, then forwards the delete to the data node
    URL for the group and returns that node's JSON reply.

    Raises:
        HTTPBadRequest: missing/invalid group id or invalid domain.
    """
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        reason = "Missing group id"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    if not isValidUuid(group_id, obj_class="Group"):
        reason = "Invalid group id: {}".format(group_id)
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    # authenticate the caller
    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        reason = "Invalid host value: {}".format(domain)
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    # authorize the delete
    await validateAction(app, domain, group_id, username, "delete")

    dn_url = getDataNodeUrl(app, group_id)
    dn_url += "/groups/" + group_id + "/links/" + link_title
    dn_reply = await http_delete(app, dn_url)

    response = await jsonResponse(request, dn_reply)
    log.response(request, resp=response)
    return response
Beispiel #11
0
def get_obj_id(request, body=None):
    """Get the object id from the request.

    The id is taken from ``body["id"]`` when a body with that key is given;
    otherwise from the request's ``id`` match-info entry, in which case the
    request's collection name is also used as the expected object class.

    Raises:
        HTTPInternalServerError: the id is missing, is not a valid UUID,
            or validateInPartition raised KeyError.
    """
    app = request.app
    collection = None

    if body and "id" in body:
        found_id = body["id"]
    else:
        # returns datasets|groups|datatypes
        collection = getRequestCollectionName(request)
        found_id = request.match_info.get('id')

    if not found_id:
        log.error("Missing object id")
        raise HTTPInternalServerError()

    if not isValidUuid(found_id, obj_class=collection):
        log.error(f"Invalid obj id: {found_id}")
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, found_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    return found_id
Beispiel #12
0
async def DELETE_Object(request):
    """HTTP method to notify deletion of an object id.

    For schema v2 ids, dataset ids and root-group ids are added to
    ``app["delete_set"]``; non-root groups and datatypes are only logged.
    Responds with an empty JSON body.

    Raises:
        HTTPBadRequest: the id is not a valid UUID.
    """
    log.request(request)

    delete_set = request.app["delete_set"]

    obj_id = request.match_info.get('id')
    if not isValidUuid(obj_id):
        log.warn(f"Invalid id: {obj_id}")
        raise HTTPBadRequest()

    if isSchema2Id(obj_id):
        kind = getCollectionForId(obj_id)
        if kind == "datasets":
            delete_set.add(obj_id)
        elif kind == "groups":
            # only the root group needs any action
            if not isRootObjId(obj_id):
                log.info(f"ignoring delete non-root group: {obj_id}")
            else:
                log.info(f"adding root group: {obj_id} to delete_set")
                delete_set.add(obj_id)
        elif kind == "datatypes":
            log.info(f"ignoring delete for datatype object: {obj_id}")
        else:
            log.error(f"Unexpected collection type: {kind}")

    response = json_response({})
    log.response(request, resp=response)
    return response
Beispiel #13
0
async def DELETE_Group(request):
    """HTTP method to delete a group resource.

    Validates the id, credentials and domain, refuses to delete the
    domain's root group, forwards the delete to the data node and drops the
    id from ``app['meta_cache']``.  Responds with an empty JSON body.

    Raises:
        HTTPBadRequest: missing/invalid group id, invalid domain, or the
            domain JSON has no "root" key.
        HTTPForbidden: the id is the domain's root group.
    """
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    group_id = request.match_info.get('id')
    if not group_id:
        reason = "Missing group id"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    if not isValidUuid(group_id, "Group"):
        reason = f"Invalid group id: {group_id}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    # authenticate the caller
    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        reason = f"Invalid domain: {domain}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    bucket = getBucketForDomain(domain)

    domain_json = await getDomainJson(app, domain)

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, group_id, username, "delete")

    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    if group_id == domain_json["root"]:
        log.warn("Forbidden - deletion of root group is not allowed - delete domain first")
        raise HTTPForbidden()

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id
    params = {"bucket": bucket} if bucket else {}
    log.debug(f"http_delete req: {req} params: {params}")

    await http_delete(app, req, params=params)

    # the cached copy is stale once the data node has deleted the group
    if group_id in meta_cache:
        del meta_cache[group_id]

    response = await jsonResponse(request, {})
    log.response(request, resp=response)
    return response
Beispiel #14
0
async def DELETE_Chunk(request):
    """HTTP DELETE method for /chunks/
    Note: clients (i.e. SN nodes) don't directly delete chunks.  This method should
    only be called by the AN node.

    Removes the chunk from ``app['chunk_cache']``, drops the owning
    dataset's entries from ``app["deflate_map"]`` / ``app["shuffle_map"]``
    and deletes the storage object when it exists.  The optional ``bucket``
    query parameter is passed on to the storage calls.  Responds with an
    empty JSON body.

    Raises:
        HTTPBadRequest: the chunk id is missing or not a valid chunk UUID.
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"DELETE chunk: {chunk_id}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = params["bucket"] if "bucket" in params else None

    validateInPartition(app, chunk_id)

    chunk_cache = app['chunk_cache']
    s3key = getS3Key(chunk_id)
    log.debug(f"DELETE_Chunk s3_key: {s3key}")

    if chunk_id in chunk_cache:
        del chunk_cache[chunk_id]

    deflate_map = app["deflate_map"]
    shuffle_map = app["shuffle_map"]
    dset_id = getDatasetId(chunk_id)
    if dset_id in deflate_map:
        # The only reason chunks are ever deleted is if the dataset is being deleted,
        # so it should be safe to remove this entry now
        log.info(f"Removing deflate_map entry for {dset_id}")
        del deflate_map[dset_id]
    if dset_id in shuffle_map:
        log.info(f"Removing shuffle_map entry for {dset_id}")
        del shuffle_map[dset_id]

    if await isStorObj(app, s3key, bucket=bucket):
        await deleteStorObj(app, s3key, bucket=bucket)
    else:
        # log under this handler's own name (it previously said
        # "delete_metadata_obj", which pointed at the wrong function)
        log.info(f"DELETE_Chunk - key {s3key} not found (never written)?")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
Beispiel #15
0
def main():
    """Command-line entry point: run ``run_scan`` for a root id and print the results.

    Arguments read from ``sys.argv``: the root id, optionally followed by
    ``-update`` (passed to run_scan as ``update=True``).  Usage is printed
    when no argument (or ``-h`` / ``--help``) is supplied.  The process
    exits with status 1 when the id is not a valid UUID or is not a
    schema v2 id.  Results are read from ``app["scanRoot_results"]`` after
    the scan completes.
    """
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        # Never fall through to sys.argv[1] below, even if printUsage()
        # itself does not terminate the process.
        sys.exit(1)

    rootid = sys.argv[1]
    do_update = len(sys.argv) > 2 and sys.argv[2] == "-update"

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)

    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()

    app = {
        "bucket_name": config.get("bucket_name"),
        "loop": loop,
        "session": get_session(loop=loop),
    }
    try:
        loop.run_until_complete(run_scan(app, rootid=rootid, update=do_update))
    finally:
        # release the loop even when the scan coroutine raises
        loop.close()

    results = app["scanRoot_results"]
    datasets = results["datasets"]
    lastModified = datetime.fromtimestamp(results["lastModified"])
    total_size = results["metadata_bytes"] + results["allocated_bytes"]
    print(f"lastModified: {lastModified}")
    print(f"size: {total_size}")
    print(f"num chunks: {results['num_chunks']}")
    print(f"num_groups: {results['num_groups']}")
    print(f"num_datatypes: {results['num_datatypes']}")
    print(f"num_datasets: {len(datasets)}")
    # one line per dataset: last-modified, chunk count, allocated bytes
    for dsetid, dataset_info in datasets.items():
        print(
            f"   {dsetid}: {dataset_info['lastModified']}, {dataset_info['num_chunks']}, {dataset_info['allocated_bytes']}"
        )

    scan_start = datetime.fromtimestamp(results["scan_start"])
    print(f"scan_start: {scan_start}")
    scan_complete = datetime.fromtimestamp(results["scan_complete"])
    print(f"scan_complete: {scan_complete}")

    print("done!")
Beispiel #16
0
async def delete_metadata_obj(app,
                              obj_id,
                              notify=True,
                              root_id=None,
                              bucket=None):
    """Delete the given object.

    Records the id in ``app['deleted_ids']``, drops it from
    ``app['meta_cache']`` and ``app["dirty_ids"]``, and deletes its S3
    object when one exists.  For schema v2 UUIDs, a root id is queued in
    ``app["gc_ids"]``; for any other id the root is notified through
    notify_root when ``notify`` is true.

    The incoming ``root_id`` value is not read; the root is derived from
    ``obj_id``.  For a domain id the bucket is taken from the domain.

    Raises:
        HTTPInternalServerError: validateInPartition raised KeyError.
    """
    meta_cache = app['meta_cache']
    dirty_ids = app["dirty_ids"]
    log.info(f"delete_meta_data_obj: {obj_id} notify: {notify}")
    validateObjId(obj_id, bucket)
    if isValidDomain(obj_id):
        bucket = getBucketForDomain(obj_id)

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id not in deleted_ids:
        log.debug(f"adding {obj_id} to deleted ids")
        deleted_ids.add(obj_id)
    else:
        log.warn(f"{obj_id} has already been deleted")

    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    if obj_id in dirty_ids:
        log.debug(f"removing dirty_ids for: {obj_id}")
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)
    if await isS3Obj(app, s3key, bucket=bucket):
        await deleteS3Obj(app, s3key, bucket=bucket)
    else:
        log.info(
            f"delete_metadata_obj - key {s3key} not found (never written)?")

    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        if isRootObjId(obj_id):
            # add to gc ids so sub-objects will be deleted
            pending_gc = app["gc_ids"]
            log.info(f"adding root id: {obj_id} for GC cleanup")
            pending_gc.add(obj_id)
        elif notify:
            await notify_root(app, getRootObjId(obj_id), bucket=bucket)
        # no notify for domain deletes since the root group is being deleted

    log.debug(f"delete_metadata_obj for {obj_id} done")
Beispiel #17
0
async def DELETE_Attribute(request):
    """HTTP method to delete an attribute resource.

    Validates the object id, attribute name, credentials and domain, checks
    the "delete" action is permitted, then forwards the delete to the data
    node.  Responds with ``{"hrefs": []}``.

    Raises:
        HTTPBadRequest: missing/invalid object id, invalid domain, or the
            domain JSON has no "root" key.
    """
    log.request(request)
    app = request.app
    collection = getRequestCollectionName(
        request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "delete")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    # log under the verb actually being issued (it previously said "PUT")
    log.info("DELETE Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    rsp_json = await http_delete(app, req, params=params)

    log.info(f"DELETE Attribute resp: {rsp_json}")

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
Beispiel #18
0
async def bucketCheck(app):
    """Verify that contents of bucket are self-consistent.

    Runs listKeys, clearUsedFlags and markObjs, counts the non-chunk UUID
    objects whose ``used`` flag is False, then prints the domains, roots,
    top-level domains and summary totals held in ``app``.
    """
    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    await listKeys(app)      # do initial listKeys
    clearUsedFlags(app)      # clear used flags
    await markObjs(app)      # mark objs

    unlinked_count = 0
    for objid in app["s3objs"]:
        # only non-chunk UUID objects are inspected
        if not isValidUuid(objid) or isValidChunkId(objid):
            continue
        try:
            s3obj = await getS3Obj(app, objid)
            if s3obj.used is False:
                unlinked_count += 1
        except HTTPInternalServerError as hpe:
            log.warn(f"got error retreiving {objid}: {hpe.code}")

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    roots = app["roots"]
    for root in roots:
        print("root:", root)

    # a top-level domain has no '/' after its leading one
    top_level_domains = []
    for domain in domains:
        if domain[0] != '/':
            log.error(f"unexpected domain: {domain}")
            continue
        if '/' not in domain[1:]:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print(f"total storage: {app['bytes_in_bucket']}")
    print(f"Num objects: {len(app['s3objs'])}")
    print(f"Num domains: {len(app['domains'])}")
    print(f"Num root groups: {len(app['roots'])}")
    print(f"Unlinked objects: {unlinked_count}")
Beispiel #19
0
async def notify_root(app, root_id, bucket=None):
    """POST a notification for root_id to its data node's /roots/ endpoint.

    Logs an error and returns without posting when root_id is not a valid
    schema v2 UUID.  ``bucket``, when truthy, is sent as a request
    parameter.
    """
    log.info(f"notify_root: {root_id}")
    if not (isValidUuid(root_id) and isSchema2Id(root_id)):
        log.error(f"unexpected call to notify with invalid id: {root_id}")
        return

    notify_req = getDataNodeUrl(app, root_id) + "/roots/" + root_id
    log.info(f"Notify: {notify_req} [{bucket}]")
    params = {"bucket": bucket} if bucket else {}
    await http_post(app, notify_req, data={}, params=params)
Beispiel #20
0
async def scanRoot(app, rootid, update=False, bucket=None):
    """Iterate through all storage keys under the given root and return stats.

    The results dict is also stored as ``app["scanRoot_results"]`` before
    the key listing starts.  When ``update`` is true the results are
    written to ``<root prefix>.info.json``.  An empty dict is returned for
    ids that are not schema v2.

    Note: not re-entrant!  Only one scanRoot can be run at a time per app.

    Raises:
        ValueError: invalid root id, no bucket available, or the root's
            key does not end with "/.group.json".
    """
    log.info(f"scanRoot for rootid: {rootid} bucket: {bucket}")

    if not isValidUuid(rootid):
        raise ValueError("Invalid root id")

    if not isSchema2Id(rootid):
        log.warn(f"no tabulation for schema v1 id: {rootid} returning null results")
        return {}

    # fall back to the configured bucket when none was given
    if not bucket:
        bucket = config.get("bucket_name")
    if not bucket:
        raise ValueError(f"no bucket defined for scan of {rootid}")

    group_suffix = ".group.json"
    root_key = getS3Key(rootid)
    if not root_key.endswith("/" + group_suffix):
        raise ValueError("unexpected root key")
    # keep the trailing '/' so the prefix matches everything under the root
    root_prefix = root_key[:-len(group_suffix)]

    log.debug(f"scanRoot - using prefix: {root_prefix}")

    results = {
        "lastModified": 0,
        "num_groups": 0,
        "num_datatypes": 0,
        "datasets": {},  # since we need per dataset info
        "num_chunks": 0,
        "allocated_bytes": 0,
        "metadata_bytes": 0,
        "scan_start": time.time(),
    }
    app["scanRoot_results"] = results

    await getStorKeys(app,
                      prefix=root_prefix,
                      include_stats=True,
                      bucket=bucket,
                      callback=scanRootCallback)

    log.info(f"scan complete for rootid: {rootid}")
    results["scan_complete"] = time.time()

    if update:
        # write .info object back to storage
        info_key = root_prefix + ".info.json"
        log.info(f"updating info key: {info_key}")
        await putStorJSONObj(app, info_key, results, bucket=bucket)
    return results
Beispiel #21
0
async def GET_DatasetShape(request):
    """HTTP method to return JSON for a dataset's shape.

    The response carries the shape, created and lastModified values of the
    dataset plus self/owner/root hrefs.

    Raises:
        HTTPBadRequest: missing/invalid dataset id or invalid domain.
    """
    log.request(request)
    app = request.app

    dset_id = request.match_info.get('id')
    if not dset_id:
        reason = "Missing dataset id"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    if not isValidUuid(dset_id, "Dataset"):
        reason = f"Invalid dataset id: {dset_id}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        # unauthenticated access is permitted by configuration
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        reason = f"Invalid domain: {domain}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    bucket = getBucketForDomain(domain)

    # get authoritative state for dataset from DN (even if it's in the meta_cache).
    dset_json = await getObjectJson(app, dset_id, refresh=True, bucket=bucket)

    await validateAction(app, domain, dset_id, username, "read")

    dset_uri = '/datasets/' + dset_id
    hrefs = [
        {'rel': 'self', 'href': getHref(request, dset_uri + "/shape")},
        {'rel': 'owner', 'href': getHref(request, dset_uri)},
        {'rel': 'root',
         'href': getHref(request, '/groups/' + dset_json["root"])},
    ]

    resp_json = {
        "shape": dset_json["shape"],
        "hrefs": hrefs,
        "created": dset_json["created"],
        "lastModified": dset_json["lastModified"],
    }

    response = await jsonResponse(request, resp_json)
    log.response(request, resp=response)
    return response
Beispiel #22
0
async def DELETE_Link(request):
    """HTTP DELETE method for group links.

    Removes the titled link from the group's ``links`` dictionary, stamps
    the group's ``lastModified`` with the current time and saves the group
    through save_metadata_obj.  The optional ``bucket`` query parameter is
    passed on to the metadata calls.

    Raises:
        HTTPBadRequest: the id is not a valid group UUID.
        HTTPInternalServerError: the group JSON has no "links" key.
        HTTPNotFound: the group has no link with the given title.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"DELETE link: {group_id}")

    if not isValidUuid(group_id, obj_class="group"):
        reason = f"Unexpected group_id: {group_id}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    bucket = query["bucket"] if "bucket" in query else None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)
    # TBD: Possible race condition
    if "links" not in group_json:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    links = group_json["links"]
    if link_title not in links:
        log.warn(f"Link name {link_title} not found in group: {group_id}")
        raise HTTPNotFound()

    # remove the link and record the modification time
    del links[link_title]
    group_json["lastModified"] = time.time()

    # write back to S3
    await save_metadata_obj(app, group_id, group_json, bucket=bucket)

    response = json_response({"href": []})  # hrefs are TBD
    log.response(request, resp=response)
    return response
Beispiel #23
0
async def DELETE_Datatype(request):
    """HTTP method to delete a committed type resource.

    Validates the id, credentials and domain, checks the "delete" action is
    permitted, forwards the delete to the data node and drops the id from
    ``app['meta_cache']``.  Responds with an empty JSON body.

    Raises:
        HTTPBadRequest: missing/invalid type id, invalid domain, or the
            domain JSON has no "root" key.
    """
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    ctype_id = request.match_info.get('id')
    if not ctype_id:
        reason = "Missing committed type id"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    if not isValidUuid(ctype_id, "Type"):
        reason = f"Invalid committed type id: {ctype_id}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    # authenticate the caller
    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        reason = f"Invalid domain: {domain}"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    bucket = getBucketForDomain(domain)
    params = {"bucket": bucket} if bucket else {}

    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, ctype_id, username, "delete")

    dn_url = getDataNodeUrl(app, ctype_id) + "/datatypes/" + ctype_id
    await http_delete(app, dn_url, params=params)

    # the cached copy is stale once the data node has deleted the type
    if ctype_id in meta_cache:
        del meta_cache[ctype_id]

    response = await jsonResponse(request, {})
    log.response(request, resp=response)
    return response
Beispiel #24
0
async def DELETE_Dataset(request):
    """HTTP method to delete a dataset resource.

    Validates the id, credentials and domain, checks the "delete" action is
    permitted, forwards the delete to the data node and drops the id from
    ``app['meta_cache']``.  Responds with an empty JSON body.

    Raises:
        HTTPBadRequest: missing/invalid dataset id, invalid domain, or the
            domain JSON has no "root" key.
    """
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    dset_id = request.match_info.get('id')
    if not dset_id:
        reason = "Missing dataset id"
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)
    if not isValidUuid(dset_id, "Dataset"):
        reason = "Invalid dataset id: {}".format(dset_id)
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    # authenticate the caller
    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        reason = "Invalid host value: {}".format(domain)
        log.warn(reason)
        raise HTTPBadRequest(reason=reason)

    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error("Expected root key for domain: {}".format(domain))
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, dset_id, username, "delete")

    dn_url = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id
    await http_delete(app, dn_url)

    # the cached copy is stale once the data node has deleted the dataset
    if dset_id in meta_cache:
        del meta_cache[dset_id]

    response = await jsonResponse(request, {})
    log.response(request, resp=response)
    return response
Beispiel #25
0
async def bucketGC(app):
    """Remove objects from storage for any deleted root groups or datasets.

    Loops forever: while ``app["node_state"]`` is not "READY" it only
    sleeps; otherwise it drains ``app["gc_ids"]``, calling removeKeys for
    each schema v2 root-group or dataset id, and then sleeps for the
    configured ``async_sleep_time``.
    """
    log.info("bucketGC start")
    sleep_secs = int(config.get("async_sleep_time"))
    log.info("async_sleep_time: {}".format(sleep_secs))

    # update/initialize root object before starting GC

    while True:
        if app["node_state"] != "READY":
            log.info("bucketGC - waiting for Node state to be READY")
            await asyncio.sleep(sleep_secs)
            continue  # wait for READY state

        gc_ids = app["gc_ids"]
        while gc_ids:
            obj_id = gc_ids.pop()
            log.info(f"got gc id: {obj_id}")
            if not isValidUuid(obj_id):
                log.error(f"bucketGC - got unexpected gc id: {obj_id}")
            elif not isSchema2Id(obj_id):
                log.warn(f"bucketGC - ignoring v1 id: {obj_id}")
            elif getCollectionForId(obj_id) == "groups":
                # only root groups are expected in the gc set
                if isRootObjId(obj_id):
                    log.info(f"bucketGC - delete root objs: {obj_id}")
                    await removeKeys(app, obj_id)
                else:
                    log.error(f"bucketGC - unexpected non-root id: {obj_id}")
            elif getCollectionForId(obj_id) == "datasets":
                log.info(f"bucketGC - delete dataset: {obj_id}")
                await removeKeys(app, obj_id)
            else:
                log.error(f"bucketGC - unexpected obj_id class: {obj_id}")

        log.info(f"bucketGC - sleep: {sleep_secs}")
        await asyncio.sleep(sleep_secs)

    # shouldn't ever get here
    log.error("bucketGC terminating unexpectedly")
Beispiel #26
0
def main():
    """Command-line entry point: run printS3Obj for the given object id.

    Usage is printed and the process exits with status 1 when no argument
    is supplied or the first argument is -h / --help.  The object id is
    taken from the last command-line argument; if it is not a valid uuid
    an error is printed and the process exits with status 1 instead of
    going on to query storage with a malformed id.
    """
    if len(sys.argv) == 1 or sys.argv[1] in ("-h", "--help"):
        printUsage()
        sys.exit(1)

    obj_id = sys.argv[-1]
    if not isValidUuid(obj_id):
        print("Invalid obj id")
        # stop here - there is nothing meaningful to look up for a bad id
        sys.exit(1)

    # we need to setup a asyncio loop to query s3
    loop = asyncio.get_event_loop()
    session = get_session(loop=loop)

    app = {}
    app["session"] = session
    app['bucket_name'] = config.get("bucket_name")

    try:
        loop.run_until_complete(printS3Obj(app, obj_id))
    finally:
        # close the loop even if the query raised
        loop.close()
Beispiel #27
0
async def DELETE_Chunk(request):
    """HTTP DELETE handler for /chunks/<id>.

    Note: clients (i.e. SN nodes) don't directly delete chunks.  This
    handler should only be called by the AN node.

    The handler evicts the chunk from app['chunk_cache'] if present and
    drops the app["deflate_map"] entry of the dataset the chunk belongs to.
    The S3 key for the chunk is computed and logged only.

    Returns an empty JSON response.
    Raises HTTPBadRequest when the id is missing from the route or is not
    a valid chunk id.
    """
    log.request(request)
    app = request.app

    chunk_id = request.match_info.get("id")
    if not chunk_id:
        err = "Missing chunk id"
        log.error(err)
        raise HTTPBadRequest(reason=err)
    log.info(f"DELETE chunk: {chunk_id}")

    if not isValidUuid(chunk_id, "Chunk"):
        err = f"Invalid chunk id: {chunk_id}"
        log.warn(err)
        raise HTTPBadRequest(reason=err)

    validateInPartition(app, chunk_id)

    cache = app["chunk_cache"]
    s3_key = getS3Key(chunk_id)
    log.debug(f"DELETE_Chunk s3_key: {s3_key}")

    # evict any cached copy of the chunk
    if chunk_id in cache:
        del cache[chunk_id]

    deflate_entries = app["deflate_map"]
    owner_dset_id = getDatasetId(chunk_id)
    if owner_dset_id in deflate_entries:
        # Chunks are only ever deleted as part of deleting their dataset,
        # so it should be safe to remove this entry now
        log.info(f"Removing deflate_map entry for {owner_dset_id}")
        del deflate_entries[owner_dset_id]

    resp = json_response({})
    log.response(request, resp=resp)
    return resp
Beispiel #28
0
async def PUT_Domain(request):
    """HTTP PUT handler that queues a domain's root id as pending.

    Query parameters:
      domain - required; must start with "/" and be at least two
               characters long
      root   - optional; must be a valid uuid.  If it is a Schema v2 id
               it is added to app["pending"].

    Returns a 201 response with an empty JSON body.
    Raises HTTPBadRequest when the domain parameter is missing or
    malformed, or when root is not a valid uuid.
    """
    log.request(request)

    def bad_request(reason):
        # log the problem, then hand back the exception for the caller to raise
        log.warn(reason)
        return HTTPBadRequest(reason=reason)

    app = request.app
    pending_roots = app["pending"]
    query = request.rel_url.query

    if "domain" not in query:
        raise bad_request("No domain provided")
    domain = query["domain"]

    if not domain.startswith("/"):
        raise bad_request("Domain expected to start with /")

    if len(domain) < 2:
        raise bad_request("Invalid domain")

    if "root" in query:
        rootid = query["root"]

        if not isValidUuid(rootid):
            log.warn(f"Invalid id: {rootid}")
            raise HTTPBadRequest()
        log.debug(f"new rootid: {rootid} for domain: {domain}")

        # only Schema v2 roots are tracked in the pending set
        if isSchema2Id(rootid):
            log.info(f"Adding root: {rootid} to pending for PUT domain: {domain}")
            pending_roots.add(rootid)

    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
Beispiel #29
0
async def GET_Link(request):
    """HTTP GET handler returning the JSON stored for one link of a group.

    The group id comes from get_obj_id(request) and the link name from the
    'title' route parameter.  An optional "bucket" query parameter is
    passed through to get_metadata_obj when loading the group.

    Returns a JSON response containing the link's entry from the group's
    "links" mapping.
    Raises HTTPInternalServerError if the group id is not a valid group
    uuid or the loaded group has no "links" key, and HTTPNotFound if the
    group has no link with the requested title.
    """
    log.request(request)
    app = request.app
    query = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"GET link: {group_id}")

    if not isValidUuid(group_id, obj_class="group"):
        log.error(f"Unexpected group_id: {group_id}")
        raise HTTPInternalServerError()

    link_title = request.match_info.get("title")
    validateLinkName(link_title)

    bucket = query["bucket"] if "bucket" in query else None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)
    log.info(f"for id: {group_id} got group json: {group_json}")

    if "links" in group_json:
        group_links = group_json["links"]
    else:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    if link_title in group_links:
        link_json = group_links[link_title]
    else:
        log.warn(f"Link name {link_title} not found in group: {group_id}")
        raise HTTPNotFound()

    resp = json_response(link_json)
    log.response(request, resp=resp)
    return resp
Beispiel #30
0
def validateObjId(obj_id, bucket):
    """
    Verifies the passed in obj_id/bucket pair is what we are expecting.
    For uuids, obj_id should be an actual uuid and bucket should be non-null
    For domains, obj_id should include the bucket prefix and bucket should be empty
       e.g. obj_id="mybucket/home/bob/myfile.h5"

    Raises HTTPInternalServerError (after logging the reason) when:
      - obj_id is a domain that starts with '/' (i.e. has no bucket prefix)
      - obj_id is a domain and a bucket argument was also supplied
      - obj_id is neither a valid domain nor a valid uuid

    NOTE(review): the "bucket should be non-null" rule for uuids is not
    enforced by the checks visible here - confirm whether that is intended.
    """
    if isValidDomain(obj_id):
        if obj_id[0] == '/':
            # bucket name should always be prefixed
            # (so the obj_id is canonical)
            msg = f"bucket not included for domain: {obj_id}"
            log.error(msg)
            raise HTTPInternalServerError()
        if bucket:
            # the bucket is already part of the domain key, so a separate
            # bucket argument is treated as a caller error
            msg = f"bucket param should not be used with obj_id for domain: {obj_id}"
            log.error(msg)
            raise HTTPInternalServerError()
    elif not isValidUuid(obj_id):
        # not a domain and not a uuid - nothing we know how to handle
        msg = f"Invalid obj id: {obj_id}"
        log.error(msg)

        raise HTTPInternalServerError()