def main():
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] in ("-h", "--help")):
        printUsage()
        sys.exit(1)

    rootid = sys.argv[1]

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)
    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to set up an asyncio loop to query s3
    loop = asyncio.get_event_loop()
    app = {}
    app["bucket_name"] = config.get("bucket_name")
    app["loop"] = loop
    session = get_session(loop=loop)
    app["session"] = session
    loop.run_until_complete(run_delete(app, rootid))
    loop.close()

    print("done!")
async def PUT_Object(request):
    """HTTP method to notify creation/update of objid"""
    log.request(request)
    app = request.app
    pending_set = app["pending"]
    objid = request.match_info.get('id')
    if not objid:
        log.error("PUT_Object with no id")
        raise HTTPBadRequest()

    log.info(f"PUT_Object/{objid}")
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}, ignoring")
        raise HTTPBadRequest()

    if isSchema2Id(objid):
        rootid = getRootObjId(objid)
        log.debug(f"adding root: {rootid} to pending queue for objid: {objid}")
        pending_set.add(rootid)

    resp_json = {}
    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
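# Usage sketch (not part of the original source): a peer node could notify
# this endpoint after writing an object. The "/objects/{id}" route, the
# `an_url` base URL, and the aiohttp ClientSession argument are assumptions
# made for illustration.
async def notify_object_update(session, an_url, objid):
    # PUT with an empty body; a 201 response means the object's root
    # was added to the pending queue for a future scan
    async with session.put(f"{an_url}/objects/{objid}") as rsp:
        return rsp.status == 201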
async def GET_Group(request):
    """HTTP GET method to return JSON for /groups/
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    log.info(f"GET group: {group_id} bucket: {bucket}")

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)

    resp_json = {}
    resp_json["id"] = group_json["id"]
    resp_json["root"] = group_json["root"]
    resp_json["created"] = group_json["created"]
    resp_json["lastModified"] = group_json["lastModified"]
    resp_json["linkCount"] = len(group_json["links"])
    resp_json["attributeCount"] = len(group_json["attributes"])
    if "include_links" in params and params["include_links"]:
        resp_json["links"] = group_json["links"]
    if "include_attrs" in params and params["include_attrs"]:
        resp_json["attributes"] = group_json["attributes"]

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def DELETE_Dataset(request):
    """HTTP DELETE method for dataset
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    dset_id = request.match_info.get('id')
    log.info("DELETE dataset: {}".format(dset_id))

    if not isValidUuid(dset_id, obj_class="dataset"):
        log.error("Unexpected dataset id: {}".format(dset_id))
        raise HTTPInternalServerError()

    # verify the id exists
    obj_found = await check_metadata_obj(app, dset_id)
    if not obj_found:
        raise HTTPNotFound()

    log.debug("deleting dataset: {}".format(dset_id))

    notify = True
    if "Notify" in params and not params["Notify"]:
        notify = False
    await delete_metadata_obj(app, dset_id, notify=notify)

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def GET_Datatype(request):
    """HTTP GET method to return JSON for /datatypes/
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    ctype_id = get_obj_id(request)

    if not isValidUuid(ctype_id, obj_class="type"):
        log.error(f"Unexpected type_id: {ctype_id}")
        raise HTTPInternalServerError()

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    ctype_json = await get_metadata_obj(app, ctype_id, bucket=bucket)

    resp_json = {}
    resp_json["id"] = ctype_json["id"]
    resp_json["root"] = ctype_json["root"]
    resp_json["created"] = ctype_json["created"]
    resp_json["lastModified"] = ctype_json["lastModified"]
    resp_json["type"] = ctype_json["type"]
    resp_json["attributeCount"] = len(ctype_json["attributes"])
    if "include_attrs" in params and params["include_attrs"]:
        resp_json["attributes"] = ctype_json["attributes"]

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def check_metadata_obj(app, obj_id):
    """ Return False if obj does not exist
    """
    if not isValidDomain(obj_id) and not isValidUuid(obj_id):
        msg = "Invalid obj id: {}".format(obj_id)
        log.error(msg)
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        msg = "{} has been deleted".format(obj_id)
        log.info(msg)
        return False

    meta_cache = app['meta_cache']
    if obj_id in meta_cache:
        found = True
    else:
        # not in cache, check if the s3 obj exists
        s3_key = getS3Key(obj_id)
        log.debug("check_metadata_obj({})".format(s3_key))
        # does key exist?
        found = await isS3Obj(app, s3_key)
    return found
async def GET_Dataset(request):
    """HTTP GET method to return JSON for /datasets/
    """
    log.request(request)
    app = request.app
    dset_id = get_obj_id(request)

    if not isValidUuid(dset_id, obj_class="dataset"):
        log.error("Unexpected dataset id: {}".format(dset_id))
        raise HTTPInternalServerError()

    dset_json = await get_metadata_obj(app, dset_id)

    resp_json = {}
    resp_json["id"] = dset_json["id"]
    resp_json["root"] = dset_json["root"]
    resp_json["created"] = dset_json["created"]
    resp_json["lastModified"] = dset_json["lastModified"]
    resp_json["type"] = dset_json["type"]
    resp_json["shape"] = dset_json["shape"]
    resp_json["attributeCount"] = len(dset_json["attributes"])
    if "creationProperties" in dset_json:
        resp_json["creationProperties"] = dset_json["creationProperties"]
    if "layout" in dset_json:
        resp_json["layout"] = dset_json["layout"]

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def DELETE_Group(request):
    """HTTP DELETE method for /groups/
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)
    log.info("DELETE group: {}".format(group_id))

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    # verify the id exists
    obj_found = await check_metadata_obj(app, group_id)
    if not obj_found:
        log.debug(f"delete called on non-existent obj: {group_id}")
        raise HTTPNotFound()

    log.debug("deleting group: {}".format(group_id))

    notify = True
    if "Notify" in params and not params["Notify"]:
        notify = False
    await delete_metadata_obj(app, group_id, notify=notify)

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def GET_Group(request):
    """HTTP GET method to return JSON for /groups/
    """
    log.request(request)
    app = request.app
    group_id = get_obj_id(request)
    log.info("GET group: {}".format(group_id))

    if not isValidUuid(group_id, obj_class="group"):
        log.error("Unexpected group_id: {}".format(group_id))
        raise HTTPInternalServerError()

    group_json = await get_metadata_obj(app, group_id)

    resp_json = {}
    resp_json["id"] = group_json["id"]
    resp_json["root"] = group_json["root"]
    resp_json["created"] = group_json["created"]
    resp_json["lastModified"] = group_json["lastModified"]
    resp_json["linkCount"] = len(group_json["links"])
    resp_json["attributeCount"] = len(group_json["attributes"])

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def DELETE_Link(request):
    """HTTP method to delete a link"""
    log.request(request)
    app = request.app

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, obj_class="Group"):
        msg = "Invalid group id: {}".format(group_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    await validateAction(app, domain, group_id, username, "delete")

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id + "/links/" + link_title
    rsp_json = await http_delete(app, req)

    resp = await jsonResponse(request, rsp_json)
    log.response(request, resp=resp)
    return resp
def get_obj_id(request, body=None):
    """ Get object id from request
        Raise HTTPException on errors.
    """
    obj_id = None
    collection = None
    app = request.app
    if body and "id" in body:
        obj_id = body["id"]
    else:
        collection = getRequestCollectionName(request)  # returns datasets|groups|datatypes
        obj_id = request.match_info.get('id')

    if not obj_id:
        msg = "Missing object id"
        log.error(msg)
        raise HTTPInternalServerError()

    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid obj id: {obj_id}"
        log.error(msg)
        raise HTTPInternalServerError()

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()

    return obj_id
async def DELETE_Object(request):
    log.request(request)
    app = request.app
    delete_set = app["delete_set"]

    objid = request.match_info.get('id')
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}")
        raise HTTPBadRequest()

    if isSchema2Id(objid):
        # get rootid for this id
        collection = getCollectionForId(objid)
        if collection == "datasets":
            delete_set.add(objid)
        elif collection == "groups":
            # only need to do anything if this is the root group
            if isRootObjId(objid):
                log.info(f"adding root group: {objid} to delete_set")
                delete_set.add(objid)
            else:
                log.info(f"ignoring delete non-root group: {objid}")
        elif collection == "datatypes":
            log.info(f"ignoring delete for datatype object: {objid}")
        else:
            log.error(f"Unexpected collection type: {collection}")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
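# Illustration (hypothetical ids) of which deletes DELETE_Object queues for
# garbage collection: dataset ids and root group ids go into delete_set;
# non-root groups and datatypes are skipped, since their keys are removed
# when their root group is processed.
#
#   DELETE /objects/d-...  (dataset)        -> added to delete_set
#   DELETE /objects/g-...  (root group)     -> added to delete_set
#   DELETE /objects/g-...  (non-root group) -> ignored
#   DELETE /objects/t-...  (datatype)       -> ignored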
async def DELETE_Group(request):
    """HTTP method to delete a group resource"""
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    group_id = request.match_info.get('id')
    if not group_id:
        msg = "Missing group id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(group_id, "Group"):
        msg = f"Invalid group id: {group_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, group_id, username, "delete")

    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    if group_id == domain_json["root"]:
        msg = "Forbidden - deletion of root group is not allowed - delete domain first"
        log.warn(msg)
        raise HTTPForbidden()

    req = getDataNodeUrl(app, group_id)
    req += "/groups/" + group_id
    params = {}
    if bucket:
        params["bucket"] = bucket
    log.debug(f"http_delete req: {req} params: {params}")

    await http_delete(app, req, params=params)

    if group_id in meta_cache:
        del meta_cache[group_id]  # remove from cache

    resp = await jsonResponse(request, {})
    log.response(request, resp=resp)
    return resp
async def DELETE_Chunk(request):
    """HTTP DELETE method for /chunks/
    Note: clients (i.e. SN nodes) don't directly delete chunks.
    This method should only be called by the AN node.
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info(f"DELETE chunk: {chunk_id}")

    if not isValidUuid(chunk_id, "Chunk"):
        msg = f"Invalid chunk id: {chunk_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    validateInPartition(app, chunk_id)

    chunk_cache = app['chunk_cache']
    s3key = getS3Key(chunk_id)
    log.debug(f"DELETE_Chunk s3_key: {s3key}")

    if chunk_id in chunk_cache:
        del chunk_cache[chunk_id]

    deflate_map = app["deflate_map"]
    shuffle_map = app["shuffle_map"]
    dset_id = getDatasetId(chunk_id)
    if dset_id in deflate_map:
        # the only reason chunks are ever deleted is if the dataset is being
        # deleted, so it should be safe to remove this entry now
        log.info(f"Removing deflate_map entry for {dset_id}")
        del deflate_map[dset_id]
    if dset_id in shuffle_map:
        log.info(f"Removing shuffle_map entry for {dset_id}")
        del shuffle_map[dset_id]

    if await isStorObj(app, s3key, bucket=bucket):
        await deleteStorObj(app, s3key, bucket=bucket)
    else:
        log.info(f"DELETE_Chunk - key {s3key} not found (never written)?")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
def main():
    if len(sys.argv) == 1 or (len(sys.argv) > 1 and sys.argv[1] in ("-h", "--help")):
        printUsage()
        sys.exit(1)

    rootid = sys.argv[1]
    if len(sys.argv) > 2 and sys.argv[2] == "-update":
        do_update = True
    else:
        do_update = False

    if not isValidUuid(rootid):
        print("Invalid root id!")
        sys.exit(1)
    if not isSchema2Id(rootid):
        print("This tool can only be used with Schema v2 ids")
        sys.exit(1)

    # we need to set up an asyncio loop to query s3
    loop = asyncio.get_event_loop()
    app = {}
    app["bucket_name"] = config.get("bucket_name")
    app["loop"] = loop
    session = get_session(loop=loop)
    app["session"] = session

    loop.run_until_complete(run_scan(app, rootid=rootid, update=do_update))
    loop.close()

    results = app["scanRoot_results"]
    datasets = results["datasets"]
    lastModified = datetime.fromtimestamp(results["lastModified"])
    total_size = results["metadata_bytes"] + results["allocated_bytes"]
    print(f"lastModified: {lastModified}")
    print(f"size: {total_size}")
    print(f"num chunks: {results['num_chunks']}")
    print(f"num_groups: {results['num_groups']}")
    print(f"num_datatypes: {results['num_datatypes']}")
    print(f"num_datasets: {len(datasets)}")
    for dsetid in datasets:
        dataset_info = datasets[dsetid]
        print(f"    {dsetid}: {dataset_info['lastModified']}, "
              f"{dataset_info['num_chunks']}, {dataset_info['allocated_bytes']}")
    scan_start = datetime.fromtimestamp(results["scan_start"])
    print(f"scan_start: {scan_start}")
    scan_complete = datetime.fromtimestamp(results["scan_complete"])
    print(f"scan_complete: {scan_complete}")
    print("done!")
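# run_scan is not defined in this section; a minimal sketch, under the
# assumption that it only needs to forward to scanRoot (defined below),
# which also stashes its results dict in app["scanRoot_results"] for
# main() to read after the loop completes:
async def run_scan(app, rootid, update=False):
    return await scanRoot(app, rootid, update=update,
                          bucket=app["bucket_name"])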
async def delete_metadata_obj(app, obj_id, notify=True, root_id=None, bucket=None):
    """ Delete the given object """
    meta_cache = app['meta_cache']
    dirty_ids = app["dirty_ids"]
    log.info(f"delete_metadata_obj: {obj_id} notify: {notify}")
    validateObjId(obj_id, bucket)
    if isValidDomain(obj_id):
        bucket = getBucketForDomain(obj_id)

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        log.warn(f"{obj_id} has already been deleted")
    else:
        log.debug(f"adding {obj_id} to deleted ids")
        deleted_ids.add(obj_id)

    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    if obj_id in dirty_ids:
        log.debug(f"removing dirty_ids for: {obj_id}")
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)
    if await isS3Obj(app, s3key, bucket=bucket):
        await deleteS3Obj(app, s3key, bucket=bucket)
    else:
        log.info(f"delete_metadata_obj - key {s3key} not found (never written)?")

    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        if isRootObjId(obj_id):
            # add to gc ids so sub-objects will be deleted
            gc_ids = app["gc_ids"]
            log.info(f"adding root id: {obj_id} for GC cleanup")
            gc_ids.add(obj_id)
        elif notify:
            root_id = getRootObjId(obj_id)
            await notify_root(app, root_id, bucket=bucket)
        # no notify for domain deletes since the root group is being deleted

    log.debug(f"delete_metadata_obj for {obj_id} done")
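# Flow illustration (made-up ids): deleting a non-root object notifies its
# root so the per-root scan data can be refreshed, while deleting a root
# group queues it for garbage collection instead:
#
#   await delete_metadata_obj(app, dset_id, bucket="mybucket")
#       -> s3 key deleted, then notify_root() POSTs to the root's DN
#   await delete_metadata_obj(app, root_id, bucket="mybucket")
#       -> root_id added to app["gc_ids"]; bucketGC removes sub-objects later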
async def DELETE_Attribute(request):
    """HTTP method to delete an attribute resource"""
    log.request(request)
    app = request.app
    collection = getRequestCollectionName(request)  # returns datasets|groups|datatypes

    obj_id = request.match_info.get('id')
    if not obj_id:
        msg = "Missing object id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(obj_id, obj_class=collection):
        msg = f"Invalid object id: {obj_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    attr_name = request.match_info.get('name')
    log.debug(f"Attribute name: [{attr_name}]")
    validateAttributeName(attr_name)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, obj_id, username, "delete")

    req = getDataNodeUrl(app, obj_id)
    req += '/' + collection + '/' + obj_id + "/attributes/" + attr_name
    log.info("DELETE Attribute: " + req)
    params = {}
    if bucket:
        params["bucket"] = bucket
    rsp_json = await http_delete(app, req, params=params)
    log.info(f"DELETE Attribute resp: {rsp_json}")

    hrefs = []  # TBD
    req_rsp = {"hrefs": hrefs}
    resp = await jsonResponse(request, req_rsp)
    log.response(request, resp=resp)
    return resp
async def bucketCheck(app):
    """ Verify that contents of bucket are self-consistent
    """
    now = int(time.time())
    log.info("bucket check {}".format(unixTimeToUTC(now)))

    # do initial listKeys
    await listKeys(app)

    # clear used flags
    clearUsedFlags(app)

    # mark objs
    await markObjs(app)

    unlinked_count = 0
    s3objs = app["s3objs"]
    for objid in s3objs:
        if isValidUuid(objid) and not isValidChunkId(objid):
            try:
                s3obj = await getS3Obj(app, objid)
                if s3obj.used is False:
                    unlinked_count += 1
            except HTTPInternalServerError as hpe:
                log.warn("got error retrieving {}: {}".format(objid, hpe.code))

    domains = app["domains"]
    for domain in domains:
        print("domain:", domain)
    roots = app["roots"]
    for root in roots:
        print("root:", root)

    top_level_domains = []
    for domain in domains:
        if domain[0] != '/':
            log.error("unexpected domain: {}".format(domain))
            continue
        if domain[1:].find('/') == -1:
            top_level_domains.append(domain)

    print("top-level-domains:")
    for domain in top_level_domains:
        print(domain)
    print("=" * 80)

    print("total storage: {}".format(app["bytes_in_bucket"]))
    print("Num objects: {}".format(len(app["s3objs"])))
    print("Num domains: {}".format(len(app["domains"])))
    print("Num root groups: {}".format(len(app["roots"])))
    print("Unlinked objects: {}".format(unlinked_count))
async def notify_root(app, root_id, bucket=None):
    # flag to write to S3
    log.info(f"notify_root: {root_id}")
    if not isValidUuid(root_id) or not isSchema2Id(root_id):
        log.error(f"unexpected call to notify with invalid id: {root_id}")
        return
    notify_req = getDataNodeUrl(app, root_id) + "/roots/" + root_id
    log.info(f"Notify: {notify_req} [{bucket}]")
    params = {}
    if bucket:
        params["bucket"] = bucket
    await http_post(app, notify_req, data={}, params=params)
async def scanRoot(app, rootid, update=False, bucket=None):
    # iterate through all s3 keys under the given root.
    # Return dict with stats for the root.
    #
    # Note: not re-entrant!  Only one scanRoot can be run at a time per app.
    log.info(f"scanRoot for rootid: {rootid} bucket: {bucket}")

    if not isValidUuid(rootid):
        raise ValueError("Invalid root id")
    if not isSchema2Id(rootid):
        log.warn(f"no tabulation for schema v1 id: {rootid} returning null results")
        return {}

    if not bucket:
        bucket = config.get("bucket_name")
    if not bucket:
        raise ValueError(f"no bucket defined for scan of {rootid}")

    root_key = getS3Key(rootid)
    if not root_key.endswith("/.group.json"):
        raise ValueError("unexpected root key")
    root_prefix = root_key[:-(len(".group.json"))]
    log.debug(f"scanRoot - using prefix: {root_prefix}")

    results = {}
    results["lastModified"] = 0
    results["num_groups"] = 0
    results["num_datatypes"] = 0
    results["datasets"] = {}  # since we need per dataset info
    results["num_chunks"] = 0
    results["allocated_bytes"] = 0
    results["metadata_bytes"] = 0
    results["scan_start"] = time.time()

    app["scanRoot_results"] = results

    await getStorKeys(app, prefix=root_prefix, include_stats=True,
                      bucket=bucket, callback=scanRootCallback)

    log.info(f"scan complete for rootid: {rootid}")
    results["scan_complete"] = time.time()

    if update:
        # write .info object back to S3
        info_key = root_prefix + ".info.json"
        log.info(f"updating info key: {info_key}")
        await putStorJSONObj(app, info_key, results, bucket=bucket)
    return results
async def GET_DatasetShape(request):
    """HTTP method to return JSON for dataset's shape"""
    log.request(request)
    app = request.app

    dset_id = request.match_info.get('id')
    if not dset_id:
        msg = "Missing dataset id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(dset_id, "Dataset"):
        msg = f"Invalid dataset id: {dset_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    username, pswd = getUserPasswordFromRequest(request)
    if username is None and app['allow_noauth']:
        username = "******"
    else:
        await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)

    # get authoritative state for dataset from DN (even if it's in the meta_cache).
    dset_json = await getObjectJson(app, dset_id, refresh=True, bucket=bucket)

    await validateAction(app, domain, dset_id, username, "read")

    hrefs = []
    dset_uri = '/datasets/' + dset_id
    self_uri = dset_uri + "/shape"
    hrefs.append({'rel': 'self', 'href': getHref(request, self_uri)})
    hrefs.append({'rel': 'owner', 'href': getHref(request, dset_uri)})
    root_uri = '/groups/' + dset_json["root"]
    hrefs.append({'rel': 'root', 'href': getHref(request, root_uri)})

    resp_json = {}
    resp_json["shape"] = dset_json["shape"]
    resp_json["hrefs"] = hrefs
    resp_json["created"] = dset_json["created"]
    resp_json["lastModified"] = dset_json["lastModified"]

    resp = await jsonResponse(request, resp_json)
    log.response(request, resp=resp)
    return resp
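# Illustrative response body for GET_DatasetShape (hypothetical values; the
# shape class/dims fields are assumed to follow the HDF5 REST conventions
# used by the rest of this codebase):
#
# {
#     "shape": {"class": "H5S_SIMPLE", "dims": [100, 200], "maxdims": [100, 200]},
#     "created": 1622040000.0,
#     "lastModified": 1622040500.0,
#     "hrefs": [{"rel": "self", "href": ".../datasets/<id>/shape"}, ...]
# }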
async def DELETE_Link(request):
    """HTTP DELETE method for group links
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"DELETE link: {group_id}")

    if not isValidUuid(group_id, obj_class="group"):
        msg = f"Unexpected group_id: {group_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)

    # TBD: Possible race condition
    if "links" not in group_json:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    links = group_json["links"]
    if link_title not in links:
        msg = f"Link name {link_title} not found in group: {group_id}"
        log.warn(msg)
        raise HTTPNotFound()

    del links[link_title]  # remove the link from dictionary

    # update the group lastModified
    now = time.time()
    group_json["lastModified"] = now

    # write back to S3
    await save_metadata_obj(app, group_id, group_json, bucket=bucket)

    hrefs = []  # TBD
    resp_json = {"hrefs": hrefs}

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def DELETE_Datatype(request):
    """HTTP method to delete a committed type resource"""
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    ctype_id = request.match_info.get('id')
    if not ctype_id:
        msg = "Missing committed type id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(ctype_id, "Type"):
        msg = f"Invalid committed type id: {ctype_id}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = f"Invalid domain: {domain}"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    bucket = getBucketForDomain(domain)
    params = {}
    if bucket:
        params["bucket"] = bucket

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error(f"Expected root key for domain: {domain}")
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, ctype_id, username, "delete")

    req = getDataNodeUrl(app, ctype_id) + "/datatypes/" + ctype_id
    await http_delete(app, req, params=params)

    if ctype_id in meta_cache:
        del meta_cache[ctype_id]  # remove from cache

    resp = await jsonResponse(request, {})
    log.response(request, resp=resp)
    return resp
async def DELETE_Dataset(request):
    """HTTP method to delete a dataset resource"""
    log.request(request)
    app = request.app
    meta_cache = app['meta_cache']

    dset_id = request.match_info.get('id')
    if not dset_id:
        msg = "Missing dataset id"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if not isValidUuid(dset_id, "Dataset"):
        msg = "Invalid dataset id: {}".format(dset_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    username, pswd = getUserPasswordFromRequest(request)
    await validateUserPassword(app, username, pswd)

    domain = getDomainFromRequest(request)
    if not isValidDomain(domain):
        msg = "Invalid host value: {}".format(domain)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    # get domain JSON
    domain_json = await getDomainJson(app, domain)
    if "root" not in domain_json:
        log.error("Expected root key for domain: {}".format(domain))
        raise HTTPBadRequest(reason="Unexpected Error")

    # TBD - verify that the obj_id belongs to the given domain
    await validateAction(app, domain, dset_id, username, "delete")

    req = getDataNodeUrl(app, dset_id) + "/datasets/" + dset_id
    await http_delete(app, req)

    if dset_id in meta_cache:
        del meta_cache[dset_id]  # remove from cache

    resp = await jsonResponse(request, {})
    log.response(request, resp=resp)
    return resp
async def bucketGC(app):
    """ remove objects from db for any deleted root groups or datasets
    """
    log.info("bucketGC start")
    async_sleep_time = int(config.get("async_sleep_time"))
    log.info("async_sleep_time: {}".format(async_sleep_time))

    # update/initialize root object before starting GC
    while True:
        if app["node_state"] != "READY":
            log.info("bucketGC - waiting for Node state to be READY")
            await asyncio.sleep(async_sleep_time)
            continue  # wait for READY state

        gc_ids = app["gc_ids"]
        while len(gc_ids) > 0:
            obj_id = gc_ids.pop()
            log.info(f"got gc id: {obj_id}")
            if not isValidUuid(obj_id):
                log.error(f"bucketGC - got unexpected gc id: {obj_id}")
                continue
            if not isSchema2Id(obj_id):
                log.warn(f"bucketGC - ignoring v1 id: {obj_id}")
                continue
            if getCollectionForId(obj_id) == "groups":
                if not isRootObjId(obj_id):
                    log.error(f"bucketGC - unexpected non-root id: {obj_id}")
                    continue
                log.info(f"bucketGC - delete root objs: {obj_id}")
                await removeKeys(app, obj_id)
            elif getCollectionForId(obj_id) == "datasets":
                log.info(f"bucketGC - delete dataset: {obj_id}")
                await removeKeys(app, obj_id)
            else:
                log.error(f"bucketGC - unexpected obj_id class: {obj_id}")

        log.info(f"bucketGC - sleep: {async_sleep_time}")
        await asyncio.sleep(async_sleep_time)

    # shouldn't ever get here
    log.error("bucketGC terminating unexpectedly")
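# Startup sketch (assumption, not part of the original source): since
# bucketGC is an infinite loop, it would be scheduled as a background task
# rather than awaited directly, e.g. during app initialization:
#
#     app["gc_ids"] = set()
#     app["node_state"] = "READY"
#     asyncio.ensure_future(bucketGC(app))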
def main():
    if len(sys.argv) == 1 or sys.argv[1] == "-h" or sys.argv[1] == "--help":
        printUsage()
        sys.exit(1)

    obj_id = sys.argv[-1]
    if not isValidUuid(obj_id):
        print("Invalid obj id")
        sys.exit(1)

    # we need to set up an asyncio loop to query s3
    loop = asyncio.get_event_loop()
    session = get_session(loop=loop)

    app = {}
    app["session"] = session
    app['bucket_name'] = config.get("bucket_name")
    loop.run_until_complete(printS3Obj(app, obj_id))
    loop.close()
async def DELETE_Chunk(request):
    """HTTP DELETE method for /chunks/
    Note: clients (i.e. SN nodes) don't directly delete chunks.
    This method should only be called by the AN node.
    """
    log.request(request)
    app = request.app
    chunk_id = request.match_info.get('id')
    if not chunk_id:
        msg = "Missing chunk id"
        log.error(msg)
        raise HTTPBadRequest(reason=msg)
    log.info("DELETE chunk: {}".format(chunk_id))

    if not isValidUuid(chunk_id, "Chunk"):
        msg = "Invalid chunk id: {}".format(chunk_id)
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    validateInPartition(app, chunk_id)

    chunk_cache = app['chunk_cache']
    s3_key = getS3Key(chunk_id)
    log.debug("DELETE_Chunk s3_key: {}".format(s3_key))

    if chunk_id in chunk_cache:
        del chunk_cache[chunk_id]

    deflate_map = app["deflate_map"]
    dset_id = getDatasetId(chunk_id)
    if dset_id in deflate_map:
        # the only reason chunks are ever deleted is if the dataset is being
        # deleted, so it should be safe to remove this entry now
        log.info("Removing deflate_map entry for {}".format(dset_id))
        del deflate_map[dset_id]

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
async def PUT_Domain(request):
    """HTTP PUT method to notify creation/update of a domain
    """
    log.request(request)
    app = request.app
    pending_set = app["pending"]
    params = request.rel_url.query
    if "domain" not in params:
        msg = "No domain provided"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    domain = params["domain"]
    if not domain.startswith("/"):
        msg = "Domain expected to start with /"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    if len(domain) < 2:
        msg = "Invalid domain"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)

    if "root" in params:
        rootid = params["root"]
        if not isValidUuid(rootid):
            log.warn(f"Invalid id: {rootid}")
            raise HTTPBadRequest()
        log.debug(f"new rootid: {rootid} for domain: {domain}")
        if isSchema2Id(rootid):
            log.info(f"Adding root: {rootid} to pending for PUT domain: {domain}")
            pending_set.add(rootid)

    resp_json = {}
    resp = json_response(resp_json, status=201)
    log.response(request, resp=resp)
    return resp
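# Usage sketch (assumptions: the handler is routed at "/domains", and the
# caller holds an aiohttp ClientSession): an SN node could report a newly
# created domain and its root group like this.
async def notify_domain_update(session, an_url, domain, rootid):
    # domain and root are passed as query params; the handler queues the
    # root for a future scan and responds with 201
    params = {"domain": domain, "root": rootid}
    async with session.put(f"{an_url}/domains", params=params) as rsp:
        return rsp.status == 201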
async def GET_Link(request):
    """HTTP GET method to return JSON for a link
    """
    log.request(request)
    app = request.app
    params = request.rel_url.query
    group_id = get_obj_id(request)
    log.info(f"GET link: {group_id}")

    if not isValidUuid(group_id, obj_class="group"):
        log.error(f"Unexpected group_id: {group_id}")
        raise HTTPInternalServerError()

    link_title = request.match_info.get('title')
    validateLinkName(link_title)

    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None

    group_json = await get_metadata_obj(app, group_id, bucket=bucket)
    log.info(f"for id: {group_id} got group json: {group_json}")
    if "links" not in group_json:
        log.error(f"unexpected group data for id: {group_id}")
        raise HTTPInternalServerError()

    links = group_json["links"]
    if link_title not in links:
        log.warn(f"Link name {link_title} not found in group: {group_id}")
        raise HTTPNotFound()

    link_json = links[link_title]

    resp = json_response(link_json)
    log.response(request, resp=resp)
    return resp
def validateObjId(obj_id, bucket):
    """ Verify the passed-in id is what we are expecting.
        For uuids, obj_id should be an actual uuid and bucket should be non-null.
        For domains, obj_id should include the bucket prefix and bucket should be
        empty, e.g. obj_id="mybucket/home/bob/myfile.h5"
    """
    if isValidDomain(obj_id):
        if obj_id[0] == '/':
            # bucket name should always be prefixed
            # (so the obj_id is canonical)
            msg = f"bucket not included for domain: {obj_id}"
            log.error(msg)
            raise HTTPInternalServerError()
        if bucket:
            msg = f"bucket param should not be used with obj_id for domain: {obj_id}"
            log.error(msg)
            raise HTTPInternalServerError()
    elif not isValidUuid(obj_id):
        msg = f"Invalid obj id: {obj_id}"
        log.error(msg)
        raise HTTPInternalServerError()
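# Example calls (illustrative, with made-up names), following the function's
# own logic above:
#
#   validateObjId("mybucket/home/bob/myfile.h5", None)  # ok: domain includes bucket
#   validateObjId("/home/bob/myfile.h5", None)          # error: no bucket prefix
#   validateObjId("mybucket/home/bob/myfile.h5", "mybucket")
#                                                       # error: redundant bucket param
#   validateObjId(dset_id, "mybucket")                  # ok, if dset_id is a valid uuid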