async def POST_Root(request):
    """ Notify root that content in the domain has been modified. """
    log.request(request)
    app = request.app
    root_id = request.match_info.get('id')
    if not root_id:
        log.error("missing id in request")
        raise HTTPInternalServerError()
    if not isSchema2Id(root_id):
        log.error(f"expected schema2 id but got: {root_id}")
        raise HTTPInternalServerError()
    if not isRootObjId(root_id):
        log.error(f"expected root id but got: {root_id}")
        raise HTTPInternalServerError()
    params = request.rel_url.query
    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    log.info(f"POST_Root: {root_id} bucket: {bucket}")

    # add the id to be scanned by the s3sync task
    root_scan_ids = app["root_scan_ids"]
    root_scan_ids[root_id] = bucket

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp

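# A hedged sketch of how a periodic s3sync task might drain the root_scan_ids
# map populated above; scan_pending_roots and the scanRoot helper are
# hypothetical names used for illustration, not the actual implementation.
async def scan_pending_roots(app):
    root_scan_ids = app["root_scan_ids"]
    while len(root_scan_ids) > 0:
        # popitem removes and returns one (root_id, bucket) pair
        root_id, bucket = root_scan_ids.popitem()
        log.info(f"s3sync - scanning root: {root_id} bucket: {bucket}")
        await scanRoot(app, root_id, bucket=bucket)  # hypothetical helper
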
async def DELETE_Object(request):
    log.request(request)
    app = request.app
    delete_set = app["delete_set"]
    objid = request.match_info.get('id')
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}")
        raise HTTPBadRequest()

    if isSchema2Id(objid):
        # get the collection type for this id
        collection = getCollectionForId(objid)
        if collection == "datasets":
            delete_set.add(objid)
        elif collection == "groups":
            # only need to do anything if this is the root group
            if isRootObjId(objid):
                log.info(f"adding root group: {objid} to delete_set")
                delete_set.add(objid)
            else:
                log.info(f"ignoring delete for non-root group: {objid}")
        elif collection == "datatypes":
            log.info(f"ignoring delete for datatype object: {objid}")
        else:
            log.error(f"Unexpected collection type: {collection}")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp

async def delete_metadata_obj(app, obj_id, notify=True, root_id=None, bucket=None):
    """ Delete the given object """
    meta_cache = app['meta_cache']
    dirty_ids = app["dirty_ids"]
    log.info(f"delete_metadata_obj: {obj_id} notify: {notify}")
    validateObjId(obj_id, bucket)
    if isValidDomain(obj_id):
        bucket = getBucketForDomain(obj_id)

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        log.warn(f"{obj_id} has already been deleted")
    else:
        log.debug(f"adding {obj_id} to deleted ids")
        deleted_ids.add(obj_id)

    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    if obj_id in dirty_ids:
        log.debug(f"removing dirty_ids for: {obj_id}")
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)

    if await isS3Obj(app, s3key, bucket=bucket):
        await deleteS3Obj(app, s3key, bucket=bucket)
    else:
        log.info(f"delete_metadata_obj - key {s3key} not found (was never written?)")

    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        if isRootObjId(obj_id):
            # add to gc ids so sub-objects will be deleted
            gc_ids = app["gc_ids"]
            log.info(f"adding root id: {obj_id} for GC cleanup")
            gc_ids.add(obj_id)
        elif notify:
            root_id = getRootObjId(obj_id)
            await notify_root(app, root_id, bucket=bucket)
        # no notify for domain deletes since the root group is being deleted

    log.debug(f"delete_metadata_obj for {obj_id} done")

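# A hedged sketch of what the notify_root call above might do: POST to the
# /roots/{id} endpoint (handled by POST_Root) on the DN node that owns the
# root. getDataNodeUrl and http_post are assumed helpers and may not match
# the actual implementation.
async def notify_root(app, root_id, bucket=None):
    log.info(f"notify_root: {root_id}")
    if not isSchema2Id(root_id) or not isRootObjId(root_id):
        log.error(f"unexpected call to notify_root for id: {root_id}")
        return
    req = getDataNodeUrl(app, root_id) + "/roots/" + root_id
    params = {}
    if bucket:
        params["bucket"] = bucket
    await http_post(app, req, data={}, params=params)
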
async def bucketGC(app):
    """ remove objects from db for any deleted root groups or datasets """
    log.info("bucketGC start")
    async_sleep_time = int(config.get("async_sleep_time"))
    log.info(f"async_sleep_time: {async_sleep_time}")

    # update/initialize root object before starting GC
    while True:
        if app["node_state"] != "READY":
            log.info("bucketGC - waiting for Node state to be READY")
            await asyncio.sleep(async_sleep_time)
            continue  # wait for READY state

        gc_ids = app["gc_ids"]
        while len(gc_ids) > 0:
            obj_id = gc_ids.pop()
            log.info(f"got gc id: {obj_id}")
            if not isValidUuid(obj_id):
                log.error(f"bucketGC - got unexpected gc id: {obj_id}")
                continue
            if not isSchema2Id(obj_id):
                log.warn(f"bucketGC - ignoring v1 id: {obj_id}")
                continue
            if getCollectionForId(obj_id) == "groups":
                if not isRootObjId(obj_id):
                    log.error(f"bucketGC - unexpected non-root id: {obj_id}")
                    continue
                log.info(f"bucketGC - delete root objs: {obj_id}")
                await removeKeys(app, obj_id)
            elif getCollectionForId(obj_id) == "datasets":
                log.info(f"bucketGC - delete dataset: {obj_id}")
                await removeKeys(app, obj_id)
            else:
                log.error(f"bucketGC - unexpected obj_id class: {obj_id}")

        log.info(f"bucketGC - sleep: {async_sleep_time}")
        await asyncio.sleep(async_sleep_time)

    # shouldn't ever get here
    log.error("bucketGC terminating unexpectedly")

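# A minimal sketch, assuming an aiohttp-style application: bucketGC is a
# long-running coroutine, so it would typically be scheduled once at startup
# rather than awaited directly. The hook name and registration shown here are
# assumptions, not the actual service setup.
async def start_gc_task(app):
    # schedule the GC loop to run for the lifetime of the application
    app["gc_task"] = asyncio.create_task(bucketGC(app))

# at app creation time the handler would be registered with, e.g.:
#     app.on_startup.append(start_gc_task)
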
async def PUT_Group(request):
    """ Handler for PUT /groups.
    Used to flush all objects under a root group to S3.
    """
    FLUSH_TIME_OUT = 10.0  # TBD - make config
    FLUSH_SLEEP_INTERVAL = 0.1  # TBD - make config
    log.request(request)
    app = request.app
    params = request.rel_url.query
    root_id = request.match_info.get('id')
    if "bucket" in params:
        bucket = params["bucket"]
    else:
        bucket = None
    log.info(f"PUT group (flush): {root_id} bucket: {bucket}")
    # the bucket param is not strictly needed since each dirty id tracks
    # the bucket it should be written to

    if not isValidUuid(root_id, obj_class="group"):
        log.error(f"Unexpected group_id: {root_id}")
        raise HTTPInternalServerError()

    schema2 = isSchema2Id(root_id)
    if schema2 and not isRootObjId(root_id):
        log.error(f"Expected root id for flush but got: {root_id}")
        raise HTTPInternalServerError()

    flush_start = time.time()
    flush_set = set()
    dirty_ids = app["dirty_ids"]

    for obj_id in dirty_ids:
        if schema2:
            if isValidUuid(obj_id) and getRootObjId(obj_id) == root_id:
                flush_set.add(obj_id)
        else:
            # for schema1 it's not easy to determine if a given id is in a
            # domain, so just wait on all of them
            flush_set.add(obj_id)

    log.debug(f"flushop - waiting on {len(flush_set)} items")
    while time.time() - flush_start < FLUSH_TIME_OUT:
        # check to see if the items in our flush set are still there
        remaining_set = set()
        for obj_id in flush_set:
            if obj_id not in dirty_ids:
                log.debug(f"flush - {obj_id} has been written")
            elif dirty_ids[obj_id][0] > flush_start:
                log.debug(f"flush - {obj_id} has been updated after flush start")
            else:
                log.debug(f"flush - {obj_id} still pending")
                remaining_set.add(obj_id)
        flush_set = remaining_set
        if len(flush_set) == 0:
            log.debug("flush op - all objects have been written")
            break
        msg = f"flushop - {len(flush_set)} items remaining, "
        msg += f"sleeping for {FLUSH_SLEEP_INTERVAL}"
        log.debug(msg)
        await asyncio.sleep(FLUSH_SLEEP_INTERVAL)

    if len(flush_set) > 0:
        log.warn(f"flushop - {len(flush_set)} items not updated after {FLUSH_TIME_OUT}")
        raise HTTPServiceUnavailable()

    resp = json_response(None, status=204)  # No Content response
    log.response(request, resp=resp)
    return resp

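# A usage sketch (not part of the service code), assuming the route for
# PUT_Group is "/groups/{id}" on a DN node: the flush is triggered by a plain
# HTTP PUT. The host, port, and id below are placeholders.
import aiohttp


async def flush_root(root_id, bucket=None):
    params = {"bucket": bucket} if bucket else {}
    url = f"http://dn-node:6101/groups/{root_id}"
    async with aiohttp.ClientSession() as session:
        async with session.put(url, params=params) as rsp:
            # 204 No Content indicates every dirty object under the root was written
            return rsp.status == 204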