async def PUT_Object(request):
    """Notification handler for create/update of a single object.

    For schema-2 ids, queues the object's root id in the app "pending"
    set so it will be picked up by later processing; returns 201.
    """
    log.request(request)
    app = request.app
    objid = request.match_info.get('id')
    # guard: an id is required in the request path
    if not objid:
        log.error("PUT_Object with no id")
        raise HTTPBadRequest()
    log.info(f"PUT_Object/{objid}")
    # guard: the id must be a well-formed uuid
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}, ignoring")
        raise HTTPBadRequest()
    if isSchema2Id(objid):
        # schema-2 id: queue its root for later processing
        rootid = getRootObjId(objid)
        log.debug(f"adding root: {rootid} to pending queue for objid: {objid}")
        app["pending"].add(rootid)
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
async def delete_metadata_obj(app, obj_id, notify=True, root_id=None, bucket=None):
    """Delete the given object: mark it deleted, purge caches, remove the
    S3 key if present, and (for schema-2 ids) schedule GC or notify the root.
    """
    log.info(f"delete_meta_data_obj: {obj_id} notify: {notify}")
    validateObjId(obj_id, bucket)
    if isValidDomain(obj_id):
        # domain ids carry their own bucket
        bucket = getBucketForDomain(obj_id)
    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    # record the deletion so later writes/reads can be rejected
    deleted_ids = app['deleted_ids']
    if obj_id not in deleted_ids:
        log.debug(f"adding {obj_id} to deleted ids")
        deleted_ids.add(obj_id)
    else:
        log.warn(f"{obj_id} has already been deleted")

    # drop any cached copy
    meta_cache = app['meta_cache']
    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    # drop any pending-write entry
    dirty_ids = app["dirty_ids"]
    if obj_id in dirty_ids:
        log.debug(f"removing dirty_ids for: {obj_id}")
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)
    if not await isS3Obj(app, s3key, bucket=bucket):
        log.info(
            f"delete_metadata_obj - key {s3key} not found (never written)?")
    else:
        await deleteS3Obj(app, s3key, bucket=bucket)

    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        if isRootObjId(obj_id):
            # add to gc ids so sub-objects will be deleted
            log.info(f"adding root id: {obj_id} for GC cleanup")
            app["gc_ids"].add(obj_id)
        elif notify:
            root_id = getRootObjId(obj_id)
            await notify_root(app, root_id, bucket=bucket)
        # no notify for domain deletes since the root group is being deleted

    log.debug(f"delete_metadata_obj for {obj_id} done")
async def PUT_Objects(request):
    """Notification handler for create/update of a batch of objects.

    Expects a JSON body with an "objs" list; each valid schema-2 id has
    its root queued in the app "pending" set.  Returns 201.
    """
    log.request(request)
    app = request.app
    log.info("PUT_Objects")
    # guard: a JSON body is required
    if not request.has_body:
        msg = "PUT objects with no body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    body = await request.json()
    log.debug(f"Got PUT Objects body: {body}")
    if "objs" not in body:
        msg = "expected to find objs key in body"
        log.warn(msg)
        raise HTTPBadRequest(reason=msg)
    pending_set = app["pending"]
    for objid in body["objs"]:
        log.debug(f"PUT_Objects, objid: {objid}")
        # skip malformed ids
        if not isValidUuid(objid):
            log.warn(f"Invalid id: {objid}, ignoring")
            continue
        # skip schema v1 ids
        if not isSchema2Id(objid):
            log.info(f"PUT_Objects ignoring v1 id: {objid}")
            continue
        rootid = getRootObjId(objid)
        log.debug(f"adding root: {rootid} to pending queue for objid: {objid}")
        pending_set.add(rootid)
    resp = json_response({}, status=201)
    log.response(request, resp=resp)
    return resp
async def PUT_Group(request):
    """Handler for PUT /groups - flush all objects under a root group to S3.

    Polls the dirty-id map (written out asynchronously by the s3sync task)
    until every object belonging to the given root has been persisted, or
    FLUSH_TIME_OUT elapses.  Returns 204 No Content on success; raises
    HTTPServiceUnavailable (503) if some objects remain unwritten at
    timeout.
    """
    FLUSH_TIME_OUT = 10.0  # TBD make config
    FLUSH_SLEEP_INTERVAL = 0.1  # TBD make config
    log.request(request)
    app = request.app
    params = request.rel_url.query
    root_id = request.match_info.get('id')
    # don't really need bucket param since the dirty ids know which bucket
    # they should write to
    bucket = params.get("bucket")
    log.info(f"PUT group (flush): {root_id} bucket: {bucket}")
    if not isValidUuid(root_id, obj_class="group"):
        log.error(f"Unexpected group_id: {root_id}")
        raise HTTPInternalServerError()
    schema2 = isSchema2Id(root_id)
    if schema2 and not isRootObjId(root_id):
        log.error(f"Expected root id for flush but got: {root_id}")
        raise HTTPInternalServerError()
    flush_start = time.time()
    flush_set = set()
    dirty_ids = app["dirty_ids"]
    # collect the set of dirty objects this flush needs to wait on
    for obj_id in dirty_ids:
        if schema2:
            if isValidUuid(obj_id) and getRootObjId(obj_id) == root_id:
                flush_set.add(obj_id)
        else:
            # for schema1 not easy to determine if a given id is in a domain,
            # so just wait on all of them
            flush_set.add(obj_id)
    log.debug(f"flushop - waiting on {len(flush_set)} items")
    while time.time() - flush_start < FLUSH_TIME_OUT:
        # check to see if the items in our flush set are still there
        remaining_set = set()
        for obj_id in flush_set:
            if obj_id not in dirty_ids:
                log.debug(f"flush - {obj_id} has been written")
            elif dirty_ids[obj_id][0] > flush_start:
                # updated again after the flush started - not our
                # responsibility; the next flush/sync will cover it
                log.debug(
                    f"flush - {obj_id} has been updated after flush start")
            else:
                log.debug(f"flush - {obj_id} still pending")
                remaining_set.add(obj_id)
        flush_set = remaining_set
        if not flush_set:
            log.debug("flush op - all objects have been written")
            break
        log.debug(
            f"flushop - {len(flush_set)} item remaining, sleeping for {FLUSH_SLEEP_INTERVAL}"
        )
        await asyncio.sleep(FLUSH_SLEEP_INTERVAL)
    if flush_set:
        log.warn(
            f"flushop - {len(flush_set)} items not updated after {FLUSH_TIME_OUT}"
        )
        raise HTTPServiceUnavailable()
    resp = json_response(None, status=204)  # NO Content response
    log.response(request, resp=resp)
    return resp
async def write_s3_obj(app, obj_id, bucket=None):
    """ writes the given object to s3 """
    # Writes one dirty object (chunk or metadata/domain) to S3, then updates
    # the dirty/pending bookkeeping maps.  Returns obj_id on completion, or
    # None if the object was deleted before the write started.
    s3key = getS3Key(obj_id)
    log.info(
        f"write_s3_obj for obj_id: {obj_id} / s3_key: {s3key} bucket: {bucket}"
    )
    # app-state maps used throughout; all mutated under the single event loop
    pending_s3_write = app["pending_s3_write"]        # s3key -> write start time
    pending_s3_write_tasks = app["pending_s3_write_tasks"]  # obj_id -> asyncio task
    dirty_ids = app["dirty_ids"]                      # obj_id -> (timestamp, bucket)
    chunk_cache = app['chunk_cache']
    meta_cache = app['meta_cache']
    deflate_map = app['deflate_map']
    shuffle_map = app['shuffle_map']
    notify_objs = app["root_notify_ids"]              # root_id -> bucket, for root notification
    deleted_ids = app['deleted_ids']
    success = False

    if isValidDomain(obj_id):
        # domain ids encode their own bucket; it should agree with the caller's
        domain_bucket = getBucketForDomain(obj_id)
        if bucket and bucket != domain_bucket:
            # NOTE(review): "wsas" typo in this log message ("what was passed")
            log.error(
                f"expected bucket for domain: {obj_id} to match what wsas passed to write_s3_obj"
            )
        else:
            bucket = domain_bucket

    # a write for this key must not already be in flight
    if s3key in pending_s3_write:
        msg = f"write_s3_key - not expected for key {s3key} to be in pending_s3_write map"
        log.error(msg)
        raise KeyError(msg)

    if obj_id not in pending_s3_write_tasks:
        # don't allow reentrant write
        log.debug(f"write_s3_obj for {obj_id} not s3sync task")

    if obj_id in deleted_ids and isValidUuid(obj_id):
        # if this objid has been deleted (and its unique since this is not a domain id)
        # cancel any pending task and return
        log.warn(f"Canceling write for {obj_id} since it has been deleted")
        if obj_id in pending_s3_write_tasks:
            log.info(f"removing pending s3 write task for {obj_id}")
            task = pending_s3_write_tasks[obj_id]
            task.cancel()
            del pending_s3_write_tasks[obj_id]
        return None

    now = time.time()
    # capture the last-update timestamp so we can tell, after the await,
    # whether the object was modified while the S3 write was in flight
    last_update_time = now
    if obj_id in dirty_ids:
        last_update_time = dirty_ids[obj_id][
            0]  # timestamp is first element of two-tuple
    if last_update_time > now:
        msg = f"last_update time {last_update_time} is in the future for obj_id: {obj_id}"
        log.error(msg)
        raise ValueError(msg)

    # mark the write as in-flight before any await point
    pending_s3_write[s3key] = now
    # do the following in the try block so we can always remove the pending_s3_write at the end
    try:
        if isValidChunkId(obj_id):
            # chunk write: serialize the cached numpy array to bytes
            if obj_id not in chunk_cache:
                log.error(f"expected to find obj_id: {obj_id} in chunk cache")
                raise KeyError(f"{obj_id} not found in chunk cache")
            if not chunk_cache.isDirty(obj_id):
                log.error(f"expected chunk cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            chunk_arr = chunk_cache[obj_id]
            chunk_bytes = arrayToBytes(chunk_arr)
            dset_id = getDatasetId(obj_id)
            # per-dataset filter settings (if any)
            deflate_level = None
            shuffle = 0
            if dset_id in shuffle_map:
                shuffle = shuffle_map[dset_id]
            if dset_id in deflate_map:
                deflate_level = deflate_map[dset_id]
                log.debug(
                    f"got deflate_level: {deflate_level} for dset: {dset_id}")
            # NOTE(review): duplicate of the shuffle_map lookup above (harmless)
            if dset_id in shuffle_map:
                shuffle = shuffle_map[dset_id]
                log.debug(f"got shuffle size: {shuffle} for dset: {dset_id}")
            await putS3Bytes(app,
                             s3key,
                             chunk_bytes,
                             shuffle=shuffle,
                             deflate_level=deflate_level,
                             bucket=bucket)
            success = True
            # if chunk has been evicted from cache something has gone wrong
            if obj_id not in chunk_cache:
                msg = f"expected to find {obj_id} in chunk_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id][
                    0] > last_update_time:
                # a newer update arrived during the await - leave dirty so
                # the next sync pass re-writes it
                log.info(
                    f"write_s3_obj {obj_id} got updated while s3 write was in progress"
                )
            else:
                # no new write, can clear dirty
                chunk_cache.clearDirty(obj_id)  # allow eviction from cache
                log.debug(
                    "putS3Bytes Chunk cache utilization: {} per, dirty_count: {}"
                    .format(chunk_cache.cacheUtilizationPercent,
                            chunk_cache.dirtyCount))
        else:
            # meta data update
            # check for object in meta cache
            if obj_id not in meta_cache:
                log.error(f"expected to find obj_id: {obj_id} in meta cache")
                raise KeyError(f"{obj_id} not found in meta cache")
            if not meta_cache.isDirty(obj_id):
                log.error(f"expected meta cache obj {obj_id} to be dirty")
                raise ValueError("bad dirty state for obj")
            obj_json = meta_cache[obj_id]
            await putS3JSONObj(app, s3key, obj_json, bucket=bucket)
            success = True
            # should still be in meta_cache...
            if obj_id in deleted_ids:
                log.info(
                    f"obj {obj_id} has been deleted while write was in progress"
                )
            elif obj_id not in meta_cache:
                msg = f"expected to find {obj_id} in meta_cache"
                log.error(msg)
            elif obj_id in dirty_ids and dirty_ids[obj_id][
                    0] > last_update_time:
                # updated during the await - keep dirty for the next sync
                log.info(
                    f"write_s3_obj {obj_id} got updated while s3 write was in progress"
                )
            else:
                meta_cache.clearDirty(obj_id)  # allow eviction from cache
    finally:
        # clear pending_s3_write item
        log.debug(f"write_s3_obj finally block, success={success}")
        if s3key not in pending_s3_write:
            msg = f"write s3 obj: Expected to find {s3key} in pending_s3_write map"
            log.error(msg)
        else:
            if pending_s3_write[s3key] != now:
                # some other task overwrote our in-flight marker - unexpected
                msg = f"pending_s3_write timestamp got updated unexpectedly for {s3key}"
                log.error(msg)
            del pending_s3_write[s3key]
        # clear task
        if obj_id not in pending_s3_write_tasks:
            log.debug(f"no pending s3 write task for {obj_id}")
        else:
            log.debug(f"removing pending s3 write task for {obj_id}")
            del pending_s3_write_tasks[obj_id]
        # clear dirty flag
        # only clear if the timestamp is unchanged - otherwise a newer
        # update still needs to be written
        if obj_id in dirty_ids and dirty_ids[obj_id][0] == last_update_time:
            log.debug(f"clearing dirty flag for {obj_id}")
            del dirty_ids[obj_id]

    # add to map so that root can be notified about changed objects
    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        root_id = getRootObjId(obj_id)
        notify_objs[root_id] = bucket

    # calculate time to do the write
    elapsed_time = time.time() - now
    log.info(f"s3 write for {s3key} took {elapsed_time:.3f}s")
    return obj_id
async def save_metadata_obj(app,
                            obj_id,
                            obj_json,
                            bucket=None,
                            notify=False,
                            flush=False):
    """Persist the given object's JSON representation.

    Caches obj_json and marks it dirty.  With flush=True the object is
    written to S3 immediately (and the root notified if notify=True);
    otherwise it is queued in dirty_ids for the s3sync task to write later.

    Raises HTTPInternalServerError on invalid arguments, deleted uuids,
    partition mismatch, or write failure; HTTPBadRequest if flush is
    requested for a chunk id.
    """
    log.info(
        f"save_metadata_obj {obj_id} bucket={bucket} notify={notify} flush={flush}"
    )
    # notify only makes sense when we actually write (flush) now
    if notify and not flush:
        log.error("notify not valid when flush is false")
        raise HTTPInternalServerError()
    validateObjId(obj_id, bucket)
    if not isinstance(obj_json, dict):
        log.error("Passed non-dict obj to save_metadata_obj")
        raise HTTPInternalServerError()
    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error("Domain not in partition")
        raise HTTPInternalServerError()
    dirty_ids = app["dirty_ids"]
    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        if isValidUuid(obj_id):
            # domain objects may be re-created, but shouldn't see repeats of
            # deleted uuids
            log.warn(f"{obj_id} has been deleted")
            raise HTTPInternalServerError()
        else:
            # was: redundant "elif obj_id in deleted_ids" - already known true here
            deleted_ids.remove(obj_id)  # un-gone the domain id
    # update meta cache
    meta_cache = app['meta_cache']
    log.debug(f"save: {obj_id} to cache")
    meta_cache[obj_id] = obj_json
    meta_cache.setDirty(obj_id)
    now = int(time.time())
    if flush:
        # write to S3 immediately
        if isValidChunkId(obj_id):
            log.warn("flush not supported for save_metadata_obj with chunks")
            raise HTTPBadRequest()
        try:
            await write_s3_obj(app, obj_id, bucket=bucket)
        except KeyError as ke:
            log.error(f"s3 sync got key error: {ke}")
            raise HTTPInternalServerError()
        except HTTPInternalServerError:
            log.warn(f" failed to write {obj_id}")
            raise  # re-throw
        if obj_id in dirty_ids:
            # write_s3_obj should have cleared this unless a concurrent
            # update arrived mid-write
            log.warn(
                f"save_metadata_obj flush - object {obj_id} is still dirty")
        # message AN immediately if notify flag is set
        # otherwise AN will be notified at next S3 sync
        if notify:
            if isValidUuid(obj_id) and isSchema2Id(obj_id):
                root_id = getRootObjId(obj_id)
                await notify_root(app, root_id, bucket=bucket)
    else:
        # deferred write: record timestamp + bucket for the s3sync task
        log.debug(f"setting dirty_ids[{obj_id}] = ({now}, {bucket})")
        if isValidUuid(obj_id) and not bucket:
            log.warn(f"bucket is not defined for save_metadata_obj: {obj_id}")
        dirty_ids[obj_id] = (now, bucket)