Example #1
async def POST_Root(request):
    """ Notify root that content in the domain has been modified.
    """
    log.request(request)
    app = request.app
    root_id = request.match_info.get('id')
    if not root_id:
        log.error("missing id in request")
        raise HTTPInternalServerError()
    if not isSchema2Id(root_id):
        log.error(f"expected schema2 id but got: {root_id}")
        raise HTTPInternalServerError()
    if not isRootObjId(root_id):
        log.error(f"Expected root id but got: {root_id}")
        raise HTTPInternalServerError()
    params = request.rel_url.query
    bucket = params.get("bucket")

    log.info(f"POST_Root: {root_id} bucket: {bucket}")

    # add id to be scanned by the s3sync task
    root_scan_ids = app["root_scan_ids"]
    root_scan_ids[root_id] = bucket

    resp_json = {}

    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
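A handler like this reads shared state from the aiohttp application dict and is registered against a route. A minimal wiring sketch, assuming aiohttp and a hypothetical /roots/{id} route path (the actual path is not shown in the handler itself):

from aiohttp import web

def create_app():
    app = web.Application()
    # root_id -> bucket entries, consumed later by the s3sync scan task
    app["root_scan_ids"] = {}
    app.router.add_post("/roots/{id}", POST_Root)
    return app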
Example #2
async def DELETE_Object(request):
    """ Mark the given object as deleted.
        Only datasets and root groups are added to the delete set;
        non-root groups and datatypes are ignored here.
    """
    log.request(request)

    app = request.app
    delete_set = app["delete_set"]

    objid = request.match_info.get('id')
    if not isValidUuid(objid):
        log.warn(f"Invalid id: {objid}")
        raise HTTPBadRequest()

    if isSchema2Id(objid):
        # get rootid for this id
        collection = getCollectionForId(objid)
        if collection == "datasets":
            delete_set.add(objid)
        elif collection == "groups":
            # only need to do anything if this is the root group
            if isRootObjId(objid):
                log.info(f"adding root group: {objid} to delete_set")
                delete_set.add(objid)
            else:
                log.info(f"ignoring delete non-root group: {objid}")
        elif collection == "datatypes":
            log.info(f"ignoring delete for datatype object: {objid}")
        else:
            log.error(f"Unexpected collection type: {collection}")

    resp_json = {}
    resp = json_response(resp_json)
    log.response(request, resp=resp)
    return resp
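The branching above depends on getCollectionForId mapping an object id to its collection name. A plausible sketch, assuming a one-character type prefix on each id (an assumption; the helper is not shown in these examples):

def getCollectionForId(obj_id):
    # assumed id convention: "g-..." for groups, "d-..." for datasets,
    # "t-..." for datatypes -- not confirmed by the examples above
    if obj_id.startswith("g-"):
        return "groups"
    elif obj_id.startswith("d-"):
        return "datasets"
    elif obj_id.startswith("t-"):
        return "datatypes"
    raise ValueError(f"unexpected id: {obj_id}")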
Example #3
async def delete_metadata_obj(app,
                              obj_id,
                              notify=True,
                              root_id=None,
                              bucket=None):
    """ Delete the given object """
    meta_cache = app['meta_cache']
    dirty_ids = app["dirty_ids"]
    log.info(f"delete_metadata_obj: {obj_id} notify: {notify}")
    validateObjId(obj_id, bucket)
    if isValidDomain(obj_id):
        bucket = getBucketForDomain(obj_id)

    try:
        validateInPartition(app, obj_id)
    except KeyError:
        log.error(f"obj: {obj_id} not in partition")
        raise HTTPInternalServerError()

    deleted_ids = app['deleted_ids']
    if obj_id in deleted_ids:
        log.warn(f"{obj_id} has already been deleted")
    else:
        log.debug(f"adding {obj_id} to deleted ids")
        deleted_ids.add(obj_id)

    if obj_id in meta_cache:
        log.debug(f"removing {obj_id} from meta_cache")
        del meta_cache[obj_id]

    if obj_id in dirty_ids:
        log.debug(f"removing dirty_ids for: {obj_id}")
        del dirty_ids[obj_id]

    # remove from S3 (if present)
    s3key = getS3Key(obj_id)

    if await isS3Obj(app, s3key, bucket=bucket):
        await deleteS3Obj(app, s3key, bucket=bucket)
    else:
        log.info(
            f"delete_metadata_obj - key {s3key} not found (never written)?")

    if isValidUuid(obj_id) and isSchema2Id(obj_id):
        if isRootObjId(obj_id):
            # add to gc ids so sub-objects will be deleted
            gc_ids = app["gc_ids"]
            log.info(f"adding root id: {obj_id} for GC cleanup")
            gc_ids.add(obj_id)
        elif notify:
            root_id = getRootObjId(obj_id)
            await notify_root(app, root_id, bucket=bucket)
        # no notify for domain deletes since the root group is being deleted

    log.debug(f"delete_metadata_obj for {obj_id} done")
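For non-root objects, deletion triggers notify_root so the root's scan state gets refreshed. A minimal sketch, assuming notify_root simply records the root id for the s3sync scan task, mirroring what POST_Root in Example #1 does when the same notification arrives over HTTP:

async def notify_root(app, root_id, bucket=None):
    # hedged sketch: mirrors POST_Root's bookkeeping from Example #1
    if not isValidUuid(root_id) or not isSchema2Id(root_id):
        log.error(f"notify_root - unexpected id: {root_id}")
        return
    root_scan_ids = app["root_scan_ids"]
    root_scan_ids[root_id] = bucket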
Example #4
async def bucketGC(app):
    """ remove objects from db for any deleted root groups or datasets
    """
    log.info("bucketGC start")
    async_sleep_time = int(config.get("async_sleep_time"))
    log.info(f"async_sleep_time: {async_sleep_time}")

    # update/initialize root object before starting GC

    while True:
        if app["node_state"] != "READY":
            log.info("bucketGC - waiting for Node state to be READY")
            await asyncio.sleep(async_sleep_time)
            continue  # wait for READY state

        gc_ids = app["gc_ids"]
        while len(gc_ids) > 0:
            obj_id = gc_ids.pop()
            log.info(f"got gc id: {obj_id}")
            if not isValidUuid(obj_id):
                log.error(f"bucketGC - got unexpected gc id: {obj_id}")
                continue
            if not isSchema2Id(obj_id):
                log.warn(f"bucketGC - ignoring v1 id: {obj_id}")
                continue
            collection = getCollectionForId(obj_id)
            if collection == "groups":
                if not isRootObjId(obj_id):
                    log.error(f"bucketGC - unexpected non-root id: {obj_id}")
                    continue
                log.info(f"bucketGC - delete root objs: {obj_id}")
                await removeKeys(app, obj_id)
            elif collection == "datasets":
                log.info(f"bucketGC - delete dataset: {obj_id}")
                await removeKeys(app, obj_id)
            else:
                log.error(f"bucketGC - unexpected obj_id class: {obj_id}")

        log.info(f"bucketGC - sleep: {async_sleep_time}")
        await asyncio.sleep(async_sleep_time)

    # shouldn't ever get here
    log.error("bucketGC terminating unexpectedly")
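bucketGC runs forever, so it has to be launched as a background task alongside the request handlers. A sketch of the usual aiohttp pattern for that (the key names here are illustrative, not taken from the examples above):

import asyncio
from aiohttp import web

async def start_background_tasks(app):
    # run the GC loop concurrently with request handling
    app["gc_task"] = asyncio.create_task(bucketGC(app))

async def cleanup_background_tasks(app):
    app["gc_task"].cancel()

def init_app():
    app = web.Application()
    app["node_state"] = "READY"
    app["gc_ids"] = set()
    app.on_startup.append(start_background_tasks)
    app.on_cleanup.append(cleanup_background_tasks)
    return app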
Example #5
async def PUT_Group(request):
    """ Handler for PUT /groups.
        Used to flush all objects under a root group to S3.
    """

    FLUSH_TIME_OUT = 10.0  # TBD make config
    FLUSH_SLEEP_INTERVAL = 0.1  # TBD make config
    log.request(request)
    app = request.app
    params = request.rel_url.query

    root_id = request.match_info.get('id')
    bucket = params.get("bucket")
    log.info(f"PUT group (flush): {root_id}  bucket: {bucket}")
    # the bucket param isn't strictly needed since each dirty id
    # knows which bucket it should write to

    if not isValidUuid(root_id, obj_class="group"):
        log.error(f"Unexpected group_id: {root_id}")
        raise HTTPInternalServerError()

    schema2 = isSchema2Id(root_id)

    if schema2 and not isRootObjId(root_id):
        log.error(f"Expected root id for flush but got: {root_id}")
        raise HTTPInternalServerError()

    flush_start = time.time()
    flush_set = set()
    dirty_ids = app["dirty_ids"]

    for obj_id in dirty_ids:
        if schema2:
            if isValidUuid(obj_id) and getRootObjId(obj_id) == root_id:
                flush_set.add(obj_id)
        else:
            # for schema1 not easy to determine if a given id is in a domain,
            # so just wait on all of them
            flush_set.add(obj_id)

    log.debug(f"flushop - waiting on {len(flush_set)} items")
    while time.time() - flush_start < FLUSH_TIME_OUT:
        # check to see if the items in our flush set are still there

        remaining_set = set()
        for obj_id in flush_set:
            if obj_id not in dirty_ids:
                log.debug(f"flush - {obj_id} has been written")
            elif dirty_ids[obj_id][0] > flush_start:
                log.debug(
                    f"flush - {obj_id} has been updated after flush start")
            else:
                log.debug(f"flush - {obj_id} still pending")
                remaining_set.add(obj_id)
        flush_set = remaining_set
        if len(flush_set) == 0:
            log.debug("flush op - all objects have been written")
            break
        log.debug(
            f"flushop - {len(flush_set)} items remaining, "
            f"sleeping for {FLUSH_SLEEP_INTERVAL}"
        )
        await asyncio.sleep(FLUSH_SLEEP_INTERVAL)

    if len(flush_set) > 0:
        log.warn(
            f"flushop - {len(flush_set)} items not updated after {FLUSH_TIME_OUT}"
        )
        raise HTTPServiceUnavailable()

    resp = json_response(None, status=204)  # No Content response
    log.response(request, resp=resp)
    return resp
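The flush loop reads dirty_ids[obj_id][0] and compares it to a wall-clock time, so each entry must begin with a last-updated timestamp. A sketch of how a write path might mark an object dirty, assuming the second tuple slot holds the target bucket (only the timestamp slot is actually visible in the handler above):

import time

def mark_dirty(app, obj_id, bucket=None):
    # (timestamp, bucket): the flush loop only inspects element [0]
    dirty_ids = app["dirty_ids"]
    dirty_ids[obj_id] = (time.time(), bucket)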