예제 #1
0
def delete(uuid: str, replica: str):
    """Delete the subscription identified by `uuid` on the given replica.

    :param uuid: subscription UUID.
    :param replica: replica name (key into `Replica`).
    :returns: JSON body with the deletion timestamp, 200 status.
    :raises DSSException: 404 when no such subscription exists,
        403 when the caller does not own it.
    """
    caller_email = security.get_token_email(request.token_info)
    client = ElasticsearchClient.get()

    subscriptions_index = Config.get_es_index_name(ESIndexType.subscriptions,
                                                   Replica[replica])
    try:
        hit = client.get(index=subscriptions_index,
                         doc_type=ESDocType.subscription.name,
                         id=uuid)
    except NotFoundError:
        raise DSSException(requests.codes.not_found, "not_found",
                           "Cannot find subscription!")

    if hit['_source']['owner'] != caller_email:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden",
                           "Your credentials can't access this subscription!")

    _delete_subscription(client, uuid)

    deleted_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    return jsonify({'timeDeleted': deleted_at}), requests.codes.okay
예제 #2
0
def delete(uuid: str, replica: str):
    """Tombstone (delete) the collection identified by `uuid`.

    Only the collection's owner may delete it.  Deletion is recorded by
    writing a tombstone object containing the deleter's email; the write
    is idempotent, so a repeated delete raises a 409 conflict.

    :param uuid: collection UUID (lowercased before use).
    :param replica: replica name (key into `Replica`).
    :returns: empty JSON body, 200 status.
    :raises DSSException: 403 when the caller is not the owner,
        409 when a tombstone already exists.
    """
    authenticated_user_email = security.get_token_email(request.token_info)

    uuid = uuid.lower()
    tombstone_key = CollectionTombstoneID(uuid, version=None).to_key()

    # The tombstone records who performed the deletion.
    tombstone_object_data = dict(email=authenticated_user_email)

    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        # Plain strings: the former f-strings had no placeholders (F541).
        raise DSSException(requests.codes.forbidden, "forbidden",
                           "Collection access denied")

    created, idempotent = idempotent_save(
        Config.get_blobstore_handle(Replica[replica]), Replica[replica].bucket,
        tombstone_key,
        json.dumps(tombstone_object_data).encode("utf-8"))
    if not idempotent:
        raise DSSException(
            requests.codes.conflict, "collection_tombstone_already_exists",
            f"collection tombstone with UUID {uuid} already exists")
    status_code = requests.codes.ok
    response_body: dict = {}
    # Remove the owner -> collection mapping from DynamoDB so listings
    # stay consistent with the tombstone.
    owner_lookup.delete_collection_uuid(owner=authenticated_user_email,
                                        uuid=uuid)
    return jsonify(response_body), status_code
예제 #3
0
def get_impl(uuid: str, replica: str, version: str = None):
    """Fetch a collection's JSON body, resolving the latest stored version
    when `version` is omitted.

    :raises DSSException: 404 when the collection is tombstoned or absent.
    """
    uuid = uuid.lower()
    replica_obj = Replica[replica]
    bucket = replica_obj.bucket
    handle = Config.get_blobstore_handle(replica_obj)

    tombstone_key = CollectionTombstoneID(uuid, version=None).to_key()
    if test_object_exists(handle, bucket, tombstone_key):
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))

    if version is None:
        # Scan every stored version and keep the lexicographically
        # greatest suffix, which for DSS_VERSION timestamps is the
        # most recent version.
        prefix = CollectionFQID(uuid, version=None).to_key_prefix()
        for key in handle.list(bucket, prefix):
            candidate = key[len(prefix):]
            if version is None or candidate > version:
                version = candidate
    try:
        collection_blob = handle.get(bucket,
                                     CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))
    return json.loads(collection_blob)
def delete(uuid: str, replica: str):
    """Delete subscription `uuid`: unregister its percolate queries from
    every document index behind the current alias, then remove the
    subscription document itself.

    :param uuid: subscription UUID.
    :param replica: replica name (key into `Replica`).
    :returns: JSON body with the deletion timestamp, 200 status.
    :raises DSSException: 404 when the subscription does not exist,
        403 when the caller does not own it.
    """
    authenticated_user_email = request.token_info['email']

    es_client = ElasticsearchClient.get()

    try:
        response = es_client.get(index=Config.get_es_index_name(
            ESIndexType.subscriptions, Replica[replica]),
                                 doc_type=ESDocType.subscription.name,
                                 id=uuid)
    # The exception object was never used, so it is not bound.
    except NotFoundError:
        raise DSSException(requests.codes.not_found, "not_found",
                           "Cannot find subscription!")

    stored_metadata = response['_source']

    if stored_metadata['owner'] != authenticated_user_email:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden",
                           "Your credentials can't access this subscription!")

    #  get all indexes that use current alias
    alias_name = Config.get_es_alias_name(ESIndexType.docs, Replica[replica])
    doc_indexes = _get_indexes_by_alias(es_client, alias_name)
    # Remove percolate registrations first so queries and subscriptions
    # cannot drift out of sync.
    _unregister_percolate(es_client, doc_indexes, uuid)

    es_client.delete(index=Config.get_es_index_name(ESIndexType.subscriptions,
                                                    Replica[replica]),
                     doc_type=ESDocType.subscription.name,
                     id=uuid)

    timestamp = datetime.datetime.utcnow()
    time_deleted = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")

    return jsonify({'timeDeleted': time_deleted}), requests.codes.okay
예제 #5
0
def get(uuid: str, replica: str):
    """Return the stored subscription identified by `uuid`.

    The response is the subscription document augmented with its uuid
    and replica; the HMAC secret key is stripped before returning.

    :raises DSSException: 404 when the subscription does not exist,
        403 when the caller is not its owner.
    """
    owner = security.get_token_email(request.token_info)

    es_client = ElasticsearchClient.get()
    try:
        response = es_client.get(index=Config.get_es_index_name(
            ESIndexType.subscriptions, Replica[replica]),
                                 doc_type=ESDocType.subscription.name,
                                 id=uuid)
    except NotFoundError:
        raise DSSException(requests.codes.not_found, "not_found",
                           "Cannot find subscription!")

    source = response['_source']
    source['uuid'] = uuid
    source['replica'] = replica
    if 'hmac_key_id' in response:
        # NOTE(review): this tests the raw ES response envelope, not
        # `source` — for an ES `get`, document fields normally live under
        # `_source`, so this condition looks like it can never be true.
        # Verify whether `'hmac_key_id' in source` was intended.
        source['hmac_key_id'] = response['hmac_key_id']
    if 'hmac_secret_key' in source:
        # Never leak the shared secret back to the caller.
        source.pop('hmac_secret_key')
    if source['owner'] != owner:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden",
                           "Your credentials can't access this subscription!")

    return jsonify(source), requests.codes.okay
예제 #6
0
def verify_jwt(token: str) -> typing.Optional[typing.Mapping]:
    """Validate `token` and return its verified claims.

    The token is first decoded WITHOUT verification to discover the
    issuer (which must be authorized), then fully verified — signature,
    issuer and audience — against the issuer's published public keys.

    :raises DSSException: 401 on any decode or verification failure.
    """
    try:
        unverified_token = jwt.decode(token, verify=False)
    except jwt.DecodeError:
        logger.info(f"Failed to decode JWT: {token}", exc_info=True)
        raise DSSException(401, 'Unauthorized', 'Failed to decode token.')

    assert_authorized_issuer(unverified_token)
    issuer = unverified_token['iss']
    signing_keys = get_public_keys(issuer)

    try:
        # The header's `kid` selects which of the issuer's keys signed
        # this token.
        kid = jwt.get_unverified_header(token)["kid"]
        claims = jwt.decode(
            token,
            key=signing_keys[kid],
            issuer=issuer,
            audience=Config.get_audience(),
            algorithms=allowed_algorithms,
        )
        logger.info("""{"valid": true, "token": %s}""",
                    json.dumps(claims))
    except jwt.PyJWTError as ex:  # type: ignore
        logger.info("""{"valid": false, "token": %s}""",
                    json.dumps(unverified_token),
                    exc_info=True)
        raise DSSException(401, 'Unauthorized',
                           'Authorization token is invalid') from ex
    return claims
예제 #7
0
def get_token_email(token_info: typing.Mapping[str, typing.Any]) -> str:
    """Extract the caller's email from decoded token claims.

    Prefers the configured OIDC email claim; falls back to the standard
    'email' claim when the configured one is absent or empty.

    :raises DSSException: 401 when neither claim yields an email.
    """
    try:
        preferred = token_info.get(Config.get_OIDC_email_claim())
        return preferred if preferred else token_info['email']
    except KeyError:
        raise DSSException(401, 'Unauthorized',
                           'Authorization token is missing email claims.')
예제 #8
0
def resolve_content_item(replica: Replica, blobstore_handle: BlobStore,
                         item: dict):
    """Validate a collection content item and, for fragment items, return
    the referenced JSON fragment.

    Items of type file/bundle/collection are only validated (their JSON
    metadata must exist); nothing is returned for them.  Any other type
    is treated as a file reference carrying a "fragment" JSON pointer,
    which is resolved against the file's blob and returned.

    :raises DSSException: 422 wrapping any failure; DSSExceptions raised
        by helpers pass through unchanged.
    """
    try:
        item_type = item["type"]
        if item_type in {"file", "bundle", "collection"}:
            # Existence/size check only — the metadata itself is not used.
            get_json_metadata(item_type, item["uuid"], item["version"],
                              replica, blobstore_handle)
        else:
            file_metadata = get_json_metadata("file", item["uuid"],
                                              item["version"], replica,
                                              blobstore_handle)
            if "fragment" not in item:
                raise Exception(
                    'The "fragment" field is required in collection elements '
                    'other than files, bundles, and collections')
            blob_path = compose_blob_key(file_metadata)
            # check that item is marked as metadata, is json, and is less than max size
            item_doc = json.loads(
                blobstore_handle.get(replica.bucket, blob_path))
            return jsonpointer.resolve_pointer(item_doc, item["fragment"])
    except DSSException:
        raise
    except Exception as e:
        raise DSSException(
            requests.codes.unprocessable_entity, "invalid_link",
            'Error while parsing the link "{}": {}: {}'.format(
                item,
                type(e).__name__, e))
예제 #9
0
def get(uuid: str, replica: str, version: str = None):
    """Return the collection body for `uuid` (latest version when
    `version` is omitted).  Only the owner may read a collection.

    :raises DSSException: 403 when the caller does not own the collection.
    """
    authenticated_user_email = security.get_token_email(request.token_info)
    collection_body = get_impl(uuid=uuid, replica=replica, version=version)
    if collection_body["owner"] != authenticated_user_email:
        # Plain string: the former f-string had no placeholders (F541).
        raise DSSException(requests.codes.forbidden, "forbidden",
                           "Collection access denied")
    return collection_body
예제 #10
0
def get(uuid: str, replica: str):
    """Look up the caller's subscription `uuid`.

    Responds 404 when it does not exist or is owned by someone else;
    the HMAC secret key is never returned to the caller.
    """
    owner = security.get_token_email(request.token_info)
    subscription = get_subscription(Replica[replica], owner, uuid)
    if subscription is None or subscription[SubscriptionData.OWNER] != owner:
        raise DSSException(404, "not_found", "Cannot find subscription!")
    subscription.pop('hmac_secret_key', None)
    return subscription, requests.codes.ok
예제 #11
0
def delete(uuid: str, replica: str):
    """Delete the caller's subscription `uuid`.

    Responds 404 when it does not exist or is owned by someone else;
    returns the deletion timestamp on success.
    """
    owner = security.get_token_email(request.token_info)
    found = get_subscription(Replica[replica], owner, uuid)
    if found is None or found[SubscriptionData.OWNER] != owner:
        raise DSSException(404, "not_found", "Cannot find subscription!")
    delete_subscription(Replica[replica], owner, uuid)
    deleted_at = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")
    return jsonify({'timeDeleted': deleted_at}), requests.codes.okay
예제 #12
0
def get(replica: str, checkout_job_id: str):
    """Return the status of bundle checkout job `checkout_job_id`.

    :raises DSSException: 400 when replica is missing,
        404 when no such checkout job exists.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O,
    # which would let a missing replica fall through to a KeyError.
    if replica is None:
        raise DSSException(requests.codes.bad_request, "bad_request",
                           "replica must be specified")
    _replica = Replica[replica]
    try:
        response = get_bundle_checkout_status(checkout_job_id, _replica,
                                              _replica.checkout_bucket)
    except BlobNotFoundError:
        raise DSSException(requests.codes.not_found, "not_found",
                           "Cannot find checkout!")
    return response, requests.codes.ok
예제 #13
0
def put(json_request_body: dict, replica: str):
    """Create a new subscription for the caller.

    Validates the subscription's JMESPath query and attachment
    definitions before storing it.

    :returns: the stored subscription document, 201 status.
    :raises DSSException: 406 when the caller is at the subscription
        limit; 400 on an invalid JMESPath query, attachment name, or
        attachment expression.
    """
    owner = security.get_token_email(request.token_info)
    if count_subscriptions_for_owner(Replica[replica], owner) > SUBSCRIPTION_LIMIT:
        raise DSSException(requests.codes.not_acceptable, "not_acceptable",
                           f"Users cannot exceed {SUBSCRIPTION_LIMIT} subscriptions!")

    subscription_doc = json_request_body.copy()
    # Reuse the already-extracted email rather than decoding the token
    # a second time (the original called security.get_token_email twice).
    subscription_doc[SubscriptionData.OWNER] = owner
    subscription_uuid = str(uuid4())
    subscription_doc[SubscriptionData.UUID] = subscription_uuid
    subscription_doc[SubscriptionData.REPLICA] = Replica[replica].name
    if subscription_doc.get(SubscriptionData.JMESPATH_QUERY) is not None:
        try:
            jmespath.compile(subscription_doc[SubscriptionData.JMESPATH_QUERY])
        except JMESPathError:
            raise DSSException(
                requests.codes.bad_request,
                "invalid_jmespath",
                "JMESPath query is invalid"
            )
    # validate attachment JMESPath if present
    attachments = subscription_doc.get(SubscriptionData.ATTACHMENTS)
    if attachments is not None:
        for name, definition in attachments.items():
            # Leading underscores are reserved for internal attachment names.
            if name.startswith('_'):
                raise DSSException(requests.codes.bad_request,
                                   "invalid_attachment_name",
                                   f"Attachment names must not start with underscore ({name})")
            type_ = definition['type']
            if type_ == 'jmespath':
                expression = definition['expression']
                try:
                    jmespath.compile(expression)
                except JMESPathError as e:
                    raise DSSException(requests.codes.bad_request,
                                       "invalid_attachment_expression",
                                       f"Unable to compile JMESPath expression for attachment {name}") from e
            else:
                # Unknown types should have been rejected by request
                # validation before reaching this handler.
                assert False, type_
    put_subscription(subscription_doc)
    return subscription_doc, requests.codes.created
예제 #14
0
def post(uuid: str,
         json_request_body: dict,
         replica: str,
         version: str = None):
    """Start an asynchronous bundle checkout and return its job id.

    :param json_request_body: may carry 'destination' (target bucket,
        defaulting to the replica's checkout bucket) and 'email'.
    :raises DSSException: 400 when replica is missing or the destination
        bucket is invalid, 404 when the bundle does not exist.
    """
    # Explicit check instead of `assert`: asserts are stripped under -O.
    if replica is None:
        raise DSSException(400, "illegal_arguments",
                           "Replica must be specified!")
    _replica: Replica = Replica[replica]
    dst_bucket = json_request_body.get('destination', _replica.checkout_bucket)
    # A '/' in the bucket name would smuggle a key prefix into the bucket.
    if '/' in dst_bucket:
        raise DSSException(400, "illegal_arguments",
                           "Destination bucket invalid!")
    try:
        execution_id = start_bundle_checkout(
            _replica,
            uuid,
            version,
            dst_bucket=dst_bucket,
            email_address=json_request_body.get('email', None),
        )
    except BundleNotFoundError:
        raise DSSException(404, "not_found", "Cannot find bundle!")
    return jsonify(dict(checkout_job_id=execution_id)), requests.codes.ok
예제 #15
0
def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    """Apply a partial update to collection `uuid` at `version`, writing
    the result as a brand-new version.

    Supported operations: replace the top-level "name", "description"
    and "details" fields; remove entries listed in "remove_contents";
    append "add_contents" (verified first); then deduplicate contents.

    :returns: JSON with the uuid and the newly written version, 200.
    :raises DSSException: 403 when the caller is not the owner,
        404 when the requested version does not exist.
    """
    authenticated_user_email = security.get_token_email(request.token_info)

    uuid = uuid.lower()
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        # Plain string: the former f-string had no placeholders (F541).
        raise DSSException(requests.codes.forbidden, "forbidden",
                           "Collection access denied")

    handle = Config.get_blobstore_handle(Replica[replica])
    try:
        cur_collection_blob = handle.get(
            Replica[replica].bucket,
            CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(
            404, "not_found",
            "Could not find collection for UUID {}".format(uuid))
    collection = json.loads(cur_collection_blob)
    for field in "name", "description", "details":
        if field in json_request_body:
            collection[field] = json_request_body[field]
    # Remove requested entries by structural equality (dicts made hashable).
    remove_contents_set = set(
        map(hashabledict, json_request_body.get("remove_contents", [])))
    collection["contents"] = [
        i for i in collection["contents"]
        if hashabledict(i) not in remove_contents_set
    ]
    # Validate additions before mutating the stored contents.
    verify_collection(json_request_body.get("add_contents", []),
                      Replica[replica], handle)
    collection["contents"].extend(json_request_body.get("add_contents", []))
    collection["contents"] = _dedpuplicate_contents(collection["contents"])
    # Each patch produces a new version keyed by the current time.
    timestamp = datetime.datetime.utcnow()
    new_collection_version = datetime_to_version_format(timestamp)
    handle.upload_file_handle(
        Replica[replica].bucket,
        CollectionFQID(uuid, new_collection_version).to_key(),
        io.BytesIO(json.dumps(collection).encode("utf-8")))
    return jsonify(dict(uuid=uuid,
                        version=new_collection_version)), requests.codes.ok
예제 #16
0
def get_json_metadata(entity_type: str,
                      uuid: str,
                      version: str,
                      replica: Replica,
                      blobstore_handle: BlobStore,
                      max_metadata_size: int = MAX_METADATA_SIZE):
    """Load and parse the JSON metadata object for an entity.

    Keys are laid out as "<entity_type>s/<uuid>.<version>".

    :param max_metadata_size: reject objects larger than this many bytes.
    :raises DSSException: 422 when the object is missing or too large.
    """
    key = "{}s/{}.{}".format(entity_type, uuid, version)
    try:
        # TODO: verify that file is a metadata file
        size = blobstore_handle.get_size(replica.bucket, key)
        if size > max_metadata_size:
            raise DSSException(
                requests.codes.unprocessable_entity, "invalid_link",
                "The file UUID {} refers to a file that is too large to process"
                .format(uuid))
        # Reuse `key` instead of re-formatting the same string a second time.
        return json.loads(blobstore_handle.get(replica.bucket, key))
    except BlobNotFoundError:
        raise DSSException(requests.codes.unprocessable_entity, "invalid_link",
                           "Could not find file for UUID {}".format(uuid))
예제 #17
0
def is_DSS_VERSION(val):
    """
    Verifies `val` is compliant with expected format. See for more info on connexion custom type formats
    https://connexion.readthedocs.io/en/latest/cookbook.html#custom-type-format.
    :param val: the value to verify
    :return: the verified value
    :raises DSSException: 400 when `val` is not a DSS_VERSION timestamp.
    """
    from iso8601 import iso8601
    # convert it to date-time so we can format exactly as the system requires (with microsecond precision)
    try:
        timestamp = iso8601.parse_date(val)
    except iso8601.ParseError:
        # Plain strings: the former f-strings had no placeholders (F541).
        raise DSSException(
            requests.codes.bad_request, "illegal_version",
            "version should be an RFC3339 compliant timestamp")
    # Round-tripping through the canonical formatter rejects values that
    # parse as RFC3339 but are not in the exact DSS_VERSION layout.
    canonical = datetime_to_version_format(timestamp)
    if canonical != val:
        raise DSSException(
            requests.codes.bad_request, "illegal_version",
            "version should be a DSS_VERSION with the format 'YYYY-MM-DDTHHmmSS.zzzzzzZ'"
        )
    return val
예제 #18
0
    def test_502_get_bundle_HAS_retry_after_response(self):
        """Mock seems resistant to multiple calls, therefore this is only used for one endpoint."""
        with mock.patch('dss.api.bundles.get',
                        side_effect=DSSException(502, 'bad_gateway',
                                                 "Bad Gateway")):
            self.app = ThreadedLocalServer()
            self.app.start()
            try:
                uuid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
                version = datetime_to_version_format(datetime.datetime.utcnow())

                url = str(UrlBuilder().set(path=f"/v1/bundles/{uuid}").add_query(
                    "version", version).add_query("replica", 'aws'))

                # A 502 must carry a Retry-After header so clients back off.
                r = self.assertGetResponse(url, 502, headers=get_auth_header())
                self.assertEqual(int(r.response.headers['Retry-After']), 10)
            finally:
                # Shut the server down even when an assertion fails, so
                # the thread does not leak into subsequent tests.
                self.app.shutdown()
예제 #19
0
def list_events(replica: str,
                from_date: str = None,
                to_date: str = None,
                per_page: int = 1,
                token: str = None):
    """List flash-flood event streams between `from_date` and `to_date`.

    Pagination: up to per_page + 1 streams are collected.  When more
    than `per_page` are found, the surplus last stream's `from_date`
    becomes the continuation token and only the first `per_page` are
    returned, with pagination headers set accordingly.

    :raises DSSException: 400 when from_date is later than to_date.
    """
    if token:
        # A continuation token overrides from_date: it is the from_date
        # of the first stream of this page.
        fdate = datetime_from_timestamp(token)
    else:
        fdate = datetime_from_timestamp(
            from_date) if from_date else datetime.min
    tdate = datetime_from_timestamp(to_date) if to_date else datetime.max
    if fdate > tdate:
        raise DSSException(400, "bad_request",
                           "to_date must be greater than from_date")
    ff = Config.get_flashflood_handle(Replica[replica].flashflood_prefix_read)
    event_streams = list()
    for i, event_stream in enumerate(ff.list_event_streams(fdate, tdate)):
        if datetime_from_timestamp(event_stream['from_date']) < tdate:
            event_streams.append(event_stream)
        else:
            # Streams are listed in order; the first one at/after tdate
            # ends the scan.
            break
        # NOTE(review): since i is 0-based, this collects per_page + 1
        # streams — apparently deliberate: the extra stream signals that
        # another page exists and supplies the token below.  Confirm.
        if i == per_page:
            break

    if len(event_streams) <= per_page:
        response = make_response(jsonify(dict(event_streams=event_streams)),
                                 requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        # More than a full page: the surplus last stream becomes the
        # token for the next request and is excluded from this payload.
        next_url = UrlBuilder(request.url)
        next_url.replace_query("token", event_streams[-1]['from_date'])
        link = f"<{next_url}>; rel='next'"
        response = make_response(
            jsonify(dict(event_streams=event_streams[:-1])),
            requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
    response.headers['X-OpenAPI-Paginated-Content-Key'] = 'event_streams'
    return response
예제 #20
0
def mock_500_server_error():
    """Unconditionally raise a 500 DSSException (error-path test helper)."""
    status = requests.codes.internal_server_error
    raise DSSException(status, "internal_server_error", "Internal Server Error")
def put(json_request_body: dict, replica: str):
    """Register an Elasticsearch percolate subscription.

    Creates (if needed) the subscriptions index, registers the caller's
    es_query as a percolate query on every document index behind the
    current alias, then stores the subscription document itself.

    :returns: JSON with the new subscription uuid, 201 status.
    :raises DSSException: 500 when no index accepted the percolate query,
        or when storing the subscription fails (in which case the
        percolate registrations are rolled back).
    """
    uuid = str(uuid4())
    es_query = json_request_body['es_query']
    owner = request.token_info['email']

    es_client = ElasticsearchClient.get()

    index_mapping = {
        "mappings": {
            ESDocType.subscription.name: {
                "properties": {
                    "owner": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "es_query": {
                        # Stored verbatim; not indexed for search.
                        "type": "object",
                        "enabled": "false"
                    }
                }
            }
        }
    }
    # Elasticsearch preprocesses inputs by splitting strings on punctuation.
    # So for [email protected], if I searched for people with the email address [email protected],
    # [email protected] would show up because elasticsearch matched example w/ example.
    # By including "index": "not_analyzed", Elasticsearch leaves all owner inputs alone.
    index_name = Config.get_es_index_name(ESIndexType.subscriptions,
                                          Replica[replica])
    IndexManager.get_subscription_index(es_client, index_name, index_mapping)

    #  get all indexes that use current alias
    alias_name = Config.get_es_alias_name(ESIndexType.docs, Replica[replica])
    doc_indexes = _get_indexes_by_alias(es_client, alias_name)

    #  try to subscribe query to each of the indexes.
    subscribed_indexes = []
    for doc_index in doc_indexes:
        try:
            percolate_registration = _register_percolate(
                es_client, doc_index, uuid, es_query, replica)
        except ElasticsearchException as ex:
            logger.debug(
                f"Exception occured when registering a document to an index. Exception: {ex}"
            )
            # last_ex is only read below when at least one registration
            # failed, in which case this assignment always ran.
            last_ex = ex
        else:
            logger.debug(
                f"Percolate query registration succeeded:\n{percolate_registration}"
            )
            subscribed_indexes.append(doc_index)

    # Queries are unlikely to fit in all of the indexes, therefore errors will almost always occur. Only return an error
    # if no queries are successfully indexed.
    if doc_indexes and not subscribed_indexes:
        logger.critical(
            f"Percolate query registration failed: owner: {owner}, uuid: {uuid}, "
            f"replica: {replica}, es_query: {es_query}, Exception: {last_ex}")
        raise DSSException(
            requests.codes.internal_server_error, "elasticsearch_error",
            "Unable to register elasticsearch percolate query!") from last_ex

    json_request_body['owner'] = owner

    try:
        subscription_registration = _register_subscription(
            es_client, uuid, json_request_body, replica)
        logger.debug(
            f"Event Subscription succeeded:\n{subscription_registration}")
    except ElasticsearchException as ex:
        logger.critical(
            f"Event Subscription failed: owner: {owner}, uuid: {uuid}, "
            f"replica: {replica}, Exception: {ex}")

        # Delete percolate query to make sure queries and subscriptions are in sync.
        doc_indexes = _get_indexes_by_alias(es_client, alias_name)
        _unregister_percolate(es_client, doc_indexes, uuid)

        raise DSSException(
            requests.codes.internal_server_error, "elasticsearch_error",
            "Unable to register subscription! Rolling back percolate query.")

    return jsonify(dict(uuid=uuid)), requests.codes.created
예제 #22
0
def mock_501_not_implemented():
    """Unconditionally raise a 501 DSSException (error-path test helper)."""
    status = requests.codes.not_implemented
    raise DSSException(status, "not_implemented", "Not Implemented")
예제 #23
0
def mock_502_bad_gateway():
    """Unconditionally raise a 502 DSSException (error-path test helper)."""
    status = requests.codes.bad_gateway
    raise DSSException(status, "bad_gateway", "Bad Gateway")
예제 #24
0
def mock_503_service_unavailable():
    """Unconditionally raise a 503 DSSException (error-path test helper)."""
    status = requests.codes.service_unavailable
    raise DSSException(status, "service_unavailable", "Service Unavailable")
예제 #25
0
def mock_504_gateway_timeout():
    """Unconditionally raise a 504 DSSException (error-path test helper)."""
    status = requests.codes.gateway_timeout
    raise DSSException(status, "gateway_timeout", "Gateway Timeout")
예제 #26
0
def get(uuid: str, replica: str, version: str = None):
    """Return the bundle metadata document for the event identified by
    bundle `uuid` and `version`; responds 404 when there is no such event."""
    metadata_doc = events.get_bundle_metadata_document(
        Replica[replica], f"bundles/{uuid}.{version}")
    if metadata_doc is None:
        raise DSSException(404, "not_found", "Cannot find event!")
    return metadata_doc