def delete(uuid: str, replica: str):
    owner = security.get_token_email(request.token_info)
    es_client = ElasticsearchClient.get()
    try:
        response = es_client.get(index=Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica]),
                                 doc_type=ESDocType.subscription.name,
                                 id=uuid)
    except NotFoundError:
        raise DSSException(requests.codes.not_found, "not_found", "Cannot find subscription!")
    stored_metadata = response['_source']
    if stored_metadata['owner'] != owner:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden", "Your credentials can't access this subscription!")
    _delete_subscription(es_client, uuid)
    timestamp = datetime.datetime.utcnow()
    time_deleted = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")
    return jsonify({'timeDeleted': time_deleted}), requests.codes.okay

def delete(uuid: str, replica: str):
    authenticated_user_email = security.get_token_email(request.token_info)
    uuid = uuid.lower()
    tombstone_key = CollectionTombstoneID(uuid, version=None).to_key()
    tombstone_object_data = dict(email=authenticated_user_email)
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        raise DSSException(requests.codes.forbidden, "forbidden", "Collection access denied")
    created, idempotent = idempotent_save(Config.get_blobstore_handle(Replica[replica]),
                                          Replica[replica].bucket,
                                          tombstone_key,
                                          json.dumps(tombstone_object_data).encode("utf-8"))
    if not idempotent:
        raise DSSException(requests.codes.conflict,
                           "collection_tombstone_already_exists",
                           f"collection tombstone with UUID {uuid} already exists")
    status_code = requests.codes.ok
    response_body = dict()  # type: dict
    # update DynamoDB
    owner_lookup.delete_collection_uuid(owner=authenticated_user_email, uuid=uuid)
    return jsonify(response_body), status_code

def get_impl(uuid: str, replica: str, version: str = None):
    uuid = uuid.lower()
    bucket = Replica[replica].bucket
    handle = Config.get_blobstore_handle(Replica[replica])
    tombstone_key = CollectionTombstoneID(uuid, version=None).to_key()
    if test_object_exists(handle, bucket, tombstone_key):
        raise DSSException(404, "not_found", "Could not find collection for UUID {}".format(uuid))
    if version is None:
        # list the collections and find the one that is the most recent.
        prefix = CollectionFQID(uuid, version=None).to_key_prefix()
        for matching_key in handle.list(bucket, prefix):
            matching_key = matching_key[len(prefix):]
            if version is None or matching_key > version:
                version = matching_key
    try:
        collection_blob = handle.get(bucket, CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(404, "not_found", "Could not find collection for UUID {}".format(uuid))
    return json.loads(collection_blob)

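# Illustrative sketch (not part of the source): get_impl above resolves the latest version by plain
# string comparison over key suffixes, which works because DSS version strings sort lexicographically
# in chronological order. The UUID, keys, and versions below are hypothetical.
def _latest_version_example():
    prefix = "collections/deadbeef-0000-4000-8000-000000000000."
    keys = [prefix + "2019-01-01T120000.000000Z",
            prefix + "2020-06-15T080000.000000Z"]
    versions = [key[len(prefix):] for key in keys]
    return max(versions)  # "2020-06-15T080000.000000Z" -- the most recent version wins
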
def delete(uuid: str, replica: str):
    authenticated_user_email = request.token_info['email']
    es_client = ElasticsearchClient.get()
    try:
        response = es_client.get(index=Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica]),
                                 doc_type=ESDocType.subscription.name,
                                 id=uuid)
    except NotFoundError as ex:
        raise DSSException(requests.codes.not_found, "not_found", "Cannot find subscription!") from ex
    stored_metadata = response['_source']
    if stored_metadata['owner'] != authenticated_user_email:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden", "Your credentials can't access this subscription!")
    # get all indexes that use the current alias
    alias_name = Config.get_es_alias_name(ESIndexType.docs, Replica[replica])
    doc_indexes = _get_indexes_by_alias(es_client, alias_name)
    _unregister_percolate(es_client, doc_indexes, uuid)
    es_client.delete(index=Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica]),
                     doc_type=ESDocType.subscription.name,
                     id=uuid)
    timestamp = datetime.datetime.utcnow()
    time_deleted = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")
    return jsonify({'timeDeleted': time_deleted}), requests.codes.okay

def get(uuid: str, replica: str):
    owner = security.get_token_email(request.token_info)
    es_client = ElasticsearchClient.get()
    try:
        response = es_client.get(index=Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica]),
                                 doc_type=ESDocType.subscription.name,
                                 id=uuid)
    except NotFoundError:
        raise DSSException(requests.codes.not_found, "not_found", "Cannot find subscription!")
    source = response['_source']
    source['uuid'] = uuid
    source['replica'] = replica
    if 'hmac_key_id' in response:
        source['hmac_key_id'] = response['hmac_key_id']
    if 'hmac_secret_key' in source:
        source.pop('hmac_secret_key')
    if source['owner'] != owner:
        # common_error_handler defaults code to capitalized 'Forbidden' for Werkzeug exception. Keeping consistent.
        raise DSSException(requests.codes.forbidden, "Forbidden", "Your credentials can't access this subscription!")
    return jsonify(source), requests.codes.okay

def verify_jwt(token: str) -> typing.Optional[typing.Mapping]:
    try:
        unverified_token = jwt.decode(token, verify=False)
    except jwt.DecodeError:
        logger.info(f"Failed to decode JWT: {token}", exc_info=True)
        raise DSSException(401, 'Unauthorized', 'Failed to decode token.')
    assert_authorized_issuer(unverified_token)
    issuer = unverified_token['iss']
    public_keys = get_public_keys(issuer)
    try:
        token_header = jwt.get_unverified_header(token)
        verified_tok = jwt.decode(token,
                                  key=public_keys[token_header["kid"]],
                                  issuer=issuer,
                                  audience=Config.get_audience(),
                                  algorithms=allowed_algorithms)
        logger.info("""{"valid": true, "token": %s}""", json.dumps(verified_tok))
    except jwt.PyJWTError as ex:  # type: ignore
        logger.info("""{"valid": false, "token": %s}""", json.dumps(unverified_token), exc_info=True)
        raise DSSException(401, 'Unauthorized', 'Authorization token is invalid') from ex
    return verified_tok

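# Illustrative sketch (not part of the source): verify_jwt above expects the raw bearer token, so a
# caller would typically strip the scheme from the Authorization header first. The header-parsing
# helper below is an assumption for illustration, not the project's actual request plumbing.
def _verify_request_token_example(authorization_header: str) -> typing.Optional[typing.Mapping]:
    scheme, _, token = authorization_header.partition(" ")
    if scheme.lower() != "bearer" or not token:
        raise DSSException(401, 'Unauthorized', 'Expected a bearer token in the Authorization header.')
    return verify_jwt(token)
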
def get_token_email(token_info: typing.Mapping[str, typing.Any]) -> str:
    try:
        email_claim = Config.get_OIDC_email_claim()
        return token_info.get(email_claim) or token_info['email']
    except KeyError:
        raise DSSException(401, 'Unauthorized', 'Authorization token is missing email claims.')

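# Illustrative sketch (not part of the source): get_token_email above prefers the configured OIDC
# email claim and falls back to the standard 'email' claim. The claim name and token contents below
# are hypothetical.
def _token_email_example():
    token_info = {
        "https://auth.example.org/email": "alice@example.org",  # hypothetical custom claim name
        "email": "alice@example.org",
    }
    # If Config.get_OIDC_email_claim() returns the custom claim name, that value wins;
    # otherwise the plain 'email' claim is used.
    return get_token_email(token_info)
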
def resolve_content_item(replica: Replica, blobstore_handle: BlobStore, item: dict):
    try:
        if item["type"] in {"file", "bundle", "collection"}:
            item_metadata = get_json_metadata(item["type"], item["uuid"], item["version"], replica, blobstore_handle)
        else:
            item_metadata = get_json_metadata("file", item["uuid"], item["version"], replica, blobstore_handle)
            if "fragment" not in item:
                raise Exception('The "fragment" field is required in collection elements '
                                'other than files, bundles, and collections')
            blob_path = compose_blob_key(item_metadata)
            # check that item is marked as metadata, is json, and is less than max size
            item_doc = json.loads(blobstore_handle.get(replica.bucket, blob_path))
            item_content = jsonpointer.resolve_pointer(item_doc, item["fragment"])
            return item_content
    except DSSException:
        raise
    except Exception as e:
        raise DSSException(requests.codes.unprocessable_entity,
                           "invalid_link",
                           'Error while parsing the link "{}": {}: {}'.format(item, type(e).__name__, e))

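# Illustrative sketch (not part of the source): a hypothetical content item of a non-core type,
# which resolve_content_item above would resolve by loading the referenced file's blob and applying
# the JSON pointer in "fragment". The UUID, version, and pointer values are placeholders.
example_content_item = {
    "type": "metadata",                                 # anything other than file/bundle/collection
    "uuid": "deadbeef-0000-4000-8000-000000000000",
    "version": "2020-06-15T080000.000000Z",
    "fragment": "/donor_organism/genus_species/0",      # JSON pointer into the referenced document
}
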
def get(uuid: str, replica: str, version: str = None):
    authenticated_user_email = security.get_token_email(request.token_info)
    collection_body = get_impl(uuid=uuid, replica=replica, version=version)
    if collection_body["owner"] != authenticated_user_email:
        raise DSSException(requests.codes.forbidden, "forbidden", "Collection access denied")
    return collection_body

def get(uuid: str, replica: str):
    owner = security.get_token_email(request.token_info)
    subscription = get_subscription(Replica[replica], owner, uuid)
    if subscription is None or owner != subscription[SubscriptionData.OWNER]:
        raise DSSException(404, "not_found", "Cannot find subscription!")
    if 'hmac_secret_key' in subscription:
        subscription.pop('hmac_secret_key')
    return subscription, requests.codes.ok

def delete(uuid: str, replica: str):
    owner = security.get_token_email(request.token_info)
    subscription = get_subscription(Replica[replica], owner, uuid)
    if subscription is None or owner != subscription[SubscriptionData.OWNER]:
        raise DSSException(404, "not_found", "Cannot find subscription!")
    delete_subscription(Replica[replica], owner, uuid)
    timestamp = datetime.datetime.utcnow()
    time_deleted = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")
    return jsonify({'timeDeleted': time_deleted}), requests.codes.okay

def get(replica: str, checkout_job_id: str):
    assert replica is not None
    _replica = Replica[replica]
    try:
        response = get_bundle_checkout_status(checkout_job_id, _replica, _replica.checkout_bucket)
    except BlobNotFoundError:
        raise DSSException(requests.codes.not_found, "not_found", "Cannot find checkout!")
    return response, requests.codes.ok

def put(json_request_body: dict, replica: str):
    owner = security.get_token_email(request.token_info)
    if count_subscriptions_for_owner(Replica[replica], owner) > SUBSCRIPTION_LIMIT:
        raise DSSException(requests.codes.not_acceptable, "not_acceptable",
                           f"Users cannot exceed {SUBSCRIPTION_LIMIT} subscriptions!")
    subscription_doc = json_request_body.copy()
    subscription_doc[SubscriptionData.OWNER] = security.get_token_email(request.token_info)
    subscription_uuid = str(uuid4())
    subscription_doc[SubscriptionData.UUID] = subscription_uuid
    subscription_doc[SubscriptionData.REPLICA] = Replica[replica].name
    if subscription_doc.get(SubscriptionData.JMESPATH_QUERY) is not None:
        try:
            jmespath.compile(subscription_doc[SubscriptionData.JMESPATH_QUERY])
        except JMESPathError:
            raise DSSException(requests.codes.bad_request,
                               "invalid_jmespath",
                               "JMESPath query is invalid")
    # validate attachment JMESPath if present
    attachments = subscription_doc.get(SubscriptionData.ATTACHMENTS)
    if attachments is not None:
        for name, definition in attachments.items():
            if name.startswith('_'):
                raise DSSException(requests.codes.bad_request,
                                   "invalid_attachment_name",
                                   f"Attachment names must not start with underscore ({name})")
            type_ = definition['type']
            if type_ == 'jmespath':
                expression = definition['expression']
                try:
                    jmespath.compile(expression)
                except JMESPathError as e:
                    raise DSSException(requests.codes.bad_request,
                                       "invalid_attachment_expression",
                                       f"Unable to compile JMESPath expression for attachment {name}") from e
            else:
                assert False, type_
    put_subscription(subscription_doc)
    return subscription_doc, requests.codes.created

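# Illustrative sketch (not part of the source): a hypothetical request body that would pass the
# validation in put() above -- a compilable JMESPath query plus one 'jmespath' attachment whose name
# does not start with an underscore. The top-level key names and the callback URL are assumptions
# made for this example; the real field names are defined by the SubscriptionData constants.
example_subscription_request = {
    "callback_url": "https://example.org/dss-notify",  # hypothetical notification endpoint
    "jmespath_query": "bundle_info.version",           # must compile with jmespath.compile()
    "attachments": {
        "bundle_version": {                            # name must not start with '_'
            "type": "jmespath",
            "expression": "bundle_info.version",
        },
    },
}
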
def post(uuid: str, json_request_body: dict, replica: str, version: str = None):
    assert replica is not None
    _replica: Replica = Replica[replica]
    dst_bucket = json_request_body.get('destination', _replica.checkout_bucket)
    if '/' in dst_bucket:
        raise DSSException(400, "illegal_arguments", "Destination bucket invalid!")
    try:
        execution_id = start_bundle_checkout(_replica,
                                             uuid,
                                             version,
                                             dst_bucket=dst_bucket,
                                             email_address=json_request_body.get('email', None))
    except BundleNotFoundError:
        raise DSSException(404, "not_found", "Cannot find bundle!")
    return jsonify(dict(checkout_job_id=execution_id)), requests.codes.ok

def patch(uuid: str, json_request_body: dict, replica: str, version: str):
    authenticated_user_email = security.get_token_email(request.token_info)
    uuid = uuid.lower()
    owner = get_impl(uuid=uuid, replica=replica)["owner"]
    if owner != authenticated_user_email:
        raise DSSException(requests.codes.forbidden, "forbidden", "Collection access denied")
    handle = Config.get_blobstore_handle(Replica[replica])
    try:
        cur_collection_blob = handle.get(Replica[replica].bucket, CollectionFQID(uuid, version).to_key())
    except BlobNotFoundError:
        raise DSSException(404, "not_found", "Could not find collection for UUID {}".format(uuid))
    collection = json.loads(cur_collection_blob)
    for field in "name", "description", "details":
        if field in json_request_body:
            collection[field] = json_request_body[field]
    remove_contents_set = set(map(hashabledict, json_request_body.get("remove_contents", [])))
    collection["contents"] = [i for i in collection["contents"] if hashabledict(i) not in remove_contents_set]
    verify_collection(json_request_body.get("add_contents", []), Replica[replica], handle)
    collection["contents"].extend(json_request_body.get("add_contents", []))
    collection["contents"] = _dedpuplicate_contents(collection["contents"])
    timestamp = datetime.datetime.utcnow()
    new_collection_version = datetime_to_version_format(timestamp)
    handle.upload_file_handle(Replica[replica].bucket,
                              CollectionFQID(uuid, new_collection_version).to_key(),
                              io.BytesIO(json.dumps(collection).encode("utf-8")))
    return jsonify(dict(uuid=uuid, version=new_collection_version)), requests.codes.ok

def get_json_metadata(entity_type: str, uuid: str, version: str, replica: Replica,
                      blobstore_handle: BlobStore, max_metadata_size: int = MAX_METADATA_SIZE):
    try:
        key = "{}s/{}.{}".format(entity_type, uuid, version)
        # TODO: verify that file is a metadata file
        size = blobstore_handle.get_size(replica.bucket, key)
        if size > max_metadata_size:
            raise DSSException(requests.codes.unprocessable_entity,
                               "invalid_link",
                               "The file UUID {} refers to a file that is too large to process".format(uuid))
        return json.loads(blobstore_handle.get(replica.bucket, "{}s/{}.{}".format(entity_type, uuid, version)))
    except BlobNotFoundError:
        raise DSSException(requests.codes.unprocessable_entity,
                           "invalid_link",
                           "Could not find file for UUID {}".format(uuid))

def is_DSS_VERSION(val):
    """
    Verifies that `val` is compliant with the expected format. For more info on connexion custom type
    formats see https://connexion.readthedocs.io/en/latest/cookbook.html#custom-type-format.
    :param val: the value to verify
    :return: the verified value
    """
    from iso8601 import iso8601
    # convert to a datetime so we can format it exactly as the system requires (with microsecond precision)
    try:
        timestamp = iso8601.parse_date(val)
    except iso8601.ParseError:
        raise DSSException(requests.codes.bad_request,
                           "illegal_version",
                           "version should be an RFC3339 compliant timestamp")
    timestamp = datetime_to_version_format(timestamp)
    if timestamp != val:
        raise DSSException(requests.codes.bad_request,
                           "illegal_version",
                           "version should be a DSS_VERSION with the format 'YYYY-MM-DDTHHmmSS.zzzzzzZ'")
    return val

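# Illustrative sketch (not part of the source): is_DSS_VERSION above only accepts values that
# round-trip exactly through datetime_to_version_format. The sample values below are hypothetical.
def _dss_version_example():
    assert is_DSS_VERSION("2020-06-15T080000.000000Z") == "2020-06-15T080000.000000Z"
    # "2020-06-15T08:00:00+00:00" parses as RFC3339 but does not match the DSS_VERSION layout,
    # so is_DSS_VERSION would raise an 'illegal_version' DSSException for it.
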
def test_502_get_bundle_HAS_retry_after_response(self):
    """Mock seems resistant to multiple calls, therefore this is only used for one endpoint."""
    with mock.patch('dss.api.bundles.get', side_effect=DSSException(502, 'bad_gateway', "Bad Gateway")):
        self.app = ThreadedLocalServer()
        self.app.start()
        uuid = "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"
        version = datetime_to_version_format(datetime.datetime.utcnow())
        url = str(UrlBuilder().set(path=f"/v1/bundles/{uuid}")
                  .add_query("version", version)
                  .add_query("replica", 'aws'))
        r = self.assertGetResponse(url, 502, headers=get_auth_header())
        self.assertEqual(int(r.response.headers['Retry-After']), 10)
        self.app.shutdown()

def list_events(replica: str, from_date: str = None, to_date: str = None, per_page: int = 1, token: str = None):
    if token:
        fdate = datetime_from_timestamp(token)
    else:
        fdate = datetime_from_timestamp(from_date) if from_date else datetime.min
    tdate = datetime_from_timestamp(to_date) if to_date else datetime.max
    if fdate > tdate:
        raise DSSException(400, "bad_request", "to_date must be greater than from_date")
    ff = Config.get_flashflood_handle(Replica[replica].flashflood_prefix_read)
    event_streams = list()
    for i, event_stream in enumerate(ff.list_event_streams(fdate, tdate)):
        if datetime_from_timestamp(event_stream['from_date']) < tdate:
            event_streams.append(event_stream)
        else:
            break
        if i == per_page:
            break
    if len(event_streams) <= per_page:
        response = make_response(jsonify(dict(event_streams=event_streams)), requests.codes.ok)
        response.headers['X-OpenAPI-Pagination'] = 'false'
    else:
        next_url = UrlBuilder(request.url)
        next_url.replace_query("token", event_streams[-1]['from_date'])
        link = f"<{next_url}>; rel='next'"
        response = make_response(jsonify(dict(event_streams=event_streams[:-1])), requests.codes.partial)
        response.headers['Link'] = link
        response.headers['X-OpenAPI-Pagination'] = 'true'
        response.headers['X-OpenAPI-Paginated-Content-Key'] = 'event_streams'
    return response

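# Illustrative sketch (not part of the source): when list_events above returns 206, the Link header
# carries a 'next' URL whose 'token' query parameter resumes the listing. A hypothetical client loop
# follows it until a 200 page arrives; the endpoint URL and the absence of auth headers are
# assumptions made purely for illustration.
import requests as _requests

def _fetch_all_event_streams(first_page_url: str):
    streams, url = [], first_page_url  # e.g. "https://dss.example.org/v1/events?replica=aws"
    while url:
        resp = _requests.get(url)
        resp.raise_for_status()
        streams.extend(resp.json()["event_streams"])
        url = resp.links.get("next", {}).get("url")  # requests parses the Link header; absent on the last page
    return streams
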
def mock_500_server_error():
    raise DSSException(requests.codes.internal_server_error, "internal_server_error", "Internal Server Error")

def put(json_request_body: dict, replica: str):
    uuid = str(uuid4())
    es_query = json_request_body['es_query']
    owner = request.token_info['email']
    es_client = ElasticsearchClient.get()
    index_mapping = {
        "mappings": {
            ESDocType.subscription.name: {
                "properties": {
                    "owner": {
                        "type": "string",
                        "index": "not_analyzed"
                    },
                    "es_query": {
                        "type": "object",
                        "enabled": "false"
                    }
                }
            }
        }
    }
    # Elasticsearch preprocesses inputs by splitting strings on punctuation.
    # So for [email protected], if I searched for people with the email address [email protected],
    # [email protected] would show up because elasticsearch matched example w/ example.
    # By including "index": "not_analyzed", Elasticsearch leaves all owner inputs alone.
    index_name = Config.get_es_index_name(ESIndexType.subscriptions, Replica[replica])
    IndexManager.get_subscription_index(es_client, index_name, index_mapping)
    # get all indexes that use the current alias
    alias_name = Config.get_es_alias_name(ESIndexType.docs, Replica[replica])
    doc_indexes = _get_indexes_by_alias(es_client, alias_name)
    # try to subscribe the query to each of the indexes.
    subscribed_indexes = []
    for doc_index in doc_indexes:
        try:
            percolate_registration = _register_percolate(es_client, doc_index, uuid, es_query, replica)
        except ElasticsearchException as ex:
            logger.debug(f"Exception occurred when registering a document to an index. Exception: {ex}")
            last_ex = ex
        else:
            logger.debug(f"Percolate query registration succeeded:\n{percolate_registration}")
            subscribed_indexes.append(doc_index)
    # Queries are unlikely to fit in all of the indexes, therefore errors will almost always occur.
    # Only return an error if no queries are successfully indexed.
    if doc_indexes and not subscribed_indexes:
        logger.critical(f"Percolate query registration failed: owner: {owner}, uuid: {uuid}, "
                        f"replica: {replica}, es_query: {es_query}, Exception: {last_ex}")
        raise DSSException(requests.codes.internal_server_error,
                           "elasticsearch_error",
                           "Unable to register elasticsearch percolate query!") from last_ex
    json_request_body['owner'] = owner
    try:
        subscription_registration = _register_subscription(es_client, uuid, json_request_body, replica)
        logger.debug(f"Event Subscription succeeded:\n{subscription_registration}")
    except ElasticsearchException as ex:
        logger.critical(f"Event Subscription failed: owner: {owner}, uuid: {uuid}, "
                        f"replica: {replica}, Exception: {ex}")
        # Delete the percolate query to make sure queries and subscriptions stay in sync.
        doc_indexes = _get_indexes_by_alias(es_client, alias_name)
        _unregister_percolate(es_client, doc_indexes, uuid)
        raise DSSException(requests.codes.internal_server_error,
                           "elasticsearch_error",
                           "Unable to register subscription! Rolling back percolate query.")
    return jsonify(dict(uuid=uuid)), requests.codes.created

def mock_501_not_implemented():
    raise DSSException(requests.codes.not_implemented, "not_implemented", "Not Implemented")

def mock_502_bad_gateway():
    raise DSSException(requests.codes.bad_gateway, "bad_gateway", "Bad Gateway")

def mock_503_service_unavailable():
    raise DSSException(requests.codes.service_unavailable, "service_unavailable", "Service Unavailable")

def mock_504_gateway_timeout():
    raise DSSException(requests.codes.gateway_timeout, "gateway_timeout", "Gateway Timeout")

def get(uuid: str, replica: str, version: str = None):
    key = f"bundles/{uuid}.{version}"
    doc = events.get_bundle_metadata_document(Replica[replica], key)
    if doc is None:
        raise DSSException(404, "not_found", "Cannot find event!")
    return doc