def create_concept_instance(db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict):
    """Create a single record ("concept instance") for a model.

    Creates the record inside a transaction, emits a "CreateRecord"
    changelog event, logs the new record id to the audit logger, and
    returns the concept-instance representation with a 201 status code.

    Raises BadRequest if no record was created.
    """
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        created = db.create_records_tx(
            tx,
            concept_id_or_name,
            [to_record(properties, body["values"])],
            fill_missing=True,
        )
        if not created:
            raise BadRequest(
                f"Could not create concept instance [{concept_id_or_name}]")
        new_record = created[0]

        trace = AuditLogger.trace_id_header()

        # Emit "CreateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=CreateRecord(
                id=new_record.id, name=new_record.name, model_id=model.id
            ),
            trace_id=TraceId(trace),
        )

        # Log the created concept instance:
        AuditLogger.get().message().append(
            "records", str(new_record.id)
        ).log(trace)

        return to_concept_instance(new_record, model, properties), 201
def update_properties(
    db: PartitionedDatabase, concept_id_or_name: str, body: List[JsonDict]
) -> List[JsonDict]:
    """Create or update properties on a model.

    Emits one changelog event per property: CreateModelProperty for newly
    created properties, UpdateModelProperty for existing ones.
    Returns the resulting properties as plain dicts.
    """
    x_bf_trace_id = AuditLogger.trace_id_header()
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        incoming = [to_model_property(p) for p in body]
        properties = db.update_properties_tx(tx, model, *incoming)

        # One event per property; class depends on whether the property
        # was created or merely updated:
        events = []
        for prop, was_created in properties:
            event_cls = CreateModelProperty if was_created else UpdateModelProperty
            events.append(
                event_cls(
                    property_name=prop.name,
                    model_id=UUID(model.id),
                    model_name=model.name,
                )
            )

        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=events,
            trace_id=TraceId(x_bf_trace_id),
        )
        return [to_property_dict(p) for p, _ in properties]
def update_record(
    db: PartitionedDatabase, record_id: RecordId, body: JsonDict
) -> JsonDict:
    """Update a record's values and emit an "UpdateRecord" changelog event.

    The event carries a per-property diff computed from the record's values
    before and after the update.

    Raises NotFound if the record, or the model it belongs to, cannot be
    found.
    """
    x_bf_trace_id = AuditLogger.trace_id_header()
    record = db.get_record(record_id, embed_linked=False, fill_missing=True)
    if record is None:
        raise NotFound(f"Could not get record {record_id}")
    model = db.get_model_of_record(record)
    if model is None:
        # Fix: error message previously read "Cound not find model".
        raise NotFound(f"Could not find model for record {record_id}")
    properties = db.get_properties(model)
    updated_record = db.update_record(record_id, body["values"])

    # Emit "UpdateRecord" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=UpdateRecord(
            id=record.id,
            name=record.name,
            model_id=model.id,
            properties=UpdateRecord.compute_diff(
                properties, record.values, updated_record.values
            ),
        ),
        trace_id=TraceId(x_bf_trace_id),
    )
    return updated_record.to_dict()
def update_properties(db: PartitionedDatabase, model_id_or_name: str, body: List[JsonDict]):
    """Create or update model properties from a raw JSON payload.

    The payload is deserialized with the ModelProperty schema, applied in a
    transaction, and a changelog event is emitted per property (create vs.
    update). Returns the resulting properties as dicts.
    """
    x_bf_trace_id = AuditLogger.trace_id_header()
    payload: List[ModelProperty] = ModelProperty.schema().load(body, many=True)
    with db.transaction() as tx:
        model = db.get_model_tx(tx, model_id_or_name)
        properties = db.update_properties_tx(tx, model, *payload)

        model_uuid = UUID(model.id)
        events = [
            (CreateModelProperty if created else UpdateModelProperty)(
                property_name=prop.name,
                model_id=model_uuid,
                model_name=model.name,
            )
            for prop, created in properties
        ]
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=events,
            trace_id=TraceId(x_bf_trace_id),
        )
        return [prop.to_dict() for prop, _ in properties]
def create_concept(db: PartitionedDatabase, body: JsonDict) -> Tuple[JsonDict, int]:
    """Create a new model ("concept") and emit a "CreateModel" event.

    Returns the concept representation (with zero properties) and a 201
    status code.
    """
    trace = AuditLogger.trace_id_header()
    model = db.create_model(**filter_model_dict(body))

    # Emit "CreateModel" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=CreateModel(id=UUID(model.id), name=model.name),
        trace_id=TraceId(trace),
    )
    # A freshly created model never has properties yet:
    return to_concept_dict(model, property_count=0), 201
def delete_model(db: PartitionedDatabase, model_id_or_name: str) -> None:
    """Delete a model and emit a "DeleteModel" changelog event."""
    deleted = db.delete_model(model_id_or_name)
    trace = AuditLogger.trace_id_header()

    # Emit "DeleteModel" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=DeleteModel(id=deleted.id, name=deleted.name),
        trace_id=TraceId(trace),
    )
    return None
def update_model(db: PartitionedDatabase, model_id_or_name: str, body: JsonDict) -> JsonDict:
    """Update a model and emit an "UpdateModel" changelog event.

    Returns the updated model as a dict.
    """
    updated = db.update_model(model_id_or_name, **body)
    trace = AuditLogger.trace_id_header()

    # Emit "UpdateModel" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=UpdateModel(id=updated.id, name=updated.name),
        trace_id=TraceId(trace),
    )
    return updated.to_dict()
def create_model(db: PartitionedDatabase, body: JsonDict) -> Tuple[JsonDict, int]:
    """Create a model and emit a "CreateModel" changelog event.

    Returns the new model as a dict plus a 201 status code.
    """
    new_model = db.create_model(**body)
    trace = AuditLogger.trace_id_header()

    # Emit "CreateModel" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=CreateModel(id=new_model.id, name=new_model.name),
        trace_id=TraceId(trace),
    )
    return new_model.to_dict(), 201
def delete_property(
    db: PartitionedDatabase,
    model_id: str,
    property_name: str,
    modify_records: bool = False,
) -> None:
    """Delete a property from a model, emitting a "DeleteModelProperty" event.

    When ``modify_records`` is True, the property is first stripped from
    every record that uses it — but only if the number of affected records
    is within the configured ``max_record_count_for_property_deletion``
    limit; otherwise a BadRequest is raised.

    Raises NotFound if the property does not exist, and ServerError if the
    bulk strip did not touch exactly the expected number of records.
    """
    x_bf_trace_id = AuditLogger.trace_id_header()
    # Safety limit: refuse mass-modifications above this record count.
    max_record_count = current_app.config[
        "config"].max_record_count_for_property_deletion
    with db.transaction() as tx:
        model = db.get_model_tx(tx, model_id)
        if modify_records:
            record_count = db.model_property_record_count_tx(
                tx, model_id, property_name)
            if record_count > 0:
                if record_count > max_record_count:
                    raise BadRequest(
                        f"Cannot delete properties that are used on > {max_record_count} records. This property is used on {record_count}"
                    )
                # Locate the property object by name so it can be stripped
                # from records below:
                model_properties = [
                    p
                    for p in db.get_properties_tx(tx, model_id)
                    if p.name == property_name
                ]
                if not model_properties:
                    raise NotFound(f"no such property {property_name} exists")
                updated_records = db.delete_property_from_all_records_tx(
                    tx, model_id, model_properties[0])
                # Consistency check: the strip must have reached every
                # record counted above, or the transaction is aborted.
                if updated_records != record_count:
                    raise ServerError(
                        "the property was not removed from all records")
        deleted = db.delete_property_tx(tx, model_id, property_name)
        if deleted is None:
            raise NotFound(
                f"Could not delete property [{model_id}.{property_name}]")
        # Emit "DeleteModelProperty" event (inside the transaction so the
        # event is only sent when the deletion succeeds):
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteModelProperty(
                property_name=deleted.name,
                model_id=UUID(model.id),
                model_name=model.name,
            ),
            trace_id=TraceId(x_bf_trace_id),
        )
def update_concept(
    db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict
) -> JsonDict:
    """Update a model ("concept") and emit an "UpdateModel" event.

    Returns the updated concept dict, including its property count.
    """
    trace = AuditLogger.trace_id_header()
    with db.transaction() as tx:
        updated = db.update_model_tx(
            tx, concept_id_or_name, **filter_model_dict(body)
        )
        n_properties = db.get_property_counts_tx(tx, [updated.id])[updated.id]

        # Emit "UpdateModel" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=UpdateModel(id=UUID(updated.id), name=updated.name),
            trace_id=TraceId(trace),
        )
        return to_concept_dict(updated, n_properties)
def update_concept_instance(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    concept_instance_id: str,
    body: JsonDict,
) -> JsonDict:
    """Update a record's values and emit an "UpdateRecord" event.

    The event includes a diff of the values before and after the update.
    Raises NotFound if the record does not exist.
    """
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        existing = db.get_record_tx(
            tx,
            concept_instance_id,
            embed_linked=False,
            fill_missing=True,
        )
        if existing is None:
            raise NotFound(f"Could not get record {concept_instance_id}")

        updated = db.update_record_tx(
            tx,
            concept_instance_id,
            to_record(properties, body["values"]),
            fill_missing=True,
        )

        trace = AuditLogger.trace_id_header()
        value_diff = UpdateRecord.compute_diff(
            properties, existing.values, updated.values
        )

        # Emit a "UpdateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=UpdateRecord(
                id=existing.id,
                name=existing.name,
                model_id=model.id,
                properties=value_diff,
            ),
            trace_id=TraceId(trace),
        )
        return to_concept_instance(updated, model, properties)
def create_record(db: PartitionedDatabase, model_id_or_name: str, body: JsonDict) -> Tuple[JsonDict, int]:
    """Create one record for a model and emit a "CreateRecord" event.

    Returns the record dict and a 201 status code. Raises NotFound if the
    model does not exist.
    """
    trace = AuditLogger.trace_id_header()
    new_record = db.create_records(model_id_or_name, [body["values"]])[0]

    model = db.get_model(model_id_or_name)
    if model is None:
        raise NotFound(f"Model {model_id_or_name}")

    # Emit "CreateRecord" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=CreateRecord(
            id=new_record.id, name=new_record.name, model_id=model.id
        ),
        trace_id=TraceId(trace),
    )
    return new_record.to_dict(), 201
def delete_concept_instances(db: PartitionedDatabase, concept_id_or_name: str) -> JsonDict:
    """Delete a batch of records and report per-id success/failure.

    The list of record ids is read from the raw request body (see HACK
    below). Each deletion is attempted independently; failures are
    collected rather than aborting the batch. One "DeleteRecord" event is
    emitted per successfully deleted record.

    Returns ``{"success": [...], "errors": [[id, message], ...]}``.
    """
    # HACK: request bodies on DELETE requests do not have defined
    # semantics and are not directly support by OpenAPI/Connexion. See
    # - https://swagger.io/docs/specification/describing-request-body
    # - https://github.com/zalando/connexion/issues/896
    body = connexion.request.json
    success = []
    errors = []
    events = []
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, model)
        for instance_id in body:
            try:
                deleted = db.delete_record_tx(tx, instance_id, properties)
                events.append(
                    DeleteRecord(
                        id=deleted.id,
                        name=deleted.name,
                        model_id=model.id,
                    ))
            except Exception as e:  # noqa: F841
                # Best-effort batch: record the failure and keep going.
                errors.append([instance_id, f"Could not delete {instance_id}"])
            else:
                success.append(instance_id)
        x_bf_trace_id = AuditLogger.trace_id_header()
        # Emit a "DeleteRecord" event:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=events,
            trace_id=TraceId(x_bf_trace_id),
        )
    return {"success": success, "errors": errors}
def delete_record(db: PartitionedDatabase, record_id: RecordId) -> None:
    """Delete a record and emit a "DeleteRecord" changelog event.

    Raises NotFound if no model can be resolved for the record.
    """
    x_bf_trace_id = AuditLogger.trace_id_header()
    model = db.get_model_of_record(record_id)
    if model is None:
        # Fix: error message previously read "Cound not find model".
        raise NotFound(f"Could not find model for record {record_id}")
    properties = db.get_properties(model)
    deleted = db.delete_record(record_id, properties)

    # Emit "DeleteRecord" event:
    PennsieveJobsClient.get().send_changelog_event(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        event=DeleteRecord(id=deleted.id, name=deleted.name, model_id=model.id),
        trace_id=TraceId(x_bf_trace_id),
    )
    return None
def create_records(
    db: PartitionedDatabase, model_id_or_name: str, body: List[Dict]
) -> Tuple[List[JsonDict], int]:
    """Create a batch of records and emit one "CreateRecord" event each.

    Returns the created records as dicts plus a 201 status code.
    Raises NotFound if the model does not exist.
    """
    trace = AuditLogger.trace_id_header()
    created = db.create_records(
        model_id_or_name, records=[r["values"] for r in body]
    )

    model = db.get_model(model_id_or_name)
    if model is None:
        raise NotFound(f"Model {model_id_or_name}")

    # Emit "CreateRecord" event:
    PennsieveJobsClient.get().send_changelog_events(
        organization_id=db.organization_id,
        dataset_id=db.dataset_id,
        user_id=db.user_id,
        events=[
            CreateRecord(id=rec.id, name=rec.name, model_id=model.id)
            for rec in created
        ],
        trace_id=TraceId(trace),
    )
    return [rec.to_dict() for rec in created], 201
def delete_concept_instance(db: PartitionedDatabase, concept_id_or_name: str, concept_instance_id: str) -> JsonDict:
    """Delete a single record ("concept instance") from a model.

    Emits a "DeleteRecord" changelog event and returns the deleted
    instance's representation.

    Fix: the return annotation was ``-> None`` although the function has
    always returned the deleted concept instance; the annotation now
    matches the actual (unchanged) behavior.
    """
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = db.delete_record_tx(tx, concept_instance_id, properties)

        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit a "DeleteRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteRecord(id=record.id, name=record.name, model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )
        return to_concept_instance(record, model, properties)
def create_concept_instance_batch(db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict):
    """Create a batch of records ("concept instances") for a model.

    Emits one "CreateRecord" event per created record and logs every new
    record id to the audit logger. Returns the instance representations.

    Raises BadRequest when nothing could be created.
    """
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        to_create = [to_record(properties, req["values"]) for req in body]
        created = db.create_records_tx(
            tx, concept_id_or_name, to_create, fill_missing=True
        )
        instances = [to_concept_instance(rec, model, properties) for rec in created]
        if not instances:
            raise BadRequest(
                f"Could not create concept instances for [{concept_id_or_name}]"
            )

        trace = AuditLogger.trace_id_header()

        # Emit "CreateRecord" events:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=[
                CreateRecord(id=rec.id, name=rec.name, model_id=model.id)
                for rec in created
            ],
            trace_id=TraceId(trace),
        )

        # Log the created concept instances:
        AuditLogger.get().message().append(
            "records", *[str(rec.id) for rec in created]
        ).log(trace)

        return instances
def delete_property(db: PartitionedDatabase, concept_id_or_name: str, property_id: str) -> None:
    """Delete a property from a model and emit a "DeleteModelProperty" event.

    Raises NotFound if the property does not exist on the model.
    """
    trace = AuditLogger.trace_id_header()
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        removed = db.delete_property_tx(tx, model, property_id)
        if removed is None:
            raise NotFound(
                f"Could not find property {property_id} of model {concept_id_or_name}"
            )
        # Emit "DeleteModelProperty" event for the removed property:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=DeleteModelProperty(
                property_name=removed.name,
                model_id=UUID(model.id),
                model_name=model.name,
            ),
            trace_id=TraceId(trace),
        )
def create_app(
    config: Config = None,
    db: Database = None,
    api_client: PennsieveApiClient = None,
    jobs_client: PennsieveJobsClient = None,
    audit_logger: Auditor = None,
    victor_ops_client: VictorOpsClient = None,
):
    """Build and wire the Connexion application.

    Mounts the internal/v1/v2 OpenAPI specs, registers error handlers
    mapping service exceptions to HTTP status codes, and injects the
    service dependencies (config, database, API/jobs/VictorOps clients,
    audit logger) into ``app.config`` — constructing defaults for any
    dependency not supplied (so tests can pass fakes).
    """
    app = App(__name__)
    # Bundled OpenAPI specifications:
    health = bundled("health.yml")
    internal = bundled("model-service-internal.yml")
    api_v1 = bundled("model-service-v1.yml")
    api_v2 = bundled("model-service-v2.yml")
    api_v2_streaming = bundled("model-service-streaming-v2.yml")
    app.add_api(
        internal, validate_responses=True, pythonic_params=True, base_path="/internal"
    )
    app.add_api(api_v1, validate_responses=True, pythonic_params=True, base_path="/v1")
    app.add_api(api_v2, validate_responses=True, pythonic_params=True, base_path="/v2")
    app.app.json_encoder = CustomizedEncoder
    # Mount the v1 API again with no `v1/` prefix. Ideally this would rewritten
    # in the gateway, but internal services need to be updated to us the `/v1`
    # prefix first. This needs to be merged with `health` so that these routes
    # can share the same base path.
    #
    # See ticket: https://app.clickup.com/t/5mcufd
    root_api = {}
    root_api.update(api_v1)
    root_api["paths"].update(health["paths"])
    app.add_api(root_api, validate_responses=True, pythonic_params=True, base_path="/")
    # Unfortunately the only way to stream responses with connexion is to turn
    # response validation off.
    app.add_api(
        api_v2_streaming,
        validate_responses=False,
        pythonic_params=True,
        base_path="/v2/organizations",
    )

    # --- Error handlers: map exception types to HTTP responses ----------

    @app.app.errorhandler(ValueError)
    def handle_value_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            400,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(NotImplementedError)
    def handle_not_implemented_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            415,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(errors.ExternalRequestError)
    def handle_external_request_failure(error):
        return (
            dict(message=str(error)),
            500,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(errors.MissingTraceId)
    @app.app.errorhandler(errors.ModelServiceError)
    @app.app.errorhandler(errors.OperationError)
    @app.app.errorhandler(errors.InvalidOrganizationError)
    @app.app.errorhandler(errors.InvalidDatasetError)
    def handle_service_error(error):
        return error.to_json(), 400, {"Content-Type": "application/json"}

    @app.app.errorhandler(ExpiredSignatureError)
    @app.app.errorhandler(OAuthProblem)
    def handle_auth_error(error):
        return dict(message=str(error)), 401, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.RecordRelationshipNotFoundError)
    @app.app.errorhandler(errors.LegacyModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelNotFoundError)
    def handle_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.PackageProxyNotFoundError)
    def handle_proxy_package_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ExceededTimeLimitError)
    def handle_operation_timed_out(error):
        return error.to_json(), 408, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ModelPropertyInUseError)
    def handle_model_property_in_use(error):
        return error.to_json(), 422, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.LockedDatasetError)
    def handle_locked_dataset(error):
        return error.to_json(), 423, {"Content-Type": "application/json"}

    # --- Dependency injection: build defaults for anything not supplied --

    if config is None:
        config = Config()
    app.app.config["config"] = config
    if db is None:
        db = Database.from_config(config)
    app.app.config["db"] = db
    if api_client is None:
        api_client = PennsieveApiClient(config.pennsieve_api_host)
    app.app.config["api_client"] = api_client
    if jobs_client is None:
        sqs_client = boto3.client("sqs", region_name=config.aws_region)
        jobs_client = PennsieveJobsClient(sqs_client, config.jobs_sqs_queue_id)
    app.app.config["jobs_client"] = jobs_client
    if victor_ops_client is None:
        victor_ops_client = VictorOpsClient(
            config.victor_ops_url, f"{config.environment}-data-management"
        )
    app.app.config["victor_ops_client"] = victor_ops_client
    if audit_logger is None:
        audit_logger = AuditLogger(GatewayHost(config.gateway_internal_host))
    app.app.config["audit_logger"] = audit_logger
    # Log every request on the way out:
    app.app.after_request(log_request)
    return app