def test_get_package_by_node_id(bf, dataset, package, auth_header, trace_id_headers):
    client = PennsieveApiClient(bf.settings.api_host)
    response = client.get_package_ids(
        dataset.id, package.node_id, headers=dict(**auth_header, **trace_id_headers)
    )
    assert response.id == package.id
    assert response.node_id == package.node_id
def test_get_packages(bf, dataset, package, auth_header, trace_id_headers):
    client = PennsieveApiClient(bf.settings.api_host)
    packages = client.get_packages(
        dataset.id, [package.id], headers=dict(**auth_header, **trace_id_headers)
    )
    assert packages[package.id]["content"]["nodeId"] == package.node_id
    assert packages[package.id]["content"]["id"] == package.id
def test_get_datasets(bf, dataset, package, auth_header, trace_id_headers):
    client = PennsieveApiClient(bf.settings.api_host)

    datasets = client.get_datasets(headers=dict(**auth_header, **trace_id_headers))
    assert len(datasets) > 0

    dataset_ids = client.get_dataset_ids(
        headers=dict(**auth_header, **trace_id_headers)
    )
    assert sorted(d.int_id for d in datasets) == sorted(dataset_ids)
def test_get_packages_ignores_deleted_packages(
    bf, dataset, package, auth_header, trace_id_headers
):
    client = PennsieveApiClient(bf.settings.api_host)
    packages = client.get_packages(
        dataset.id,
        ["N:package:does-not-exist"],
        headers=dict(**auth_header, **trace_id_headers),
    )
    assert len(packages) == 0
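# Illustrative sketch (an assumption, not the project's actual conftest.py):
# the tests above rely on `auth_header` and `trace_id_headers` fixtures shaped
# roughly like this. The header names and values are hypothetical placeholders.
import pytest

@pytest.fixture
def auth_header():
    # Hypothetical bearer token; the real fixture presumably derives this
    # from the authenticated `bf` session.
    return {"Authorization": "Bearer <session-token>"}

@pytest.fixture
def trace_id_headers():
    # Hypothetical trace ID used to correlate requests in the audit log.
    return {"X-Bf-Trace-Id": "00000000-0000-0000-0000-000000000000"}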
def create_proxy_instance(
    db: PartitionedDatabase, proxy_type: str, body: JsonDict
) -> Tuple[List[JsonDict], int]:
    response = []
    with db.transaction() as tx:
        x_bf_trace_id = AuditLogger.trace_id_header()
        link_targets = []
        package_ids = []

        for target in body["targets"]:
            link_target = target["linkTarget"]
            relationship_type = target["relationshipType"]
            link_targets.append(link_target)

            concept_link_target = to_proxy_link_target(link_target)
            if concept_link_target is None:
                raise InvalidPackageProxyLinkTargetError(link_target=str(link_target))

            package = PennsieveApiClient.get().get_package_ids(
                db.dataset_node_id,
                body["external_id"],
                headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
            )
            package_ids.append(str(package.id))

            package_proxy = db.create_package_proxy_tx(
                tx=tx,
                record=concept_link_target.id,
                package_id=package.id,
                package_node_id=package.node_id,
                legacy_relationship_type=relationship_type,
            )
            link_result = {
                "proxyInstance": to_proxy_instance(PROXY_TYPE, package_proxy),
                "relationshipInstance": make_proxy_relationship_instance(
                    concept_link_target.id, package_proxy, relationship_type
                ),
            }
            response.append(link_result)

        AuditLogger.get().message().append("link-targets", *link_targets).append(
            "packages", *package_ids
        ).log(x_bf_trace_id)

    return response, 201
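# Illustrative request body for `create_proxy_instance`, reconstructed from the
# keys the handler reads above ("external_id", "targets", "linkTarget",
# "relationshipType"). The values, and the exact shape that
# `to_proxy_link_target` accepts, are hypothetical.
EXAMPLE_CREATE_PROXY_BODY = {
    "external_id": "N:package:11111111-2222-3333-4444-555555555555",
    "targets": [
        {
            # A link target that `to_proxy_link_target` can resolve to a record.
            "linkTarget": "<record-id>",
            "relationshipType": "belongs_to",
        }
    ],
}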
def wrapper(db: PartitionedDatabase, **kwargs):
    result = func(db, **kwargs)

    config = Config.from_app()
    jwt = service_claim(db.organization_id, db.dataset_id, config.jwt_config)

    try:
        PennsieveApiClient.get().touch_dataset(
            db.organization_id,
            db.dataset_id,
            headers={"Authorization": f"Bearer {jwt}"},
        )
    except Exception as e:
        VictorOpsClient.get().warning(
            f"organization/{db.organization_id}/dataset/{db.dataset_id}",
            f"Couldn't touch dataset {db.dataset_id} for organization={db.organization_id}",
        )
        logger.warning(
            f"couldn't touch timestamp for organization/{db.organization_id}/dataset/{db.dataset_id}: {e}"
        )

    return result
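# Context sketch (names here are hypothetical assumptions): `wrapper` above
# reads like the inner function of a decorator that bumps a dataset's
# updated-at timestamp after any mutating endpoint, applied roughly like so:
#
#     @touch_dataset_timestamp
#     def create_record(db: PartitionedDatabase, **kwargs):
#         ...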
def create_package_proxy(
    db: PartitionedDatabase, record_id: RecordId, package_id: PackageId, body: JsonDict
) -> Tuple[JsonDict, int]:
    x_bf_trace_id = AuditLogger.trace_id_header()
    package = PennsieveApiClient.get().get_package_ids(
        db.dataset_node_id,
        package_id,
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )
    return (
        db.create_package_proxy(
            record_id, package_id=package.id, package_node_id=package.node_id
        ).to_dict(),
        201,
    )
def get_files_paged(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    total_count, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )
    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()
    AuditLogger.get().message().append(
        "package-proxies", *package_proxy_ids
    ).append("packages", *package_ids).log(TraceId(x_bf_trace_id))

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total_count,
        "results": [
            [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
            for proxy in package_proxies
            if proxy.package_id in packages
        ],
    }
def authorize_search(organization_id: int, trace_id: TraceId, token_info: Claim):
    if not token_info.is_user_claim:
        raise OAuthProblem("Requires a user claim")

    if not token_info.has_organization_access(RoleOrganizationId(organization_id)):
        raise Forbidden

    user_id = UserId(token_info.content.node_id)
    datasets = PennsieveApiClient.get().get_datasets(
        headers=dict(**auth_header(), **with_trace_id_header(trace_id))
    )
    return SearchDatabase(
        db=current_app.config["db"],
        organization_id=organization_id,
        user_id=user_id,
        datasets=datasets,
    )
def get_files(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    _, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )
    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()
    AuditLogger.get().message().append("package-proxies", *package_proxy_ids).append(
        "packages", *package_ids
    ).log(TraceId(x_bf_trace_id))

    # Yes, this response is crazy: an array of two-tuples (arrays), containing a
    # single object with the proxy id, and the package DTO.
    return [
        [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
        for proxy in package_proxies
        if proxy.package_id in packages
    ]
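# For illustration, the legacy shape returned by `get_files` looks roughly
# like this (identifier values hypothetical, DTO fields elided; the
# "content"/"nodeId" keys match what the integration tests above assert on):
#
# [
#     [
#         {"id": "<package-proxy-uuid>"},
#         {"content": {"id": 1, "nodeId": "N:package:..."}, ...},
#     ],
#     ...
# ]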
def fetch_dataset_id(claim: Claim, node_id: DatasetNodeId) -> DatasetId:
    """
    Given a dataset's node ID, attempt to look up its integer ID, first from
    the JWT claim, then from the database, then from the Pennsieve API.

    Raises
    ------
    ExternalRequestError
    """
    for role in claim.content.roles:
        if role.type == JwtRole.DATASET_ROLE and role.node_id == node_id:
            return DatasetId(role.id.id)

    dataset_id = Database.from_server().get_dataset_id(node_id)
    if dataset_id is not None:
        return dataset_id

    return DatasetId(
        PennsieveApiClient.get()
        .get_dataset(node_id, headers=dict(**auth_header(), **trace_id_header_dict()))
        .int_id
    )
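# Usage sketch (the node ID value is hypothetical): callers resolve a node ID
# to an integer ID without caring which of the three sources answered, and the
# API is only hit when neither the claim nor the database knows the node ID.
#
#     dataset_id = fetch_dataset_id(claim, DatasetNodeId("N:dataset:1234"))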
def packages(
    organization_id: int, token_info: Claim, limit: int, offset: int, body: JsonDict
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)
    api_client = PennsieveApiClient.get()

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True
    )
    dataset_filters = [DatasetFilter(d) for d in body.get("datasets", [])]

    # 1) Run the query, and get all packages
    results, total_count = db.search_packages(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
    )
    results = list(results)

    # 2) Group packages by dataset - the API endpoint to get packages requires
    # a dataset ID in the URL.
    packages_by_dataset = defaultdict(list)
    for result in results:
        packages_by_dataset[result.dataset].append(result.package)

    package_dtos = []
    package_ids = []

    # 3) Get all package DTOs
    for dataset, packages in packages_by_dataset.items():
        dtos = api_client.get_packages(
            dataset.node_id,
            [package.id for package in packages],
            headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
        )

        # 4) Reorder the DTOs in the response to match the order that packages
        # came out of the database. If a package has been deleted from the
        # Pennsieve API, but not from Neo4j, it will be missing from the
        # response. Ignore this for now.
        # TODO: https://app.clickup.com/t/2c3ec9
        for package in packages:
            package_ids.append(package.id)
            if package.id in dtos:
                package_dtos.append(dtos[package.id])

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(r.dataset.id) for r in results]
    ).append("packages", *package_ids).log(x_bf_trace_id)

    return {
        "packages": package_dtos,
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
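# Illustrative search request body for `packages`, reconstructed from the keys
# the handler reads above ("model", "filters", "datasets"). The filter fields
# are hypothetical, since `PropertyFilter.schema()` is not shown here.
EXAMPLE_SEARCH_BODY = {
    "model": "patient",
    "filters": [
        # Shape is dictated by PropertyFilter's schema; fields hypothetical.
        {"property": "name", "operator": "eq", "value": "Alice"}
    ],
    "datasets": [1, 2],  # optional; omit to search all authorized datasets
}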
def create_app(
    config: Config = None,
    db: Database = None,
    api_client: PennsieveApiClient = None,
    jobs_client: PennsieveJobsClient = None,
    audit_logger: Auditor = None,
    victor_ops_client: VictorOpsClient = None,
):
    app = App(__name__)

    health = bundled("health.yml")
    internal = bundled("model-service-internal.yml")
    api_v1 = bundled("model-service-v1.yml")
    api_v2 = bundled("model-service-v2.yml")
    api_v2_streaming = bundled("model-service-streaming-v2.yml")

    app.add_api(
        internal, validate_responses=True, pythonic_params=True, base_path="/internal"
    )
    app.add_api(api_v1, validate_responses=True, pythonic_params=True, base_path="/v1")
    app.add_api(api_v2, validate_responses=True, pythonic_params=True, base_path="/v2")

    app.app.json_encoder = CustomizedEncoder

    # Mount the v1 API again with no `v1/` prefix. Ideally this would be
    # rewritten in the gateway, but internal services need to be updated to use
    # the `/v1` prefix first. This needs to be merged with `health` so that
    # these routes can share the same base path.
    #
    # See ticket: https://app.clickup.com/t/5mcufd
    root_api = {}
    root_api.update(api_v1)
    root_api["paths"].update(health["paths"])
    app.add_api(root_api, validate_responses=True, pythonic_params=True, base_path="/")

    # Unfortunately the only way to stream responses with connexion is to turn
    # response validation off.
    app.add_api(
        api_v2_streaming,
        validate_responses=False,
        pythonic_params=True,
        base_path="/v2/organizations",
    )

    @app.app.errorhandler(ValueError)
    def handle_value_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            400,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(NotImplementedError)
    def handle_not_implemented_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            415,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(errors.ExternalRequestError)
    def handle_external_request_failure(error):
        return (
            dict(message=str(error)),
            500,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(errors.MissingTraceId)
    @app.app.errorhandler(errors.ModelServiceError)
    @app.app.errorhandler(errors.OperationError)
    @app.app.errorhandler(errors.InvalidOrganizationError)
    @app.app.errorhandler(errors.InvalidDatasetError)
    def handle_service_error(error):
        return error.to_json(), 400, {"Content-Type": "application/json"}

    @app.app.errorhandler(ExpiredSignatureError)
    @app.app.errorhandler(OAuthProblem)
    def handle_auth_error(error):
        return dict(message=str(error)), 401, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.RecordRelationshipNotFoundError)
    @app.app.errorhandler(errors.LegacyModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelNotFoundError)
    def handle_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.PackageProxyNotFoundError)
    def handle_proxy_package_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ExceededTimeLimitError)
    def handle_operation_timed_out(error):
        return error.to_json(), 408, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ModelPropertyInUseError)
    def handle_model_property_in_use(error):
        return error.to_json(), 422, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.LockedDatasetError)
    def handle_locked_dataset(error):
        return error.to_json(), 423, {"Content-Type": "application/json"}

    if config is None:
        config = Config()
    app.app.config["config"] = config

    if db is None:
        db = Database.from_config(config)
    app.app.config["db"] = db

    if api_client is None:
        api_client = PennsieveApiClient(config.pennsieve_api_host)
    app.app.config["api_client"] = api_client

    if jobs_client is None:
        sqs_client = boto3.client("sqs", region_name=config.aws_region)
        jobs_client = PennsieveJobsClient(sqs_client, config.jobs_sqs_queue_id)
    app.app.config["jobs_client"] = jobs_client

    if victor_ops_client is None:
        victor_ops_client = VictorOpsClient(
            config.victor_ops_url, f"{config.environment}-data-management"
        )
    app.app.config["victor_ops_client"] = victor_ops_client

    if audit_logger is None:
        audit_logger = AuditLogger(GatewayHost(config.gateway_internal_host))
    app.app.config["audit_logger"] = audit_logger

    app.app.after_request(log_request)

    return app
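# A minimal sketch for running the service locally, assuming `App` is a
# connexion application (so the object returned by `create_app()` exposes
# `run`, which starts the underlying Flask server). The port is a
# hypothetical default.
if __name__ == "__main__":
    app = create_app()
    app.run(port=8080)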