def get_all_concept_instances(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    limit: int,
    offset: int,
    order_by: Optional[str] = None,
    ascending: Optional[bool] = None,
) -> List[JsonDict]:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        results = db.get_all_records_offset_tx(
            tx,
            model=model,
            limit=limit,
            offset=offset,
            fill_missing=True,
            order_by=None
            if order_by is None and ascending is None
            else OrderByField(
                name="created_at" if order_by is None else order_by,
                ascending=True if ascending is None else ascending,
            ),
        )

        x_bf_trace_id = AuditLogger.trace_id_header()
        record_ids = []
        instances = []

        for record in results:
            record_ids.append(str(record.id))
            instances.append(to_concept_instance(record, model, properties))

        AuditLogger.get().message().append("records", *record_ids).log(x_bf_trace_id)

        return instances
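# A minimal sketch of how the optional `order_by`/`ascending` pair above
# collapses into a single sort spec. The dataclass below is a hypothetical
# stand-in for the real `OrderByField`; only the resolution logic is the point.
from dataclasses import dataclass
from typing import Optional


@dataclass
class _OrderByField:  # hypothetical stand-in
    name: str
    ascending: bool


def _resolve_order(
    order_by: Optional[str], ascending: Optional[bool]
) -> Optional[_OrderByField]:
    # No ordering is applied only when *both* arguments are omitted; passing
    # just `ascending` still sorts, falling back to the `created_at` column.
    if order_by is None and ascending is None:
        return None
    return _OrderByField(
        name="created_at" if order_by is None else order_by,
        ascending=True if ascending is None else ascending,
    )


assert _resolve_order(None, None) is None
assert _resolve_order(None, False) == _OrderByField("created_at", False)
assert _resolve_order("name", None) == _OrderByField("name", True)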
def create_concept_instance(
    db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict
):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = to_record(properties, body["values"])
        records = db.create_records_tx(
            tx, concept_id_or_name, [record], fill_missing=True
        )
        if not records:
            raise BadRequest(
                f"Could not create concept instance [{concept_id_or_name}]"
            )
        record = records[0]

        # Log the created concept instance:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=CreateRecord(id=record.id, name=record.name, model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append("records", str(record.id)).log(
            x_bf_trace_id
        )

        return to_concept_instance(record, model, properties), 201
def get_all_package_proxies(
    db: PartitionedDatabase, record_id: RecordId, limit: int = 100, offset: int = 0
) -> JsonDict:
    total_count, proxies = db.get_package_proxies_for_record(
        record_id, limit=limit, offset=offset
    )

    x_bf_trace_id = AuditLogger.trace_id_header()
    package_proxy_ids = []
    packages = []

    for p in proxies:
        package_proxy_ids.append(str(p.id))
        packages.append(p.to_dict())

    AuditLogger.get().message().append("package-proxies", *package_proxy_ids).log(
        x_bf_trace_id
    )

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total_count,
        "packages": packages,
    }
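# The fluent `message().append(...).log(...)` chain used by these handlers,
# mimicked with a tiny builder. This illustrates the calling pattern only; it
# is not the real `AuditLogger` implementation.
from typing import List, Tuple


class _Message:  # hypothetical stand-in
    def __init__(self) -> None:
        self.fields: List[Tuple[str, Tuple[str, ...]]] = []

    def append(self, key: str, *values: str) -> "_Message":
        # Returning `self` is what makes the calls chainable.
        self.fields.append((key, values))
        return self

    def log(self, trace_id: str) -> None:
        print(f"[trace={trace_id}]", self.fields)


_Message().append("package-proxies", "pp1", "pp2").log("trace-abc")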
def autocomplete_model_properties(
    organization_id: int,
    model_name: str,
    token_info: Claim,
    dataset_id: Optional[int] = None,
) -> List[JsonDict]:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)
    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    datasets_properties_operators = list(
        db.suggest_properties(
            model_filter=ModelFilter(name=model_name), dataset_id=ds_id
        )
    )
    datasets = {d for (d, _, _) in datasets_properties_operators}
    properties_and_operators = unique_everseen(
        [(p, op) for (_, p, op) in datasets_properties_operators],
        key=lambda t: prop_key(t[0]),
    )

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]
    ).log(x_bf_trace_id)

    # If a name is a duplicate, include its type in the output display name
    # to disambiguate:
    return [property_to_suggestion(p, ops) for (p, ops) in properties_and_operators]
def create_proxy_instance(
    db: PartitionedDatabase, proxy_type: str, body: JsonDict
) -> List[JsonDict]:
    response = []

    with db.transaction() as tx:
        x_bf_trace_id = AuditLogger.trace_id_header()
        link_targets = []
        package_ids = []

        for target in body["targets"]:
            link_target = target["linkTarget"]
            relationship_type = target["relationshipType"]
            link_targets.append(link_target)

            concept_link_target = to_proxy_link_target(link_target)
            if concept_link_target is None:
                raise InvalidPackageProxyLinkTargetError(
                    link_target=str(link_target)
                )

            package = PennsieveApiClient.get().get_package_ids(
                db.dataset_node_id,
                body["external_id"],
                headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
            )
            package_ids.append(str(package.id))

            package_proxy = db.create_package_proxy_tx(
                tx=tx,
                record=concept_link_target.id,
                package_id=package.id,
                package_node_id=package.node_id,
                legacy_relationship_type=relationship_type,
            )

            link_result = {
                "proxyInstance": to_proxy_instance(PROXY_TYPE, package_proxy),
                "relationshipInstance": make_proxy_relationship_instance(
                    concept_link_target.id, package_proxy, relationship_type
                ),
            }
            response.append(link_result)

        AuditLogger.get().message().append("link-targets", *link_targets).append(
            "packages", *package_ids
        ).log(x_bf_trace_id)

    return response, 201
def get_records_related_to_package(
    db: PartitionedDatabase,
    proxy_type: str,
    package_id: str,
    concept_id_or_name: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:
        x_bf_trace_id = AuditLogger.trace_id_header()
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        results = []
        package_proxy_ids = []
        record_ids = []

        for pp, r in db.get_records_related_to_package_tx(
            tx=tx,
            package_id=PackageNodeId(package_id),
            related_model_id_or_name=concept_id_or_name,
            limit=limit,
            offset=offset,
            relationship_order_by=relationship_order_by,
            record_order_by=record_order_by,
            ascending=ascending,
        ):
            package_proxy_ids.append(str(pp.id))
            record_ids.append(str(r.id))
            t = (
                # All package-to-record relationships are defined with the
                # internal `@IN_PACKAGE` relationship type:
                #   (Package)<-[`@IN_PACKAGE`]-(Record)
                # For legacy consistency, we just use the generic "belongs_to"
                # here:
                make_proxy_relationship_instance(r.id, pp, "belongs_to"),
                to_concept_instance(r, model, properties),
            )
            results.append(t)

        AuditLogger.get().message().append(
            "package-proxies", *package_proxy_ids
        ).append("records", *record_ids).log(x_bf_trace_id)

        return results
def autocomplete_model_property_values(
    organization_id: int,
    model_name: str,
    property_name: str,
    token_info: Claim,
    dataset_id: Optional[int] = None,
    prefix: Optional[str] = None,
    unit: Optional[str] = None,
    limit: Optional[int] = 10,
) -> List[JsonDict]:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)
    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    suggested_values: List[Tuple[Dataset, SuggestedValues]] = db.suggest_values(
        model_name=model_name,
        model_property_name=property_name,
        dataset_id=ds_id,
        matching_prefix=prefix,
        unit=unit,
        limit=limit,
    )
    datasets: List[Dataset] = [d for d, _ in suggested_values]

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]
    ).log(x_bf_trace_id)

    # Group suggestions by the data type of their property:
    grouped_suggestions = defaultdict(list)
    for _, suggestion in suggested_values:
        grouped_suggestions[suggestion.property_.data_type.to_json()].append(
            suggestion
        )

    return [
        {
            "property": property_to_suggestion(
                suggestions[0].property_, suggestions[0].operators
            ),
            "values": list(chain.from_iterable(sv.values for sv in suggestions)),
        }
        for suggestions in grouped_suggestions.values()
    ]
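# The grouping above, in isolation: bucket items by a key with
# `defaultdict(list)`, then flatten each bucket with `chain.from_iterable`.
# The (data type, values) tuples here are made-up stand-ins.
from collections import defaultdict
from itertools import chain

raw = [("String", ["a", "b"]), ("Long", [1]), ("String", ["c"])]

grouped = defaultdict(list)
for data_type, values in raw:
    grouped[data_type].append(values)

flattened = {k: list(chain.from_iterable(v)) for k, v in grouped.items()}
assert flattened == {"String": ["a", "b", "c"], "Long": [1]}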
def get_files_paged(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()

    total_count, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )
    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()

    AuditLogger.get().message().append(
        "package-proxies", *package_proxy_ids
    ).append("packages", *package_ids).log(TraceId(x_bf_trace_id))

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total_count,
        "results": [
            [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
            for proxy in package_proxies
            if proxy.package_id in packages
        ],
    }
def filtered_datasets_by_model(
    organization_id: int,
    token_info: Claim,
    model_name: str,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    datasets = db.get_dataset_id_by_model_name(model_name)

    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]
    ).log(x_bf_trace_id)

    return {
        "datasets": [dataset.to_dict() for dataset in datasets],
        "count": len(datasets),
    }
def get_all_records(
    db: PartitionedDatabase,
    model_id_or_name: str,
    limit: int,
    linked: bool,
    next_page: Optional[NextPageCursor] = None,
) -> List[JsonDict]:
    x_bf_trace_id = AuditLogger.trace_id_header()

    paged_result = db.get_all_records(
        model_id_or_name, limit=limit, embed_linked=linked, next_page=next_page
    )

    record_ids = [str(record.id) for record in paged_result.results]

    AuditLogger.get().message().append("records", *record_ids).log(
        TraceId(x_bf_trace_id)
    )

    return PagedResult(
        results=paged_result.results, next_page=paged_result.next_page
    ).to_dict()
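# Cursor paging, as used by `get_all_records`, in miniature: each call returns
# a batch plus an opaque `next_page` cursor, and `None` signals exhaustion.
# The in-memory "table" and `fetch_page` helper are stand-ins for the real store.
from typing import List, Optional, Tuple

_ROWS = list(range(7))


def fetch_page(
    limit: int, next_page: Optional[int] = None
) -> Tuple[List[int], Optional[int]]:
    start = next_page or 0
    batch = _ROWS[start : start + limit]
    cursor = start + limit if start + limit < len(_ROWS) else None
    return batch, cursor


collected: List[int] = []
cursor: Optional[int] = None
while True:
    batch, cursor = fetch_page(limit=3, next_page=cursor)
    collected.extend(batch)
    if cursor is None:
        break
assert collected == _ROWS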
def get_files(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()

    _, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )
    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()

    AuditLogger.get().message().append("package-proxies", *package_proxy_ids).append(
        "packages", *package_ids
    ).log(TraceId(x_bf_trace_id))

    # Yes, this response is crazy: an array of two-tuples (arrays), containing a
    # single object with the proxy id, and the package DTO.
    return [
        [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
        for proxy in package_proxies
        if proxy.package_id in packages
    ]
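# The response shape above, in miniature: join each proxy against a package
# lookup, silently dropping proxies whose package is missing (e.g. deleted
# upstream). Plain dicts stand in for the real proxy and DTO types.
proxies = [{"id": "p1", "package_id": 1}, {"id": "p2", "package_id": 2}]
packages = {1: {"name": "one.csv"}}  # package 2 was deleted upstream

results = [
    [{"id": proxy["id"]}, packages[proxy["package_id"]]]
    for proxy in proxies
    if proxy["package_id"] in packages
]
assert results == [[{"id": "p1"}, {"name": "one.csv"}]]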
def get_all_proxy_instances(
    db: PartitionedDatabase, proxy_type: str
) -> List[JsonDict]:
    with db.transaction() as tx:
        proxy_instances = []
        package_proxy_ids = []
        record_ids = []

        x_bf_trace_id = AuditLogger.trace_id_header()

        for package_proxy, record in db.get_all_package_proxies_tx(tx):
            proxy_instances.append(to_proxy_instance(PROXY_TYPE, package_proxy))
            package_proxy_ids.append(str(package_proxy.id))
            record_ids.append(str(record.id))

        AuditLogger.get().message().append(
            "package-proxies", *package_proxy_ids
        ).append("records", *record_ids).log(x_bf_trace_id)

        return proxy_instances
def create_concept_instance_batch(
    db: PartitionedDatabase, concept_id_or_name: str, body: JsonDict
):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        requests = [to_record(properties, req["values"]) for req in body]
        records = db.create_records_tx(
            tx, concept_id_or_name, requests, fill_missing=True
        )
        instances = [to_concept_instance(r, model, properties) for r in records]
        if not instances:
            raise BadRequest(
                f"Could not create concept instances for [{concept_id_or_name}]"
            )

        # Log the created concept instances:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" events:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=[
                CreateRecord(id=r.id, name=r.name, model_id=model.id)
                for r in records
            ],
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append(
            "records", *[str(r.id) for r in records]
        ).log(x_bf_trace_id)

        return instances
def autocomplete_models(
    organization_id: int,
    token_info: Claim,
    dataset_id: Optional[int] = None,
    related_to: Optional[str] = None,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)
    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    datasets_and_models = list(
        db.suggest_models(dataset_id=ds_id, related_to=related_to)
    )
    datasets = {d for (d, _) in datasets_and_models}
    models = unique_everseen(
        (m for (_, m) in datasets_and_models), key=lambda m: m.name
    )

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]
    ).log(x_bf_trace_id)

    return {"models": [model.to_dict() for model in models]}
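# `unique_everseen` (from more_itertools) keeps the first occurrence of each
# key and preserves input order, which is why a model seen in multiple datasets
# appears once above. A quick self-contained demonstration with made-up data:
from more_itertools import unique_everseen

pairs = [
    ("ds1", {"name": "patient"}),
    ("ds2", {"name": "patient"}),
    ("ds2", {"name": "visit"}),
]
deduped = list(unique_everseen((m for (_, m) in pairs), key=lambda m: m["name"]))
assert deduped == [{"name": "patient"}, {"name": "visit"}]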
def run(db: PartitionedDatabase, body: JsonDict) -> List[JsonDict]:
    x_bf_trace_id = AuditLogger.trace_id_header()

    # connexion renames properties called "type" to "type_":
    body["type"] = body.pop("type_")

    # connexion converted CamelCased keys to snake_case:
    legacy_query = GraphQuery.schema().load(body)

    limit = int(legacy_query.limit) if legacy_query.limit is not None else 25
    offset = int(legacy_query.offset) if legacy_query.offset is not None else 0

    (user_query, src_model_id_or_name) = to_user_query(legacy_query)

    qr = QueryRunner(db, user_query)

    results = []
    record_ids: List[str] = []

    with db.transaction() as tx:
        src_model: Model = qr.get_model_tx(tx, cast(str, src_model_id_or_name))
        models: Dict[str, Model] = qr.get_models_tx(tx, src_model_id_or_name)
        model_properties: Dict[str, List[ModelProperty]] = qr.get_model_properties_tx(
            tx, src_model_id_or_name
        )

        for r in qr.run_tx(
            tx=tx, source_model=src_model_id_or_name, limit=limit, offset=offset
        ):
            # If a single model was selected, expect `List[Record]`:
            if isinstance(r, Record):
                record_ids.append(str(r.id))
                results.append(
                    {
                        "targetValue": to_concept_instance(
                            r, src_model, model_properties[src_model.name]
                        )
                    }
                )
            elif qr.query and qr.query.is_aggregating:
                results.append(r)
            else:
                # Otherwise, expect `List[Dict[str, Record]]`:
                result = {}
                for model_name_or_alias, record_data in r.items():
                    # If `model_name_or_alias` is an alias, resolve it:
                    model_name: str = (
                        qr.resolve_model_alias(model_name_or_alias)
                        or model_name_or_alias
                    )
                    if model_name not in models:
                        models[model_name] = qr.get_model_tx(
                            tx, cast(str, model_name)
                        )
                        model_properties.update(
                            qr.get_model_properties_tx(tx, cast(str, model_name))
                        )
                    if src_model.name == model_name:
                        result.update(
                            {
                                "targetValue": to_concept_instance(
                                    record_data,
                                    models[model_name],
                                    model_properties[model_name],
                                )
                            }
                        )
                    else:
                        result.update(
                            {
                                model_name_or_alias: to_concept_instance(
                                    record_data,
                                    models[model_name],
                                    model_properties[model_name],
                                )
                            }
                        )
                    record_ids.append(str(record_data.id))
                results.append(result)

    AuditLogger.get().message().append("records", *record_ids).log(x_bf_trace_id)

    return results
def records(
    organization_id: int,
    token_info: Claim,
    limit: int,
    offset: int,
    order_direction: str,
    body: JsonDict,
    order_by: Optional[str] = None,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True
    )
    dataset_filters = [DatasetFilter(d) for d in body.get("datasets", [])]

    results, total_count = db.search_records(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
        order_by=order_by,
        order_direction=OrderDirection.parse(order_direction),
    )
    results = list(results)

    # Deduplicate the set of models and properties represented in the results.
    # TODO: do this in Neo4j so we don't have to send duplicate data over the wire
    model_properties = {result.model_id: result.properties for result in results}
    datasets = {result.model_id: result.dataset for result in results}

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets.values()]
    ).append(
        "records", *[str(result.record.id) for result in results]
    ).log(x_bf_trace_id)

    return {
        "models": [
            {
                "id": model_id,
                "properties": [p.to_dict() for p in properties],
                "dataset": datasets[model_id],
            }
            for model_id, properties in model_properties.items()
        ],
        "records": [
            {"modelId": result.model_id, **result.record.to_dict()}
            for result in results
        ],
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
def packages(
    organization_id: int, token_info: Claim, limit: int, offset: int, body: JsonDict
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    db = authorize_search(organization_id, x_bf_trace_id, token_info)
    api_client = PennsieveApiClient.get()

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True
    )
    dataset_filters = [DatasetFilter(d) for d in body.get("datasets", [])]

    # 1) Run the query, and get all packages
    results, total_count = db.search_packages(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
    )
    results = list(results)

    # 2) Group packages by dataset - the API endpoint to get packages requires
    # a dataset ID in the URL.
    packages_by_dataset = defaultdict(list)
    for result in results:
        packages_by_dataset[result.dataset].append(result.package)

    package_dtos = []
    package_ids = []

    # 3) Get all package DTOs
    for dataset, packages in packages_by_dataset.items():
        dtos = api_client.get_packages(
            dataset.node_id,
            [package.id for package in packages],
            headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
        )

        # 4) Reorder the DTOs in the response to match the order that packages
        # came out of the database. If a package has been deleted from Pennsieve
        # API, but not from Neo4j, it will be missing from the response. Ignore
        # this for now.
        # TODO: https://app.clickup.com/t/2c3ec9
        for package in packages:
            package_ids.append(package.id)
            if package.id in dtos:
                package_dtos.append(dtos[package.id])

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(r.dataset.id) for r in results]
    ).append("packages", *package_ids).log(x_bf_trace_id)

    return {
        "packages": package_dtos,
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
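# Step 4 above, in isolation: walk packages in database order and keep only
# those the API actually returned, so output order follows the query even when
# some DTOs are missing. Plain strings and dicts stand in for the real types.
dtos = {"pkg-2": {"name": "b"}, "pkg-1": {"name": "a"}}
db_order = ["pkg-1", "pkg-3", "pkg-2"]  # pkg-3 was deleted upstream

ordered_dtos = [dtos[pid] for pid in db_order if pid in dtos]
assert ordered_dtos == [{"name": "a"}, {"name": "b"}]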
def records_csv(organization_id: int, token_info: Claim, body: JsonDict) -> Response:
    x_bf_trace_id = AuditLogger.trace_id_header()

    try:
        search_params = SearchDownloadRequest.schema().load(json.loads(body["data"]))

        db = authorize_search(organization_id, x_bf_trace_id, token_info)
        datasets = {ds.int_id: ds for ds in db.datasets}

        # Write to the audit log:
        logger = AuditLogger.get()
        logger.message().append("organization", organization_id).append(
            "datasets", *[ds.int_id for ds in db.datasets]
        ).log(x_bf_trace_id)

        model_properties = db.suggest_properties(search_params.model)

        columns = {}
        date_columns = set()
        for _, p, _ in model_properties:
            if dt.DataType.is_date(p.data_type):
                date_columns.add(p.name)
            unit = dt.DataType.get_unit(p.data_type)
            unit_display_name = (
                None if unit is None else Unit.find_display_name(unit)
            )
            display_name = (
                p.display_name
                if unit is None
                else f"{p.display_name} ({unit if unit_display_name is None else unit_display_name})"
            )
            columns[p.name] = display_name

        response = Response(
            iter_csv(
                db,
                search_params.model,
                search_params.filters,
                search_params.datasets,
                columns,
                date_columns,
                datasets,
                logger,
                x_bf_trace_id,
            ),
            mimetype="text/csv",
        )
        response.headers[
            "Content-Disposition"
        ] = f"attachment; filename={search_params.model.name}{RECORD_DOWNLOAD_EXTENSION}"
        return response

    except Exception as e:
        response = Response(csv_error(e))
        response.headers["Content-Disposition"] = "attachment; filename=!ERROR.txt"
        return response
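# A standalone sketch of the streaming-CSV pattern above: Flask's `Response`
# accepts a generator, so rows are flushed to the client as they are produced
# rather than buffered in memory. The route and data here are illustrative only.
import csv
import io

from flask import Flask, Response

app = Flask(__name__)


@app.route("/download")
def download() -> Response:
    def iter_rows():
        buffer = io.StringIO()
        writer = csv.writer(buffer)
        for row in [["id", "name"], [1, "alpha"], [2, "beta"]]:
            writer.writerow(row)
            yield buffer.getvalue()
            buffer.seek(0)
            buffer.truncate(0)

    response = Response(iter_rows(), mimetype="text/csv")
    response.headers["Content-Disposition"] = "attachment; filename=records.csv"
    return response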