Example #1
def get_all_concept_instances(
    db: PartitionedDatabase,
    concept_id_or_name: str,
    limit: int,
    offset: int,
    order_by: Optional[str] = None,
    ascending: Optional[bool] = None,
) -> List[JsonDict]:
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        results = db.get_all_records_offset_tx(
            tx,
            model=model,
            limit=limit,
            offset=offset,
            fill_missing=True,
            order_by=(
                None
                if order_by is None and ascending is None
                else OrderByField(
                    name=order_by if order_by is not None else "created_at",
                    ascending=ascending if ascending is not None else True,
                )
            ),
        )

        x_bf_trace_id = AuditLogger.trace_id_header()
        record_ids = []
        instances = []
        for record in results:
            record_ids.append(str(record.id))
            instances.append(to_concept_instance(record, model, properties))

        AuditLogger.get().message().append("records",
                                           *record_ids).log(x_bf_trace_id)

        return instances
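
The inline conditional passed as order_by encodes a small defaulting rule: no explicit ordering is applied only when both parameters are omitted; if either is given, the field falls back to "created_at" and the direction to ascending. A standalone sketch of that rule, assuming OrderByField is a simple name/direction pair (the dataclass below is a stand-in, not the library's definition):

from dataclasses import dataclass
from typing import Optional

@dataclass
class OrderByField:
    name: str
    ascending: bool

def resolve_order_by(
    order_by: Optional[str], ascending: Optional[bool]
) -> Optional[OrderByField]:
    # Both omitted: let the database return records in its default order.
    if order_by is None and ascending is None:
        return None
    # Otherwise fill in whichever half of the ordering was omitted.
    return OrderByField(
        name=order_by if order_by is not None else "created_at",
        ascending=ascending if ascending is not None else True,
    )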
Example #2
def create_concept_instance(db: PartitionedDatabase, concept_id_or_name: str,
                            body: JsonDict):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        record = to_record(properties, body["values"])
        records = db.create_records_tx(tx,
                                       concept_id_or_name, [record],
                                       fill_missing=True)

        if not records:
            raise BadRequest(
                f"Could not create concept instance [{concept_id_or_name}]")
        record = records[0]

        # Log the created concept instance:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" event:
        PennsieveJobsClient.get().send_changelog_event(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            event=CreateRecord(id=record.id,
                               name=record.name,
                               model_id=model.id),
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append("records",
                                           str(record.id)).log(x_bf_trace_id)

        return to_concept_instance(record, model, properties), 201
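
Judging from the body["values"] lookup above, the handler expects a JSON payload carrying a "values" entry that to_record matches against the model's properties. A hedged sketch of such a payload; the property names and the exact shape of each value are assumptions for illustration, not taken from an API spec:

# Hypothetical payload for create_concept_instance; "name" and "age"
# stand in for whatever properties the model actually defines:
example_body = {
    "values": [
        {"name": "name", "value": "Patient 001"},
        {"name": "age", "value": 42},
    ]
}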
Example #3
def get_all_package_proxies(
    db: PartitionedDatabase, record_id: RecordId, limit: int = 100, offset: int = 0
) -> JsonDict:
    total_count, proxies = db.get_package_proxies_for_record(
        record_id, limit=limit, offset=offset
    )

    x_bf_trace_id = AuditLogger.trace_id_header()

    package_proxy_ids = []
    packages = []

    for p in proxies:
        package_proxy_ids.append(str(p.id))
        packages.append(p.to_dict())

    AuditLogger.get().message().append("package-proxies", *package_proxy_ids).log(
        x_bf_trace_id
    )

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total_count,
        "packages": packages,
    }
Example #4
def autocomplete_model_properties(
    organization_id: int,
    model_name: str,
    token_info: Claim,
    dataset_id: Optional[int] = None,
) -> List[JsonDict]:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    datasets_properties_operators = list(
        db.suggest_properties(model_filter=ModelFilter(name=model_name),
                              dataset_id=ds_id))

    datasets = {d for (d, _, _) in datasets_properties_operators}
    properties_and_operators = unique_everseen(
        [(p, op) for (_, p, op) in datasets_properties_operators],
        key=lambda t: prop_key(t[0]),
    )

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]).log(x_bf_trace_id)

    # If a name is a duplicate, include its type in the output display name
    # to disambiguate:
    return [
        property_to_suggestion(p, ops) for (p, ops) in properties_and_operators
    ]
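
unique_everseen, from the more_itertools package, yields the first occurrence of each key and preserves input order, which is what keeps a single suggestion per property name above. A minimal illustration:

from more_itertools import unique_everseen

pairs = [("name", "eq"), ("age", "gt"), ("name", "neq")]
# Deduplicate on the first tuple element, keeping first-seen order:
print(list(unique_everseen(pairs, key=lambda t: t[0])))
# -> [('name', 'eq'), ('age', 'gt')]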
Example #5
def create_proxy_instance(db: PartitionedDatabase, proxy_type: str,
                          body: JsonDict) -> List[JsonDict]:
    response = []

    with db.transaction() as tx:

        x_bf_trace_id = AuditLogger.trace_id_header()
        link_targets = []
        package_ids = []

        for target in body["targets"]:

            link_target = target["linkTarget"]
            relationship_type = target["relationshipType"]

            link_targets.append(link_target)

            concept_link_target = to_proxy_link_target(link_target)
            if concept_link_target is None:
                raise InvalidPackageProxyLinkTargetError(
                    link_target=str(link_target))

            package = PennsieveApiClient.get().get_package_ids(
                db.dataset_node_id,
                body["external_id"],
                headers=dict(**auth_header(),
                             **with_trace_id_header(x_bf_trace_id)),
            )

            package_ids.append(str(package.id))

            package_proxy = db.create_package_proxy_tx(
                tx=tx,
                record=concept_link_target.id,
                package_id=package.id,
                package_node_id=package.node_id,
                legacy_relationship_type=relationship_type,
            )

            link_result = {
                "proxyInstance": to_proxy_instance(PROXY_TYPE, package_proxy),
                "relationshipInstance": make_proxy_relationship_instance(
                    concept_link_target.id, package_proxy, relationship_type),
            }

            response.append(link_result)

    AuditLogger.get().message().append("link-targets", *link_targets).append(
        "packages", *package_ids).log(x_bf_trace_id)

    return response, 201
Example #6
def get_records_related_to_package(
    db: PartitionedDatabase,
    proxy_type: str,
    package_id: str,
    concept_id_or_name: str,
    limit: Optional[int] = None,
    offset: Optional[int] = None,
    relationship_order_by: Optional[str] = None,
    record_order_by: Optional[str] = None,
    ascending: bool = False,
) -> List[JsonDict]:
    with db.transaction() as tx:

        x_bf_trace_id = AuditLogger.trace_id_header()
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)

        results = []
        package_proxy_ids = []
        record_ids = []

        for pp, r in db.get_records_related_to_package_tx(
                tx=tx,
                package_id=PackageNodeId(package_id),
                related_model_id_or_name=concept_id_or_name,
                limit=limit,
                offset=offset,
                relationship_order_by=relationship_order_by,
                record_order_by=record_order_by,
                ascending=ascending,
        ):
            package_proxy_ids.append(str(pp.id))
            record_ids.append(str(r.id))
            t = (
                # All package-to-record relationships are defined with the
                # internal `@IN_PACKAGE` relationship type:
                #   (Package)<-[`@IN_PACKAGE`]-(Record)
                # For legacy consistency, we just use the generic "belongs_to"
                # here:
                make_proxy_relationship_instance(r.id, pp, "belongs_to"),
                to_concept_instance(r, model, properties),
            )
            results.append(t)

        AuditLogger.get().message().append("package-proxies",
                                           *package_proxy_ids).append(
                                               "records",
                                               *record_ids).log(x_bf_trace_id)

        return results
Example #7
def autocomplete_model_property_values(
    organization_id: int,
    model_name: str,
    property_name: str,
    token_info: Claim,
    dataset_id: Optional[int] = None,
    prefix: Optional[str] = None,
    unit: Optional[str] = None,
    limit: Optional[int] = 10,
) -> List[JsonDict]:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    suggested_values: List[Tuple[Dataset, SuggestedValues]] = db.suggest_values(
        model_name=model_name,
        model_property_name=property_name,
        dataset_id=ds_id,
        matching_prefix=prefix,
        unit=unit,
        limit=limit,
    )

    datasets: List[Dataset] = [d for d, _ in suggested_values]

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]).log(x_bf_trace_id)

    # Group properties by data type
    grouped_suggestions = defaultdict(list)

    for _, suggestion in suggested_values:
        grouped_suggestions[suggestion.property_.data_type.to_json()].append(
            suggestion)

    return [
        {
            "property": property_to_suggestion(
                suggestions[0].property_, suggestions[0].operators
            ),
            "values": list(chain.from_iterable(sv.values for sv in suggestions)),
        }
        for suggestions in grouped_suggestions.values()
    ]
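
The grouping step is the standard defaultdict(list) pattern: suggestions that serialize to the same data type accumulate under one key, and the final comprehension emits one merged entry per group. A stripped-down version of the same pattern:

from collections import defaultdict

grouped = defaultdict(list)
for data_type, value in [("String", "a"), ("Long", 1), ("String", "b")]:
    grouped[data_type].append(value)
print(dict(grouped))
# -> {'String': ['a', 'b'], 'Long': [1]}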
Example #8
def get_files_paged(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    total_count, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset)

    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()

    AuditLogger.get().message().append(
        "package-proxies", *package_proxy_ids
    ).append("packages", *package_ids).log(TraceId(x_bf_trace_id))

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total_count,
        "results": [
            [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
            for proxy in package_proxies
            if proxy.package_id in packages
        ],
    }
Example #9
def filtered_datasets_by_model(
    organization_id: int,
    token_info: Claim,
    model_name: str,
) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    datasets = db.get_dataset_id_by_model_name(model_name)

    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]).log(x_bf_trace_id)

    return {
        "datasets": [dataset.to_dict() for dataset in datasets],
        "count": len(datasets),
    }
Example #10
def get_all_records(
    db: PartitionedDatabase,
    model_id_or_name: str,
    limit: int,
    linked: bool,
    next_page: Optional[NextPageCursor] = None,
) -> JsonDict:
    x_bf_trace_id = AuditLogger.trace_id_header()
    paged_result = db.get_all_records(model_id_or_name,
                                      limit=limit,
                                      embed_linked=linked,
                                      next_page=next_page)
    record_ids = []
    for record in paged_result:
        record_ids.append(str(record.id))

    AuditLogger.get().message().append("records",
                                       *record_ids).log(TraceId(x_bf_trace_id))

    return PagedResult(results=paged_result.results,
                       next_page=paged_result.next_page).to_dict()
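
Unlike the offset-based endpoints above, this one pages with an opaque cursor. A self-contained sketch of how a caller drains such an endpoint; fetch_page is a stand-in, and the "results"/"next_page" keys simply mirror the PagedResult fields visible in the code above:

def fetch_page(cursor):
    # Stand-in for the paged endpoint: two pages, then a None cursor.
    pages = {
        None: {"results": [1, 2], "next_page": "cursor-1"},
        "cursor-1": {"results": [3], "next_page": None},
    }
    return pages[cursor]

cursor, records = None, []
while True:
    page = fetch_page(cursor)
    records.extend(page["results"])
    cursor = page["next_page"]
    if cursor is None:  # assumed end-of-pages sentinel
        break
print(records)  # -> [1, 2, 3]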
Example #11
def get_files(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    _, package_proxies = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )

    package_proxies = list(package_proxies)

    # If any packages cannot be found they will be ignored in this response
    # TODO: https://app.clickup.com/t/3gaec4
    packages = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[proxy.package_id for proxy in package_proxies],
        headers=dict(**auth_header(), **with_trace_id_header(x_bf_trace_id)),
    )

    package_proxy_ids = [str(p.id) for p in package_proxies]
    package_ids = packages.keys()

    AuditLogger.get().message().append("package-proxies", *package_proxy_ids).append(
        "packages", *package_ids
    ).log(TraceId(x_bf_trace_id))

    # Yes, this response is crazy: an array of two-tuples (arrays), containing a
    # single object with the proxy id, and the package DTO.
    return [
        [{"id": proxy.id}, to_legacy_package_dto(packages[proxy.package_id])]
        for proxy in package_proxies
        if proxy.package_id in packages
    ]
Example #12
def get_all_proxy_instances(db: PartitionedDatabase,
                            proxy_type: str) -> List[JsonDict]:
    with db.transaction() as tx:

        proxy_instances = []
        package_proxy_ids = []
        record_ids = []

        x_bf_trace_id = AuditLogger.trace_id_header()

        for package_proxy, record in db.get_all_package_proxies_tx(tx):
            proxy_instances.append(to_proxy_instance(PROXY_TYPE,
                                                     package_proxy))
            package_proxy_ids.append(str(package_proxy.id))
            record_ids.append(str(record.id))

        AuditLogger.get().message().append("package-proxies",
                                           *package_proxy_ids).append(
                                               "records",
                                               *record_ids).log(x_bf_trace_id)

        return proxy_instances
Example #13
def create_concept_instance_batch(db: PartitionedDatabase,
                                  concept_id_or_name: str, body: JsonDict):
    with db.transaction() as tx:
        model = db.get_model_tx(tx, concept_id_or_name)
        properties = db.get_properties_tx(tx, concept_id_or_name)
        requests = [to_record(properties, req["values"]) for req in body]
        records = db.create_records_tx(tx,
                                       concept_id_or_name,
                                       requests,
                                       fill_missing=True)
        instances = [
            to_concept_instance(r, model, properties) for r in records
        ]
        if not instances:
            raise BadRequest(
                f"Could not create concept instances for [{concept_id_or_name}]"
            )

        # Log the created concept instances:
        x_bf_trace_id = AuditLogger.trace_id_header()

        # Emit "CreateRecord" events:
        PennsieveJobsClient.get().send_changelog_events(
            organization_id=db.organization_id,
            dataset_id=db.dataset_id,
            user_id=db.user_id,
            events=[
                CreateRecord(id=r.id, name=r.name, model_id=model.id)
                for r in records
            ],
            trace_id=TraceId(x_bf_trace_id),
        )

        AuditLogger.get().message().append("records",
                                           *[str(r.id) for r in records
                                             ]).log(x_bf_trace_id)

        return instances
Example #14
def autocomplete_models(
    organization_id: int,
    token_info: Claim,
    dataset_id: Optional[int] = None,
    related_to: Optional[str] = None,
) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    ds_id = None if dataset_id is None else DatasetId(dataset_id)

    datasets_and_models = list(
        db.suggest_models(dataset_id=ds_id, related_to=related_to))
    datasets = {d for (d, _) in datasets_and_models}
    models = unique_everseen((m for (_, m) in datasets_and_models),
                             key=lambda m: m.name)

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets]).log(x_bf_trace_id)

    return {"models": [model.to_dict() for model in models]}
Example #15
def run(db: PartitionedDatabase, body: JsonDict) -> List[JsonDict]:

    x_bf_trace_id = AuditLogger.trace_id_header()

    # connexion renames properties called "type" to "type_":
    body["type"] = body.pop("type_")

    # connexion converted CamelCased keys to snake_case:
    legacy_query = GraphQuery.schema().load(body)
    limit = int(legacy_query.limit) if legacy_query.limit is not None else 25
    offset = int(legacy_query.offset) if legacy_query.offset is not None else 0

    (user_query, src_model_id_or_name) = to_user_query(legacy_query)
    qr = QueryRunner(db, user_query)

    results = []
    record_ids: List[str] = []

    with db.transaction() as tx:

        src_model: Model = qr.get_model_tx(tx, cast(str, src_model_id_or_name))
        models: Dict[str, Model] = qr.get_models_tx(tx, src_model_id_or_name)
        model_properties: Dict[str, List[ModelProperty]] = qr.get_model_properties_tx(
            tx, src_model_id_or_name)

        for r in qr.run_tx(tx=tx,
                           source_model=src_model_id_or_name,
                           limit=limit,
                           offset=offset):

            # if a single model was selected: expect `List[Record]`:
            if isinstance(r, Record):
                record_ids.append(str(r.id))
                results.append({
                    "targetValue": to_concept_instance(
                        r, src_model, model_properties[src_model.name])
                })
            else:
                if qr.query and qr.query.is_aggregating:
                    results.append(r)
                else:
                    # otherwise, expect `List[Dict[str, Record]]`
                    result = {}

                    for model_name_or_alias, record_data in r.items():

                        # If `model_name` is an alias, resolve it:
                        model_name: str = (
                            qr.resolve_model_alias(model_name_or_alias)
                            or model_name_or_alias)

                        if model_name not in models:
                            models[model_name] = qr.get_model_tx(
                                tx, cast(str, model_name))
                            model_properties.update(
                                qr.get_model_properties_tx(
                                    tx, cast(str, model_name)))

                        if src_model.name == model_name:
                            result["targetValue"] = to_concept_instance(
                                record_data,
                                models[model_name],
                                model_properties[model_name],
                            )
                        else:
                            result[model_name_or_alias] = to_concept_instance(
                                record_data,
                                models[model_name],
                                model_properties[model_name],
                            )
                        record_ids.append(str(record_data.id))

                    results.append(result)

        AuditLogger.get().message().append("records",
                                           *record_ids).log(x_bf_trace_id)

        return results
Example #16
def records(
    organization_id: int,
    token_info: Claim,
    limit: int,
    offset: int,
    order_direction: str,
    body: JsonDict,
    order_by: Optional[str] = None,
) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True)
    dataset_filters = [DatasetFilter(d) for d in body.get("datasets", [])]

    results, total_count = db.search_records(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
        order_by=order_by,
        order_direction=OrderDirection.parse(order_direction),
    )
    results = list(results)

    # Deduplicate the set of models and properties represented in the results.
    # TODO: do this in Neo4j so we don't have to send duplicate data over the wire
    model_properties = {result.model_id: result.properties for result in results}

    datasets = {result.model_id: result.dataset for result in results}

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(ds.id) for ds in datasets.values()
                      ]).append("records",
                                *[str(result.record.id)
                                  for result in results]).log(x_bf_trace_id)

    return {
        "models": [
            {
                "id": model_id,
                "properties": [p.to_dict() for p in properties],
                "dataset": datasets[model_id],
            }
            for model_id, properties in model_properties.items()
        ],
        "records": [
            {"modelId": result.model_id, **result.record.to_dict()}
            for result in results
        ],
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
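
The model_properties comprehension above leans on dict semantics for its deduplication: when several results share a model_id, later entries overwrite earlier ones, leaving exactly one properties list per model. A minimal illustration:

rows = [("m1", ["name"]), ("m2", ["age"]), ("m1", ["name"])]
print({model_id: props for model_id, props in rows})
# -> {'m1': ['name'], 'm2': ['age']}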
Example #17
def packages(organization_id: int, token_info: Claim, limit: int, offset: int,
             body: JsonDict) -> JsonDict:

    x_bf_trace_id = AuditLogger.trace_id_header()

    db = authorize_search(organization_id, x_bf_trace_id, token_info)

    api_client = PennsieveApiClient.get()

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True)

    dataset_filters = [DatasetFilter(d) for d in body.get("datasets", [])]

    # 1) Run the query and get all matching packages.
    results, total_count = db.search_packages(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
    )
    results = list(results)

    # 2) Group packages by dataset - the API endpoint to get datasets requires
    # a dataset ID in the URL.
    packages_by_dataset = defaultdict(list)
    for result in results:
        packages_by_dataset[result.dataset].append(result.package)

    package_dtos = []
    package_ids = []

    # 3) Get all package DTOs
    for dataset, packages in packages_by_dataset.items():

        dtos = api_client.get_packages(
            dataset.node_id,
            [package.id for package in packages],
            headers=dict(**auth_header(),
                         **with_trace_id_header(x_bf_trace_id)),
        )

        # 4) Reorder the DTOs in the response to match the order that packages
        # came out of the database. If a package has been deleted from Pennsieve
        # API, but not from Neo4j, it will be missing from the response. Ignore
        # this for now.
        # TODO: https://app.clickup.com/t/2c3ec9
        for package in packages:
            package_ids.append(package.id)
            if package.id in dtos:
                package_dtos.append(dtos[package.id])

    # Write to the audit log:
    AuditLogger.get().message().append("organization", organization_id).append(
        "datasets", *[str(r.dataset.id) for r in results
                      ]).append("packages", *package_ids).log(x_bf_trace_id)

    return {
        "packages": package_dtos,
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
Example #18
def records_csv(organization_id: int, token_info: Claim,
                body: JsonDict) -> Response:

    x_bf_trace_id = AuditLogger.trace_id_header()

    try:

        search_params = SearchDownloadRequest.schema().load(
            json.loads(body["data"]))

        db = authorize_search(organization_id, x_bf_trace_id, token_info)

        datasets = {ds.int_id: ds for ds in db.datasets}

        # Write to the audit log:
        logger = AuditLogger.get()
        logger.message().append("organization", organization_id).append(
            "datasets", *[ds.int_id for ds in db.datasets]).log(x_bf_trace_id)

        model_properties = db.suggest_properties(search_params.model)
        columns = {}
        date_columns = set()
        for _, p, _ in model_properties:

            if dt.DataType.is_date(p.data_type):
                date_columns.add(p.name)

            unit = dt.DataType.get_unit(p.data_type)
            unit_display_name = (
                None if unit is None else Unit.find_display_name(unit))

            display_name = (
                p.display_name if unit is None else
                f"{p.display_name} ({unit if unit_display_name is None else unit_display_name})"
            )

            columns[p.name] = display_name

        response = Response(
            iter_csv(
                db,
                search_params.model,
                search_params.filters,
                search_params.datasets,
                columns,
                date_columns,
                datasets,
                logger,
                x_bf_trace_id,
            ),
            mimetype="text/csv",
        )
        response.headers["Content-Disposition"] = (
            f"attachment; filename="
            f"{search_params.model.name}{RECORD_DOWNLOAD_EXTENSION}")
        return response

    except Exception as e:
        response = Response(csv_error(e))
        response.headers["Content-Disposition"] = (
            "attachment; filename=!ERROR.txt")
        return response
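
Passing the iter_csv generator to Response is what makes this download stream: Flask sends each yielded chunk as it is produced instead of buffering the whole file in memory. A self-contained sketch of the same pattern; the route and row data are invented for illustration:

import csv
import io

from flask import Flask, Response

app = Flask(__name__)

@app.route("/demo.csv")
def demo_csv() -> Response:
    def generate():
        buf = io.StringIO()
        writer = csv.writer(buf)
        for row in [["id", "name"], ["1", "alpha"], ["2", "beta"]]:
            writer.writerow(row)
            yield buf.getvalue()  # emit this row immediately
            buf.seek(0)
            buf.truncate(0)  # reset the buffer for the next row

    response = Response(generate(), mimetype="text/csv")
    response.headers["Content-Disposition"] = "attachment; filename=demo.csv"
    return response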