Exemple #1
0
def test_get_package_by_node_id(bf, dataset, package, auth_header, trace_id_headers):
    """Resolving a package by its node ID yields the same integer and node IDs."""
    api = PennsieveApiClient(bf.settings.api_host)

    resolved = api.get_package_ids(
        dataset.id,
        package.node_id,
        headers=dict(**auth_header, **trace_id_headers),
    )

    # Both identifiers of the resolved package must match the fixture package.
    assert resolved.id == package.id
    assert resolved.node_id == package.node_id
Exemple #2
0
def test_get_packages(bf, dataset, package, auth_header, trace_id_headers):
    """Fetching packages by ID returns a mapping keyed by package ID."""
    api = PennsieveApiClient(bf.settings.api_host)

    fetched = api.get_packages(
        dataset.id,
        [package.id],
        headers=dict(**auth_header, **trace_id_headers),
    )

    # The entry for the fixture package carries its node ID and integer ID
    # under the "content" key.
    content = fetched[package.id]["content"]
    assert content["nodeId"] == package.node_id
    assert content["id"] == package.id
Exemple #3
0
def test_get_datasets(bf, dataset, package, auth_header, trace_id_headers):
    """`get_datasets` and `get_dataset_ids` must agree on the set of dataset IDs."""
    api = PennsieveApiClient(bf.settings.api_host)

    all_datasets = api.get_datasets(
        headers=dict(**auth_header, **trace_id_headers)
    )
    assert len(all_datasets) > 0

    all_dataset_ids = api.get_dataset_ids(
        headers=dict(**auth_header, **trace_id_headers)
    )

    # Order is not significant, so compare the sorted integer IDs.
    ids_from_datasets = sorted([entry.int_id for entry in all_datasets])
    assert ids_from_datasets == sorted(all_dataset_ids)
Exemple #4
0
def test_get_packages_ignores_deleted_packages(
    bf, dataset, package, auth_header, trace_id_headers
):
    """Asking for a package ID that does not exist yields an empty result."""
    unknown_ids = ["N:package:does-not-exist"]
    api = PennsieveApiClient(bf.settings.api_host)

    found = api.get_packages(
        dataset.id, unknown_ids, headers=dict(**auth_header, **trace_id_headers)
    )

    assert len(found) == 0
Exemple #5
0
def create_proxy_instance(db: PartitionedDatabase, proxy_type: str,
                          body: JsonDict) -> List[JsonDict]:
    """
    Link the package named by ``body["external_id"]`` to every target listed
    in ``body["targets"]`` within a single database transaction, then write
    an audit log entry.

    ``proxy_type`` is not read by this function; the ``PROXY_TYPE`` constant
    is used when building the response.

    Returns the list of created proxy/relationship instance pairs together
    with the HTTP status code 201.

    Raises
    ------
    InvalidPackageProxyLinkTargetError
        If ``to_proxy_link_target`` returns ``None`` for a target.
    """
    created = []

    with db.transaction() as tx:

        # These names are bound inside the transaction block but are still
        # read after it closes, when the audit entry is written.
        trace_id = AuditLogger.trace_id_header()
        seen_link_targets = []
        seen_package_ids = []

        for entry in body["targets"]:

            raw_target = entry["linkTarget"]
            relationship = entry["relationshipType"]
            seen_link_targets.append(raw_target)

            resolved_target = to_proxy_link_target(raw_target)
            if resolved_target is None:
                raise InvalidPackageProxyLinkTargetError(link_target=str(body))

            # Resolve the package's integer and node IDs via the API.
            pkg = PennsieveApiClient.get().get_package_ids(
                db.dataset_node_id,
                body["external_id"],
                headers=dict(**auth_header(),
                             **with_trace_id_header(trace_id)),
            )
            seen_package_ids.append(str(pkg.id))

            proxy = db.create_package_proxy_tx(
                tx=tx,
                record=resolved_target.id,
                package_id=pkg.id,
                package_node_id=pkg.node_id,
                legacy_relationship_type=relationship,
            )

            proxy_instance = to_proxy_instance(PROXY_TYPE, proxy)
            relationship_instance = make_proxy_relationship_instance(
                resolved_target.id, proxy, relationship)

            created.append({
                "proxyInstance": proxy_instance,
                "relationshipInstance": relationship_instance,
            })

    # Record which link targets and packages were touched by this request.
    audit_entry = AuditLogger.get().message()
    audit_entry = audit_entry.append("link-targets", *seen_link_targets)
    audit_entry = audit_entry.append("packages", *seen_package_ids)
    audit_entry.log(trace_id)

    return created, 201
Exemple #6
0
    def wrapper(db: PartitionedDatabase, **kwargs):
        """
        Call the wrapped function, then make a best-effort call to
        ``PennsieveApiClient.touch_dataset`` for the database's dataset.

        The wrapped function runs first and outside the ``try`` block, so any
        exception it raises propagates to the caller. Exceptions raised by the
        touch call are caught, reported to VictorOps and logged; the wrapped
        function's result is returned either way.
        """
        result = func(db, **kwargs)

        # Build a service-level JWT scoped to this organization and dataset.
        config = Config.from_app()
        jwt = service_claim(db.organization_id, db.dataset_id,
                            config.jwt_config)

        try:
            PennsieveApiClient.get().touch_dataset(
                db.organization_id,
                db.dataset_id,
                headers={"Authorization": f"Bearer {jwt}"},
            )
        except Exception as e:
            # Deliberately broad: a failed touch must not fail the request
            # whose work has already completed.
            VictorOpsClient.get().warning(
                f"organization/{db.organization_id}/dataset/{db.dataset_id}",
                f"Couldn't touch dataset {db.dataset_id} for organization={db.organization_id}",
            )
            # `warn` is a deprecated alias; `warning` is the supported name.
            logger.warning(
                f"couldn't touch timestamp for organization/{db.organization_id}/dataset/{db.dataset_id}: {e}"
            )

        return result
Exemple #7
0
def create_package_proxy(
    db: PartitionedDatabase, record_id: RecordId, package_id: PackageId, body: JsonDict
) -> Tuple[JsonDict, int]:
    """
    Create a package proxy linking ``record_id`` to the package identified by
    ``package_id``.

    The package's integer and node IDs are resolved through the API before
    the proxy is created. ``body`` is not read. Returns the proxy serialized
    with ``to_dict()`` together with the HTTP status code 201.
    """
    trace_id = AuditLogger.trace_id_header()

    resolved = PennsieveApiClient.get().get_package_ids(
        db.dataset_node_id,
        package_id,
        headers=dict(**auth_header(), **with_trace_id_header(trace_id)),
    )

    proxy = db.create_package_proxy(
        record_id, package_id=resolved.id, package_node_id=resolved.node_id
    )
    return proxy.to_dict(), 201
Exemple #8
0
def get_files_paged(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    """
    Return one page of the packages attached to record ``id_``.

    The response carries ``limit``, ``offset``, ``totalCount`` and
    ``results``; each result is a two-element list holding ``{"id": <proxy
    id>}`` and the legacy package DTO. ``concept_id``, ``order_by`` and
    ``ascending`` are accepted but not read here.
    """
    trace_id = AuditLogger.trace_id_header()

    total, proxy_iter = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset)
    proxies = list(proxy_iter)

    # Packages the API cannot find are simply absent from this mapping and
    # are therefore left out of the response.
    # TODO: https://app.clickup.com/t/3gaec4
    found = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[p.package_id for p in proxies],
        headers=dict(**auth_header(), **with_trace_id_header(trace_id)),
    )

    proxy_ids = [str(p.id) for p in proxies]
    found_ids = found.keys()

    audit_entry = AuditLogger.get().message()
    audit_entry = audit_entry.append("package-proxies", *proxy_ids)
    audit_entry = audit_entry.append("packages", *found_ids)
    audit_entry.log(TraceId(trace_id))

    results = []
    for p in proxies:
        if p.package_id not in found:
            continue
        results.append([{"id": p.id},
                        to_legacy_package_dto(found[p.package_id])])

    return {
        "limit": limit,
        "offset": offset,
        "totalCount": total,
        "results": results,
    }
Exemple #9
0
def authorize_search(organization_id: int, trace_id: TraceId,
                     token_info: Claim):
    """
    Check that ``token_info`` is a user claim with access to
    ``organization_id`` and build a ``SearchDatabase`` for that user.

    Raises ``OAuthProblem`` when the claim is not a user claim and
    ``Forbidden`` when the claim lacks access to the organization.
    """
    if not token_info.is_user_claim:
        raise OAuthProblem("Requires a user claim")

    has_access = token_info.has_organization_access(
        RoleOrganizationId(organization_id))
    if not has_access:
        raise Forbidden

    requesting_user = UserId(token_info.content.node_id)

    # Datasets are fetched from the API with the caller's auth and trace
    # headers.
    request_headers = dict(**auth_header(), **with_trace_id_header(trace_id))
    visible_datasets = PennsieveApiClient.get().get_datasets(
        headers=request_headers)

    return SearchDatabase(
        db=current_app.config["db"],
        organization_id=organization_id,
        user_id=requesting_user,
        datasets=visible_datasets,
    )
Exemple #10
0
def get_files(
    db: PartitionedDatabase,
    concept_id: str,
    id_: str,
    limit: int = 100,
    offset: int = 0,
    order_by: str = "createdAt",
    ascending: bool = True,
) -> JsonDict:
    """
    Return the packages attached to record ``id_`` in the legacy format.

    The result is a list of two-element lists, each holding ``{"id": <proxy
    id>}`` and the legacy package DTO. ``concept_id``, ``order_by`` and
    ``ascending`` are accepted but not read here.
    """
    trace_id = AuditLogger.trace_id_header()

    # The total count is returned by the database call but is not part of
    # this response.
    _, proxy_iter = db.get_package_proxies_for_record(
        id_, limit=limit, offset=offset
    )
    proxies = list(proxy_iter)

    # Packages the API cannot find are simply absent from this mapping and
    # are therefore left out of the response.
    # TODO: https://app.clickup.com/t/3gaec4
    found = PennsieveApiClient.get().get_packages(
        db.dataset_node_id,
        package_ids=[p.package_id for p in proxies],
        headers=dict(**auth_header(), **with_trace_id_header(trace_id)),
    )

    proxy_ids = [str(p.id) for p in proxies]
    found_ids = found.keys()

    audit_entry = AuditLogger.get().message()
    audit_entry = audit_entry.append("package-proxies", *proxy_ids)
    audit_entry = audit_entry.append("packages", *found_ids)
    audit_entry.log(TraceId(trace_id))

    # Legacy response shape: a list of pairs, where each pair is an object
    # with the proxy ID followed by the package DTO.
    pairs = []
    for p in proxies:
        if p.package_id not in found:
            continue
        pairs.append([{"id": p.id}, to_legacy_package_dto(found[p.package_id])])
    return pairs
Exemple #11
0
def fetch_dataset_id(claim: Claim, node_id: DatasetNodeId) -> DatasetId:
    """
    Resolve a dataset's integer ID from its node ID.

    Three sources are consulted in order, and the first hit wins: the dataset
    roles in the JWT claim, the database, and finally the Pennsieve API.

    Raises
    ------
    ExternalRequestError
    """
    # 1) A dataset role in the claim that names this node ID.
    claimed_role = next(
        (
            role
            for role in claim.content.roles
            if role.type == JwtRole.DATASET_ROLE and role.node_id == node_id
        ),
        None,
    )
    if claimed_role is not None:
        return DatasetId(claimed_role.id.id)

    # 2) The database.
    stored_id = Database.from_server().get_dataset_id(node_id)
    if stored_id is not None:
        return stored_id

    # 3) The Pennsieve API.
    remote_dataset = PennsieveApiClient.get().get_dataset(
        node_id, headers=dict(**auth_header(), **trace_id_header_dict())
    )
    return DatasetId(remote_dataset.int_id)
Exemple #12
0
def packages(organization_id: int, token_info: Claim, limit: int, offset: int,
             body: JsonDict) -> JsonDict:
    """
    Search for packages across an organization and return their DTOs.

    ``body`` must contain ``"model"`` and ``"filters"``; ``"datasets"`` is
    optional. The response holds the package DTOs, in the order produced by
    grouping the search results per dataset, plus ``totalCount``, ``limit``
    and ``offset``.
    """
    trace_id = AuditLogger.trace_id_header()

    search_db = authorize_search(organization_id, trace_id, token_info)

    api_client = PennsieveApiClient.get()

    property_filters: List[PropertyFilter] = PropertyFilter.schema().load(
        body["filters"], many=True)

    dataset_filters = [
        DatasetFilter(name) for name in body.get("datasets", [])
    ]

    # Step 1: run the search and materialize the matching rows.
    found, total_count = search_db.search_packages(
        model_filter=ModelFilter(body["model"]),
        property_filters=property_filters,
        dataset_filters=dataset_filters,
        limit=limit,
        offset=offset,
    )
    rows = list(found)

    # Step 2: bucket the packages per dataset, because the API call that
    # fetches packages takes a dataset node ID.
    grouped = {}
    for row in rows:
        grouped.setdefault(row.dataset, []).append(row.package)

    package_dtos = []
    seen_package_ids = []

    # Step 3: fetch the DTOs one dataset at a time.
    for dataset, dataset_packages in grouped.items():

        ids_in_dataset = [pkg.id for pkg in dataset_packages]

        dtos = api_client.get_packages(
            dataset.node_id,
            ids_in_dataset,
            headers=dict(**auth_header(),
                         **with_trace_id_header(trace_id)),
        )

        # Step 4: emit DTOs in database order. A package that the API no
        # longer returns is skipped in the response, but its ID is still
        # recorded for the audit log.
        # TODO: https://app.clickup.com/t/2c3ec9
        seen_package_ids.extend(ids_in_dataset)
        package_dtos.extend(
            dtos[pkg_id] for pkg_id in ids_in_dataset if pkg_id in dtos)

    # Audit: organization, one dataset ID per search row, and every package
    # ID that was looked up.
    dataset_ids_for_audit = [str(row.dataset.id) for row in rows]
    audit_entry = AuditLogger.get().message()
    audit_entry = audit_entry.append("organization", organization_id)
    audit_entry = audit_entry.append("datasets", *dataset_ids_for_audit)
    audit_entry = audit_entry.append("packages", *seen_package_ids)
    audit_entry.log(trace_id)

    return {
        "packages": package_dtos,
        "totalCount": total_count,
        "limit": limit,
        "offset": offset,
    }
Exemple #13
0
def create_app(
    config: Config = None,
    db: Database = None,
    api_client: PennsieveApiClient = None,
    jobs_client: PennsieveJobsClient = None,
    audit_logger: Auditor = None,
    victor_ops_client: VictorOpsClient = None,
):
    """
    Build and configure the application.

    Mounts the internal, v1, v2 and streaming v2 API specifications, mounts
    v1 merged with the health routes at the root path, registers the error
    handlers, and stores the service dependencies in ``app.app.config``.

    Every parameter is optional. A dependency passed as ``None`` is
    constructed here: ``config`` via ``Config()``, and the others from values
    on ``config``.

    Returns the configured ``App`` instance.
    """
    app = App(__name__)

    health = bundled("health.yml")
    internal = bundled("model-service-internal.yml")
    api_v1 = bundled("model-service-v1.yml")
    api_v2 = bundled("model-service-v2.yml")
    api_v2_streaming = bundled("model-service-streaming-v2.yml")

    app.add_api(
        internal, validate_responses=True, pythonic_params=True, base_path="/internal"
    )
    app.add_api(api_v1, validate_responses=True, pythonic_params=True, base_path="/v1")
    app.add_api(api_v2, validate_responses=True, pythonic_params=True, base_path="/v2")

    app.app.json_encoder = CustomizedEncoder

    # Mount the v1 API again with no `v1/` prefix. Ideally this would be
    # rewritten in the gateway, but internal services need to be updated to
    # use the `/v1` prefix first. This needs to be merged with `health` so
    # that these routes can share the same base path.
    #
    # See ticket: https://app.clickup.com/t/5mcufd
    #
    # `dict(api_v1)` is only a shallow copy, so the merged paths go into a
    # fresh mapping instead of being written into the `paths` dict that
    # `api_v1` itself still references.
    root_api = dict(api_v1)
    root_api["paths"] = {**api_v1["paths"], **health["paths"]}
    app.add_api(root_api, validate_responses=True, pythonic_params=True, base_path="/")

    # Unfortunately the only way to stream responses with connexion is to turn
    # response validation off.
    app.add_api(
        api_v2_streaming,
        validate_responses=False,
        pythonic_params=True,
        base_path="/v2/organizations",
    )

    @app.app.errorhandler(ValueError)
    def handle_value_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            400,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(NotImplementedError)
    def handle_not_implemented_error(error):
        stacktrace = get_error_context()
        return (
            dict(message=str(error), stacktrace=stacktrace),
            415,
            {"Content-Type": "application/json"},
        )

    @app.app.errorhandler(errors.ExternalRequestError)
    def handle_external_request_failure(error):
        return (
            dict(message=str(error)),
            500,
            {"Content-Type": "application/json"},
        )

    # Service-level errors that serialize themselves and map to 400.
    @app.app.errorhandler(errors.MissingTraceId)
    @app.app.errorhandler(errors.ModelServiceError)
    @app.app.errorhandler(errors.OperationError)
    @app.app.errorhandler(errors.InvalidOrganizationError)
    @app.app.errorhandler(errors.InvalidDatasetError)
    def handle_service_error(error):
        return error.to_json(), 400, {"Content-Type": "application/json"}

    @app.app.errorhandler(ExpiredSignatureError)
    @app.app.errorhandler(OAuthProblem)
    def handle_auth_error(error):
        return dict(message=str(error)), 401, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.RecordRelationshipNotFoundError)
    @app.app.errorhandler(errors.LegacyModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelRelationshipNotFoundError)
    @app.app.errorhandler(errors.ModelNotFoundError)
    def handle_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.PackageProxyNotFoundError)
    def handle_proxy_package_not_found(error):
        return error.to_json(), 404, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ExceededTimeLimitError)
    def handle_operation_timed_out(error):
        return error.to_json(), 408, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.ModelPropertyInUseError)
    def handle_model_property_in_use(error):
        return error.to_json(), 422, {"Content-Type": "application/json"}

    @app.app.errorhandler(errors.LockedDatasetError)
    def handle_locked_dataset(error):
        return error.to_json(), 423, {"Content-Type": "application/json"}

    # Dependencies: use what the caller supplied, otherwise build from config.
    if config is None:
        config = Config()
    app.app.config["config"] = config

    if db is None:
        db = Database.from_config(config)
    app.app.config["db"] = db

    if api_client is None:
        api_client = PennsieveApiClient(config.pennsieve_api_host)
    app.app.config["api_client"] = api_client

    if jobs_client is None:
        sqs_client = boto3.client("sqs", region_name=config.aws_region)
        jobs_client = PennsieveJobsClient(sqs_client, config.jobs_sqs_queue_id)
    app.app.config["jobs_client"] = jobs_client

    if victor_ops_client is None:
        victor_ops_client = VictorOpsClient(
            config.victor_ops_url, f"{config.environment}-data-management"
        )
    app.app.config["victor_ops_client"] = victor_ops_client

    if audit_logger is None:
        audit_logger = AuditLogger(GatewayHost(config.gateway_internal_host))
    app.app.config["audit_logger"] = audit_logger

    app.app.after_request(log_request)

    return app