Esempio n. 1
0
    def patch_trial_metadata(trial_id: str, metadata: dict, session: Session):
        """
        Merge a metadata patch into a trial's stored metadata, or create the
        trial record from the patch when no record exists yet.

        Args:
            trial_id: the lead organization study id for this trial
            metadata: a partial metadata object for trial_id
            session: the database session used to look up, write, and commit
                the trial record
        """
        existing = TrialMetadata.find_by_trial_id(trial_id, session=session)

        if not existing:
            # No record for this trial yet: the patch becomes its initial metadata.
            app.logger.info(f"Creating new trial_metadata for trial {trial_id}")
            session.add(TrialMetadata(trial_id=trial_id, metadata_json=metadata))
            session.commit()
            return

        # Fold the patch into the metadata already stored for this trial.
        # NOTE(review): the merge result is bound to a single name here, while
        # other call sites in this file unpack a two-item result from
        # merge_clinical_trial_metadata -- confirm the return shape of the
        # prism version in use.
        merged = prism.merge_clinical_trial_metadata(metadata, existing.metadata_json)

        # Persist the merged metadata together with an etag recomputed from it.
        changes = {
            "metadata_json": merged,
            "_etag": make_etag(existing.trial_id, merged),
        }
        session.query(TrialMetadata).filter_by(trial_id=existing.trial_id).update(
            changes
        )
        session.commit()
Esempio n. 2
0
def run(ts_path: str, mif_path: str, he_path: str, outdir: str):
    """Run and profile a typical metadata validation and merging workload.

    Five stages are wrapped in ``profiling(...)``: prismifying a tissue slide
    shipping manifest, prismifying an mIF assay spreadsheet, merging stub
    artifact records into the mIF patch, merging the mIF patch with the
    tissue slide metadata, and merging H&E metadata into the combined result.

    Args:
        ts_path: path to the tissue slide shipping manifest Excel file
        mif_path: path to the mIF assay metadata Excel file
        he_path: path to the H&E Excel file
        outdir: passed to every ``profiling`` call (presumably the directory
            where profiling output is written -- confirm against ``profiling``)
    """
    set_prism_encrypt_key("foobar")

    with profiling("1_prismify_tissue_slide_shipping_manifest", outdir):
        slide_template = Template.from_type("tissue_slide")
        slide_sheet, _ = XlTemplateReader.from_excel(ts_path)
        slide_patch, _, _ = prismify(slide_sheet, slide_template)
        slide_patch["allowed_cohort_names"] = ["Not_reported"]
        slide_patch["allowed_collection_event_names"] = ["Baseline"]

    with profiling("2_prismify_mif_assay_metadata_spreadsheet", outdir):
        assay_template = Template.from_type("mif")
        assay_sheet, _ = XlTemplateReader.from_excel(mif_path)
        assay_patch, upload_files, _ = prismify(assay_sheet, assay_template)

    with profiling("3_merge_mif_assay_artifacts_into_mif_metadata_patch", outdir):
        # One stub artifact record per file reported by prismify.
        stub_artifacts = [
            ArtifactInfo(
                upload.upload_placeholder,
                f"object/url/{upload.upload_placeholder}",
                "",
                0,
                "",
                "abcd",
            )
            for upload in upload_files
        ]
        # Wrapping the list in tqdm prints a progress indicator to stdout
        # while prism iterates through it.
        assay_patch, _ = merge_artifacts(assay_patch, tqdm(stub_artifacts))

    with profiling("4_merge_mif_metadata_with_tissue_slide_metadata", outdir):
        trial_metadata, _ = merge_clinical_trial_metadata(assay_patch, slide_patch)

    # Prismifying the H&E spreadsheet stays outside any profiling block: the
    # only thing of interest here is how long it takes to merge its data
    # into the existing trial metadata.
    stain_template = Template.from_type("h_and_e")
    stain_sheet, _ = XlTemplateReader.from_excel(he_path)
    stain_patch, _, _ = prismify(stain_sheet, stain_template)

    with profiling("5_merge_h_and_e_metadata_into_trial", outdir):
        merge_clinical_trial_metadata(stain_patch, trial_metadata)
def test_merge_artifacts(prism_test: PrismTestData, ct_validator):
    """Check that upload entries merge into the prismify patch as artifacts.

    Verifies that each merged artifact carries its entry's object URL and
    placeholder, that it sits in the patch where the placeholder UUID used to
    be, and that the patch-with-artifacts merges into the base trial without
    errors and still validates.
    """
    # Some upload types won't have any artifacts to merge
    if len(prism_test.upload_entries) == 0:
        return

    # Work on a copy so the original patch stays available for path lookups.
    patch_with_artifacts = deepcopy(prism_test.prismify_patch)
    artifact_infos = [
        ArtifactInfo(
            artifact_uuid=upload.upload_placeholder,
            object_url=upload.gs_key,
            upload_type=prism_test.upload_type,
            file_size_bytes=0,
            uploaded_timestamp="",
            md5_hash="foo",
            crc32c_hash="bar",
        )
        for upload in prism_test.upload_entries
    ]
    patch_with_artifacts, merge_results = merge_artifacts(
        patch_with_artifacts, artifact_infos
    )

    # Each merge result pairs up, in order, with the entry it was built from.
    merged_by_uuid = []
    for upload, (merged_artifact, extra_metadata) in zip(
        prism_test.upload_entries, merge_results
    ):
        assert merged_artifact["object_url"] == upload.gs_key
        assert merged_artifact["upload_placeholder"] == upload.upload_placeholder
        assert extra_metadata != {}

        merged_by_uuid.append((upload.upload_placeholder, merged_artifact))

    # Check that the artifact objects have been merged in the right places.
    key_suffix = "['upload_placeholder']"
    for placeholder_uuid, merged_artifact in merged_by_uuid:
        # Locate the placeholder uuid in the *original* patch.
        matches = (prism_test.prismify_patch | grep(placeholder_uuid))["matched_values"]

        assert len(matches) == 1, "UUID should only occur once in a metadata patch"
        uuid_path = matches.pop()
        assert uuid_path.endswith(
            key_suffix
        ), f"UUID values should have key {key_suffix}"

        # Strip the trailing key to get the path of the enclosing object.
        parent_path = uuid_path[: -len(key_suffix)]

        # The path is an expression rooted at the name "root"; evaluating it
        # against the *new* patch must yield the merged artifact dictionary.
        # eval runs here only on paths derived from the test's own patch data.
        located = eval(parent_path, {}, {"root": patch_with_artifacts})
        assert located == merged_artifact

    # Merge the patch-with-artifacts into the base trial
    merged_trial, merge_errors = merge_clinical_trial_metadata(
        patch_with_artifacts, prism_test.base_trial
    )
    assert len(merge_errors) == 0, "\n".join(map(str, merge_errors))

    # Make sure the modified patch is still valid
    ct_validator.validate(merged_trial)
def test_merge_patch_into_trial(prism_test: PrismTestData, ct_validator):
    """Check that the prismify patch merges cleanly into the base trial.

    The merge must report no errors, the merged trial must pass validation,
    and it must match the expected target trial.
    """
    merged_trial, merge_errors = merge_clinical_trial_metadata(
        prism_test.prismify_patch, prism_test.base_trial
    )

    # Any merge error fails the test; the message lists one error per line.
    assert len(merge_errors) == 0, "\n".join(map(str, merge_errors))

    # The merged trial must still pass validation.
    ct_validator.validate(merged_trial)

    expected_trial = prism_test.target_trial
    assert_metadata_matches(merged_trial, expected_trial, prism_test.upload_entries)
Esempio n. 5
0
        def wrapped(*args, **kwargs):
            """
            Validate an uploaded xlsx template, then call the wrapped handler.

            The upload is parsed, validated against its template, prismified
            into a metadata patch, and test-merged into the target trial's
            stored metadata. Errors from each step are accumulated so that
            they can be reported together.

            Returns:
                Whatever ``f`` returns when called with the current user, the
                trial record, the template type, the xlsx file, the metadata
                patch, the file infos, and the original ``*args``/``**kwargs``.

            Raises:
                BadRequest: if the xlsx file cannot be read, the protocol id
                    field is missing from the patch, the trial does not exist,
                    the stored trial metadata is an invalid merge target, or
                    any errors were accumulated along the way.
                Unauthorized: if ``check_permissions`` rejects the current user.
            """
            logger.info(f"upload_handler({f.__name__}) started")
            template, xlsx_file = extract_schema_and_xlsx(allowed_types)

            errors_so_far = []

            try:
                xlsx, errors = XlTemplateReader.from_excel(xlsx_file)
            except SchemasValidationError as e:
                raise BadRequest({"errors": [str(e)]})
            logger.info(f"xlsx parsed: {len(errors)} errors")
            log_multiple_errors(errors)
            errors_so_far.extend(errors)

            # Run basic validations on the provided Excel file
            validations = validate(template, xlsx)
            logger.info(f"xlsx validated: {len(validations.json['errors'])} errors")
            log_multiple_errors(validations.json["errors"])
            errors_so_far.extend(validations.json["errors"])

            md_patch, file_infos, errors = prism.prismify(xlsx, template)
            logger.info(
                f"prismified: {len(errors)} errors, {len(file_infos)} file_infos"
            )
            log_multiple_errors(errors)
            errors_so_far.extend(errors)

            try:
                trial_id = md_patch[prism.PROTOCOL_ID_FIELD_NAME]
            except KeyError:
                errors_so_far.append(f"{prism.PROTOCOL_ID_FIELD_NAME} field not found.")
                # we can't find trial id so we can't proceed
                raise BadRequest({"errors": [str(e) for e in errors_so_far]})

            trial = TrialMetadata.find_by_trial_id(trial_id)
            if not trial:
                errors_so_far.insert(
                    0,
                    f"Trial with {prism.PROTOCOL_ID_FIELD_NAME}={trial_id!r} not found.",
                )
                # we can't find trial so we can't proceed trying to check_perm or merge
                raise BadRequest({"errors": [str(e) for e in errors_so_far]})

            user = get_current_user()
            try:
                check_permissions(user, trial_id, template.type)
            except Unauthorized as e:
                errors_so_far.insert(0, e.description)
                # unauthorized to pull trial so we can't proceed trying to merge
                raise Unauthorized({"errors": [str(e) for e in errors_so_far]})

            # Try to merge assay metadata into the existing clinical trial metadata
            # Ignoring result as we only want to check there's no validation errors
            try:
                _, errors = prism.merge_clinical_trial_metadata(
                    md_patch, trial.metadata_json
                )
            except ValidationError as e:
                errors_so_far.append(json_validation.format_validation_error(e))
            except prism.MergeCollisionException as e:
                errors_so_far.append(str(e))
            except prism.InvalidMergeTargetException as e:
                # we have an invalid MD stored in db - users can't do anything about it.
                # So we log it
                logger.error(f"Internal error with trial {trial_id!r}\n{e}")
                # and return an error. Though it's not BadRequest but rather an
                # Internal Server error we report it like that, so it will be displayed
                raise BadRequest(
                    f"Internal error with {trial_id!r}. Please contact a CIDC Administrator."
                ) from e
            else:
                # Only report merge errors when the merge actually returned.
                # If it raised, `errors` still holds the prismify errors, and
                # handling them here would log and report them a second time.
                logger.info(f"merged: {len(errors)} errors")
                log_multiple_errors(errors)
                errors_so_far.extend(errors)

            if errors_so_far:
                raise BadRequest({"errors": [str(e) for e in errors_so_far]})

            return f(
                user,
                trial,
                template.type,
                xlsx_file,
                md_patch,
                file_infos,
                *args,
                **kwargs,
            )