def test_load_subschema():
    """Test that the subschema loading option works as expected."""
    schema = load_and_validate_schema("clinical_trial.json")
    subschema = schema["properties"]["participants"]
    path = "properties/participants"

    assert subschema == load_and_validate_schema(f"clinical_trial.json#{path}")
    assert subschema == load_and_validate_schema(
        f"clinical_trial.json#/{path}")

    with pytest.raises(jsonpointer.JsonPointerException):
        load_and_validate_schema("clinical_trial.json#foo")
Example #2
def load_schemas() -> dict:
    """
    Load all JSON schemas into a dictionary keyed on the
    schema directory. Values are dictionaries mapping entity
    names to loaded and validated entity schemas.
    """
    schemas = {}
    for root, _, paths in os.walk(SCHEMA_DIR):
        root_schemas = {}
        for path in paths:
            schema_path = os.path.join(root, path)

            def json_to_html(ref):
                """Update refs to refer to the URL of the corresponding documentation."""
                url = ref.replace('.json', '.html')
                url = url.replace('properties/', '')
                url = url.replace('definitions/', '')
                url = url.replace('/', '.')
                return {'url': url}

            schema = load_and_validate_schema(schema_path,
                                              SCHEMA_DIR,
                                              on_refs=json_to_html)

            schema_path = path.replace(".json", ".html").replace("/", ".")
            root_schemas[schema_path] = schema

        relative_root = root.replace(f"{ROOT_DIR}/", "").replace("/", ".")
        relative_root = root.replace(SCHEMA_DIR, "").replace("/", ".")
        relative_root = relative_root.replace(".", "", 1)
        schemas[relative_root] = root_schemas

    return schemas
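
A minimal usage sketch of the mapping described in the docstring above; the directory and entity names in the comment are illustrative assumptions about the local SCHEMA_DIR layout, not guaranteed values.

# Hedged sketch: walk the nested mapping returned by load_schemas().
all_schemas = load_schemas()
for directory, entities in all_schemas.items():
    for entity_name, entity_schema in entities.items():
        # e.g. directory "assays", entity_name "wes_assay.html" (illustrative only)
        print(directory, entity_name, entity_schema.get("title", "<untitled>"))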
Example #3
def _fetch_validator(name):

    schema_root = SCHEMA_DIR
    schema_path = os.path.join(SCHEMA_DIR, "assays/%s_assay.json" % name)
    schema = load_and_validate_schema(schema_path, schema_root)

    # create validator and assert the schema is valid.
    return jsonschema.Draft7Validator(schema)
Example #4
def test_merge_core():

    # create aliquot
    aliquot = {"cimac_aliquot_id": "1234"}

    # create the sample.
    sample = {
        "cimac_sample_id": "S1234",
        "site_sample_id": "blank",
        "aliquots": [aliquot]
    }

    # create the participant
    participant = {
        "cimac_participant_id": "P1234",
        "trial_participant_id": "blank",
        "samples": [sample]
    }

    # create the trial
    ct1 = {"lead_organization_study_id": "test", "participants": [participant]}

    # create validator and assert the schema is valid.
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema
    validator.validate(ct1)

    # create a copy of this, modify participant id
    ct2 = copy.deepcopy(ct1)
    ct2['participants'][0]['cimac_participant_id'] = "PABCD"

    # merge them
    merger = Merger(schema)
    ct3 = merger.merge(ct1, ct2)

    # assert we have two participants and their ids are different.
    assert len(ct3['participants']) == 2
    assert ct3['participants'][0]['cimac_participant_id'] == ct1[
        'participants'][0]['cimac_participant_id']
    assert ct3['participants'][1]['cimac_participant_id'] == ct2[
        'participants'][0]['cimac_participant_id']

    # now let's add a new sample to one of the participants
    ct4 = copy.deepcopy(ct3)
    sample2 = ct4['participants'][0]['samples'][0]
    sample2['cimac_sample_id'] = 'new_id_1'

    ct5 = merger.merge(ct3, ct4)
    assert len(ct5['participants'][0]['samples']) == 2

    # now let's add a new aliquot to one of the samples.
    ct6 = copy.deepcopy(ct5)
    aliquot2 = ct6['participants'][0]['samples'][0]['aliquots'][0]
    aliquot2['cimac_aliquot_id'] = 'new_ali_id_1'

    ct7 = merger.merge(ct5, ct6)
    assert len(ct7['participants'][0]['samples'][0]['aliquots']) == 2
def test_iter_error_messages():
    """Smoke check that _Validator.iter_error_messages returns strings, not ValidationErrors."""
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)

    errs = list(
        validator.iter_error_messages({"protocol_identifier": "foo123"}))
    for err in errs:
        assert isinstance(err, str)
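
Since iter_error_messages yields plain strings (as the test above checks), they can be joined straight into a human-readable report. A minimal sketch, reusing the validator and the same toy instance from the test:

# Hedged sketch: collect the messages and print a simple report.
messages = list(
    validator.iter_error_messages({"protocol_identifier": "foo123"}))
if messages:
    print("invalid metadata:\n" + "\n".join(f" - {m}" for m in messages))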
def _fetch_validator(name):

    schema_root = SCHEMA_DIR
    schema_path = os.path.join(SCHEMA_DIR, "%s.json" % name)
    validator = load_and_validate_schema(
        schema_path, schema_root, return_validator=True
    )

    return validator
def _fetch_validator(name):

    schema_root = SCHEMA_DIR
    schema_path = os.path.join(SCHEMA_DIR, "artifacts/artifact_%s.json" % name)
    validator = load_and_validate_schema(schema_path,
                                         schema_root,
                                         return_validator=True)

    # create validator and assert the schema is valid.
    return validator
def test_recursive_validations():
    validator = load_and_validate_schema("a.json",
                                         schema_root=TEST_SCHEMA_DIR,
                                         return_validator=True)

    with pytest.raises(jsonschema.ValidationError,
                       match="not of type 'array'"):
        validator.validate({"a_prop": {"recursive_prop": {}}})

    with pytest.raises(jsonschema.ValidationError,
                       match="not of type 'array'"):
        validator.validate({"a_prop": {"recursive_prop": [{}]}})

    with pytest.raises(jsonschema.ValidationError,
                       match="not of type 'array'"):
        validator.validate({"a_prop": {"recursive_prop": [[{}], [], [[]]]}})

    validator.validate({"a_prop": {"recursive_prop": [[[]], [], [[[[]]]]]}})
Example #9
def test_filepath_gen():

    # create validators
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema

    # get a specific template
    for temp_path, xlsx_path in template_paths():

        # extract hint.
        hint = temp_path.split("/")[-1].replace("_template.json", "")

        # TODO: only WES parsing is implemented so far...
        if hint != "wes":
            continue

        # parse the spreadsheet and get the file maps
        ct, file_maps = prismify(xlsx_path, temp_path, assay_hint=hint)

        # assert we have the right counts.
        if hint == "wes":

            # check the number of files present.
            assert len(file_maps) == 6

            # we should have 2 fastq per sample.
            assert 4 == sum(
                [1 for x in file_maps if x['gs_key'].count("fastq") > 0])

            # we should have 2 tot forward.
            assert 2 == sum(
                [1 for x in file_maps if x['gs_key'].count("forward") > 0])
            assert 2 == sum(
                [1 for x in file_maps if x['gs_key'].count("reverse") > 0])

            # we should have 2 text files
            assert 2 == sum(
                [1 for x in file_maps if x['gs_key'].count("txt") > 0])

        # assert works
        validator.validate(ct)
Example #10
def test_schema(example_path):
    validator = load_and_validate_schema("clinical_trial.json", return_validator=True)

    full_path = os.path.join(*example_path)
    _, fname = example_path

    with open(full_path) as file:
        try:
            ct_example = json.load(file)
        except Exception as e:
            raise Exception(f"Error decoding {example_path}: {e}")

        try:
            validator.validate(ct_example)
        except jsonschema.exceptions.ValidationError as e:
            raise Exception(
                f'Failed to validate {fname}:{"["+"][".join(repr(p) for p in e.absolute_path)+"]"} \
                \n {e.message} \
                \n CT_SCHEMA{"["+"][".join(repr(p) for p in e.absolute_schema_path)+"]"} \
                \n instance {e.instance}'
            )
Example #11
def test_prism():

    # create validators
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema

    # get a specific template
    for temp_path, xlsx_path in template_paths():

        # extract hint.
        hint = temp_path.split("/")[-1].replace("_template.json", "")

        # TODO: only WES parsing is implemented so far...
        if hint != "wes":
            continue

        # turn into object.
        ct, file_maps = prismify(xlsx_path, temp_path, assay_hint=hint)

        # assert works
        validator.validate(ct)
def test_trial_core():

    # load and validate schema.
    schema_root = SCHEMA_DIR
    ct_schema_path = os.path.join(SCHEMA_DIR, "clinical_trial.json")
    pt_schema_path = os.path.join(SCHEMA_DIR, "participant.json")
    sm_schema_path = os.path.join(SCHEMA_DIR, "sample.json")
    al_schema_path = os.path.join(SCHEMA_DIR, "aliquot.json")

    ct_schema = load_and_validate_schema(ct_schema_path, schema_root)
    pt_schema = load_and_validate_schema(pt_schema_path, schema_root)
    sm_schema = load_and_validate_schema(sm_schema_path, schema_root)
    al_schema = load_and_validate_schema(al_schema_path, schema_root)

    # create validator and assert the schemas are valid.
    ct_validator = jsonschema.Draft7Validator(ct_schema)
    ct_validator.check_schema(ct_schema)

    pt_validator = jsonschema.Draft7Validator(pt_schema)
    pt_validator.check_schema(pt_schema)

    sm_validator = jsonschema.Draft7Validator(sm_schema)
    sm_validator.check_schema(sm_schema)

    al_validator = jsonschema.Draft7Validator(al_schema)
    al_validator.check_schema(al_schema)

    # create some aliquots.
    shipment = {
        "account_number": "account_number",
        "assay_priority": "1",
        "assay_type": "Olink",
        "courier": "USPS",
        "date_received": "date_received",
        "date_shipped": "date_shipped",
        "manifest_id": "manifest_id",
        "quality_of_shipment": "Specimen shipment received in good condition",
        "ship_from": "ship_from",
        "ship_to": "ship_to",
        "shipping_condition": "Ice_Pack",
        "tracking_number": "tracking_number",
        "receiving_party": "MDA_Wistuba",
    }
    aliquot1 = {
        "slide_number": "99",
        "aliquot_replacement": "N/A",
        "aliquot_status": "Other",
    }
    al_validator.validate(aliquot1)

    aliquot2 = {
        "slide_number": "98",
        "aliquot_replacement": "N/A",
        "aliquot_status": "Other",
    }
    al_validator.validate(aliquot2)

    # create some samples.
    sample1 = {
        "cimac_id": "CTTTPPP12.00",
        "parent_sample_id": "ssida",
        "aliquots": [aliquot1],
        "collection_event_name": "Baseline",
        "type_of_primary_container": "Sodium heparin",
        "sample_location": "---",
        "type_of_sample": "Other",
        "sample_volume_units": "Other",
        "material_used": 1,
        "material_remaining": 0,
        "quality_of_sample": "Other",
        "box_number": "1",
    }
    sm_validator.validate(sample1)
    sample2 = {
        "cimac_id": "CTTTPPP12.00",
        "parent_sample_id": "ssidb",
        "aliquots": [aliquot2],
        "collection_event_name": "Baseline",
        "type_of_primary_container": "Sodium heparin",
        "sample_location": "---",
        "type_of_sample": "Other",
    }
    sm_validator.validate(sample2)

    # create a bad participant, then make it good.
    participant = {
        "cimac_participant_id": "CTTTPPP",
        "participant_id": "tpid_a",
        "cohort_name": "Arm_Z",
    }
    with pytest.raises(jsonschema.ValidationError):
        pt_validator.validate(participant)

    # add samples to the participant.
    participant["samples"] = [sample1, sample2]
    pt_validator.validate(participant)

    # validate the positive version works.
    clinical_trial = {
        PROTOCOL_ID_FIELD_NAME: "trial1",
        "allowed_collection_event_names": ["Baseline"],
        "allowed_cohort_names": ["Arm_Z"],
        "participants": [participant],
        "shipments": [shipment],
    }
    ct_validator.validate(clinical_trial)

    # make it fail
    participant.pop("cimac_participant_id")
    with pytest.raises(jsonschema.ValidationError):
        ct_validator.validate(clinical_trial)
def load():
    load_and_validate_schema("clinical_trial.json")
Example #14
def prismify(
    xlsx: XlTemplateReader,
    template: Template,
    schema_root: str = SCHEMA_DIR,
    debug: bool = False,
) -> (dict, List[LocalFileUploadEntry], List[Union[Exception, str]]):
    """
    Converts an excel file to a json object. It also identifies local files
    which need to be uploaded to a google bucket and provides some logic
    to help build the bucket url.
    e.g. file list
    [
        {
            'local_path': '/path/to/fwd.fastq',
            'gs_key': '10021/CTTTPPPSS/wes_forward.fastq'
        }
    ]
    Args:
        xlsx: cidc_schemas.template_reader.XlTemplateReader instance
        template: cidc_schemas.template.Template instance
        schema_root: path to the schema directory, defaulting to the CIDC schemas root
    Returns:
        (tuple):
            arg1: clinical trial object with data parsed from spreadsheet
            arg2: list of `LocalFileUploadEntry`s that describe each file identified:
                LocalFileUploadEntry(
                    local_path = "/local/path/to/a/data/file/parsed/from/template",
                    gs_key = "constructed/relative/to/clinical/trial/GCS/path",
                    upload_placeholder = "random_uuid-for-artifact-upload",
                    metadata_availability = boolean to indicate whether LocalFileUploadEntry should be extracted for metadata files
                )
            arg3: list of errors
    Process:
    * checks out `prism_preamble_object_pointer` which is a "standard"/absolute
    rfc6901 json-pointer from CT root object to a new assay location.
    E.g. for WES it is `/assays/wes/0`, in DeepDiff terms `ct["assays"]["wes"][0]`
    * creates such "parent/preamble" object.
    E.g. for WES an object that corresponds to a wes_assay will be created:
        {
          "assays": {
            "wes": [
              {
                ...    # we're here - this is "preamble" obj = "assay" obj
              }
            ]
          }
        }
    * then processes all "preamble_rows" properties from "..._template.json"
    to fill the object's properties. It uses "merge_pointer"s relative to this
    "parent/preamble" object to determine the exact location where to set a value.
    In most cases it's just "0/field_name", where "0" denotes that "field_name"
    is a field in the current object.
    There are exceptions like "3/protocol_identifier", which basically says
    "go 3 levels up in the hierarchy and take the protocol_identifier field of the root".
    E.g. WES:
        {
          "protocol_identifier": "4412" # from `3/protocol_identifier`
          "assays": {
            "wes": [
              {
                "assay_creator": "DFCI" # from `0/assay_creator`
              }
            ]
          }
        }
    * then it loops over all "record" rows in the .xlsx and creates
    an object within that "parent" object for each row. These "record" objects
    are created at the "prism_data_object_pointer" location relative to the "preamble".

    E.g. for WES: `"prism_data_object_pointer" : "/records/-"`
        {
          "assays": {
            "wes": [
              {
                "assay_creator": "DFCI",
                "records": [
                  {
                    ...    # we're here - this is "record" obj = "assay entry" obj
                  }
                ]
              }
            ]
          }
        }
    NB: the minus sign at the end of "/records/-" is special relative-json-pointer
    notation meaning that a new object should be appended to the 'records' array.
    It's as if python's `l.append(v)` were written `l[-] = v`.
    * Prism now uses those "merge_pointer" relative to this "record" object,
    to populate field values of a "record" in the same way as with "preamble".
    E.g. for WES: `"prism_data_object_pointer" : "/records/-"`
        {
          "assays": {
            "wes": [
              {
                "assay_creator": "DFCI",
                "records": [
                  {
                    "cimac_id": ...                 # from "0/cimac_id",
                    "enrichment_vendor_lot": ...    # from "0/enrichment_vendor_lot",
                    "capture_date": ...             # from "0/capture_date",
                  }
                ]
              }
            ]
          }
        }
    * Finally, since many "record" objects have been created and populated,
    Prism uses `prism_preamble_object_schema` to merge them all together
    with respect to the `mergeStrategy`es defined in that schema.
    """

    _check_encrypt_init()

    if template.type not in SUPPORTED_TEMPLATES:
        raise NotImplementedError(
            f"{template.type!r} is not supported, only {SUPPORTED_TEMPLATES} are."
        )

    errors_so_far = []

    # get the root CT schema
    root_ct_schema_name = (
        template.schema.get("prism_template_root_object_schema")
        or "clinical_trial.json"
    )
    root_ct_schema = load_and_validate_schema(root_ct_schema_name, schema_root)
    # create the result CT dictionary
    root_ct_obj = {}
    template_root_obj_pointer = template.schema.get(
        "prism_template_root_object_pointer", ""
    )
    if template_root_obj_pointer != "":
        template_root_obj = {}
        _set_val(template_root_obj_pointer, template_root_obj, root_ct_obj)
    else:
        template_root_obj = root_ct_obj

    # and merger for it
    root_ct_merger = Merger(root_ct_schema, strategies=PRISM_MERGE_STRATEGIES)
    # and where to collect all local file refs
    collected_files = []

    # loop over spreadsheet worksheets
    for ws_name, ws in xlsx.grouped_rows.items():
        logger.debug(f"next worksheet {ws_name!r}")

        # Here we take only the first two cells from each preamble row as key and value
        # respectively, lowercasing keys to match template schema definitions.
        preamble_context = dict(
            (r.values[0].lower(), r.values[1]) for r in ws.get(RowType.PREAMBLE, [])
        )
        # We need this full "preamble dict" (all key-value pairs) prior to processing
        # properties from data_columns or preamble wrt template schema definitions, because
        # there can be a 'gcs_uri_format' that needs to have access to all values.

        templ_ws = template.schema["properties"]["worksheets"].get(ws_name)
        if not templ_ws:
            if ws_name in template.ignored_worksheets:
                continue

            errors_so_far.append(f"Unexpected worksheet {ws_name!r}.")
            continue

        preamble_object_schema = load_and_validate_schema(
            templ_ws.get("prism_preamble_object_schema", root_ct_schema_name),
            schema_root,
        )
        preamble_merger = Merger(
            preamble_object_schema, strategies=PRISM_MERGE_STRATEGIES
        )
        preamble_object_pointer = templ_ws.get("prism_preamble_object_pointer", "")
        data_object_pointer = templ_ws["prism_data_object_pointer"]

        # creating preamble obj
        preamble_obj = {}

        # Processing data rows first
        data = ws[RowType.DATA]
        if data:
            # get the data
            headers = ws[RowType.HEADER][0]

            for row in data:

                logger.debug(f"  next data row {row!r}")

                # creating data obj
                data_obj = {}
                copy_of_preamble = {}
                _set_val(
                    data_object_pointer,
                    data_obj,
                    copy_of_preamble,
                    template_root_obj,
                    preamble_object_pointer,
                )

                # We create this "data record dict" (all key-value pairs) prior to processing
                # properties from data_columns wrt template schema definitions, because
                # there can be a 'gcs_uri_format' that needs to have access to all values.
                local_context = dict(
                    zip([h.lower() for h in headers.values], row.values)
                )

                # create dictionary per row
                for key, val in zip(headers.values, row.values):

                    combined_context = dict(local_context, **preamble_context)
                    try:
                        changes, new_files = template.process_field_value(
                            ws_name, key, val, combined_context, _encrypt
                        )
                    except ParsingException as e:
                        errors_so_far.append(e)
                    else:
                        _apply_changes(
                            changes, data_obj, copy_of_preamble, data_object_pointer
                        )
                        collected_files.extend(new_files)

                try:
                    preamble_obj = preamble_merger.merge(preamble_obj, copy_of_preamble)
                except MergeCollisionException as e:
                    # Reformatting exception, because this mismatch happened within one template
                    # and not with some saved stuff.
                    wrapped = e.with_context(row=row.row_num, worksheet=ws_name)
                    errors_so_far.append(wrapped)
                    logger.info(f"MergeCollisionException: {wrapped}")

        # Now processing preamble rows
        logger.debug(f"  preamble for {ws_name!r}")
        for row in ws[RowType.PREAMBLE]:
            k, v, *_ = row.values
            try:
                changes, new_files = template.process_field_value(
                    ws_name, k, v, preamble_context, _encrypt
                )
            except ParsingException as e:
                errors_so_far.append(e)
            else:
                # TODO: we might want to use copy + preamble_merger here too,
                # for complex properties that require a mergeStrategy
                _apply_changes(
                    changes,
                    preamble_obj,
                    root_ct_obj,
                    template_root_obj_pointer + preamble_object_pointer,
                )
                collected_files.extend(new_files)

        # Now pushing it up / merging with the whole thing
        copy_of_templ_root = {}
        _set_val(preamble_object_pointer, preamble_obj, copy_of_templ_root)
        logger.debug("merging root objs")
        logger.debug(f" {template_root_obj}")
        logger.debug(f" {copy_of_templ_root}")
        template_root_obj = root_ct_merger.merge(template_root_obj, copy_of_templ_root)
        logger.debug(f"  merged - {template_root_obj}")

    if template_root_obj_pointer != "":
        _set_val(template_root_obj_pointer, template_root_obj, root_ct_obj)
    else:
        root_ct_obj = template_root_obj

    return root_ct_obj, collected_files, errors_so_far
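
A hedged sketch of how the return values documented above might be consumed; xlsx and template are assumed to be pre-built XlTemplateReader and Template instances (their construction is version-specific and not shown here), and the error handling is illustrative only.

# Hedged sketch: call prismify and unpack the (trial object, file entries, errors) tuple.
ct, files, errors = prismify(xlsx, template)
if errors:
    raise RuntimeError(f"prismify produced {len(errors)} error(s): {errors}")
for entry in files:
    # attributes as documented in the prismify docstring above
    print(entry.local_path, "->", entry.gs_key)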
Example #15
def test_assay_merge():

    # two wes assays.
    a1 = {
        "lead_organization_study_id": "10021",
        "participants": [{
            "samples": [{
                "genomic_source": "Tumor",
                "aliquots": [{
                    "assay": {
                        "wes": {
                            "assay_creator": "Mount Sinai",
                            "assay_category": "Whole Exome Sequencing (WES)",
                            "enrichment_vendor_kit": "Twist",
                            "library_vendor_kit": "KAPA - Hyper Prep",
                            "sequencer_platform": "Illumina - NextSeq 550",
                            "paired_end_reads": "Paired",
                            "read_length": 100,
                            "records": [{
                                "library_kit_lot": "lot abc",
                                "enrichment_vendor_lot": "lot 123",
                                "library_prep_date": "2019-05-01 00:00:00",
                                "capture_date": "2019-05-02 00:00:00",
                                "input_ng": 100,
                                "library_yield_ng": 700,
                                "average_insert_size": 250
                            }]
                        }
                    },
                    "cimac_aliquot_id": "Aliquot 1"
                }],
                "cimac_sample_id": "Sample 1"
            }],
            "cimac_participant_id": "Patient 1"
        }]
    }

    # create a2 and modify ids to trigger merge behavior
    a2 = copy.deepcopy(a1)
    a2['participants'][0]['samples'][0][
        'cimac_sample_id'] = "something different"

    # create validator and assert the schema is valid.
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema

    # merge them
    merger = Merger(schema)
    a3 = merger.merge(a1, a2)
    assert len(a3['participants']) == 1
    assert len(a3['participants'])
Example #16
def test_trial_core():

    # load and validate schema.
    schema_root = SCHEMA_DIR
    ct_schema_path = os.path.join(SCHEMA_DIR, "clinical_trial.json")
    pt_schema_path = os.path.join(SCHEMA_DIR, "participant.json")
    sm_schema_path = os.path.join(SCHEMA_DIR, "sample.json")
    al_schema_path = os.path.join(SCHEMA_DIR, "aliquot.json")

    ct_schema = load_and_validate_schema(ct_schema_path, schema_root)
    pt_schema = load_and_validate_schema(pt_schema_path, schema_root)
    sm_schema = load_and_validate_schema(sm_schema_path, schema_root)
    al_schema = load_and_validate_schema(al_schema_path, schema_root)

    # create validator and assert the schemas are valid.
    ct_validator = jsonschema.Draft7Validator(ct_schema)
    ct_validator.check_schema(ct_schema)

    pt_validator = jsonschema.Draft7Validator(pt_schema)
    pt_validator.check_schema(pt_schema)

    sm_validator = jsonschema.Draft7Validator(sm_schema)
    sm_validator.check_schema(sm_schema)

    al_validator = jsonschema.Draft7Validator(al_schema)
    al_validator.check_schema(al_schema)

    # create some aliquots.
    shipment = {"request": "DFCI"}
    aliquot1 = {"cimac_aliquot_id": "c1d1"}

    aliquot2 = {"cimac_aliquot_id": "c1d2"}
    al_validator.validate(aliquot1)
    al_validator.validate(aliquot2)

    # create some samples.
    sample1 = {
        "cimac_sample_id": "csid1",
        "site_sample_id": "ssida",
        "aliquots": [aliquot1]
    }
    sample2 = {
        "cimac_sample_id": "csid12",
        "site_sample_id": "ssidb",
        "aliquots": [aliquot2]
    }
    sm_validator.validate(sample1)
    sm_validator.validate(sample2)

    # create a bad participant, then make it good.
    participant = {
        "cimac_participant_id": "cpid_1",
        "trial_participant_id": "tpid_a"
    }
    with pytest.raises(jsonschema.ValidationError):
        pt_validator.validate(participant)

    # add samples to the participant.
    participant["samples"] = [sample1, sample2]
    pt_validator.validate(participant)

    # validate the positive version works.
    clinical_trial = {
        "lead_organization_study_id": "trial1",
        "participants": [participant],
        "shipments": [shipment]
    }
    ct_validator.validate(clinical_trial)

    # make it fail
    participant.pop('cimac_participant_id')
    with pytest.raises(jsonschema.ValidationError):
        ct_validator.validate(clinical_trial)
def ct_validator():
    return load_and_validate_schema("clinical_trial.json",
                                    return_validator=True)
Example #18
def test_additionalProperties():
    ct_schema = load_and_validate_schema(
        os.path.join(SCHEMA_DIR, "clinical_trial.json")
    )
    recursive_additionalProperties(ct_schema, "")
Example #19
def test_schema(schema_path):
    """Ensure the schema file conforms to JSON schema draft 7"""
    assert load_and_validate_schema(schema_path)
def test_cytof():

    # test artifact sub schema
    schema_root = SCHEMA_DIR
    schema_path = os.path.join(
        SCHEMA_DIR, "assays/cytof_assay_core.json#definitions/input_files"
    )
    schema = load_and_validate_schema(schema_path, schema_root)
    validator = jsonschema.Draft7Validator(schema)

    fcs_1 = ARTIFACT_OBJ.copy()
    fcs_1["data_format"] = "FCS"
    fcs_2 = ARTIFACT_OBJ.copy()
    fcs_2["data_format"] = "FCS"
    fcs_3 = ARTIFACT_OBJ.copy()
    fcs_3["data_format"] = "FCS"
    fcs_4 = ARTIFACT_OBJ.copy()
    fcs_4["data_format"] = "FCS"
    sample_records = {"processed_fcs": fcs_1}
    validator.validate(sample_records)

    # create the cytof object
    cytof_platform = {"instrument": "dummy"}

    # create a cytof antibody object.
    antibodies = [
        {
            "antibody": "CD8",
            "isotope": "dummy",
            "dilution": "dummy",
            "stain_type": "Intracellular",
            "usage": "Analysis Only",
        },
        {
            "antibody": "PD-L1",
            "isotope": "dummy",
            "dilution": "dummy",
            "stain_type": "Intracellular",
            "usage": "Used",
        },
    ]
    cytof_panel = {
        "assay_run_id": "run_1",
        "batch_id": "XYZ",
        "cytof_antibodies": antibodies,
        "source_fcs": [fcs_2, fcs_3],
    }

    obj = {**ASSAY_CORE, **cytof_platform, **cytof_panel}  # merge three dictionaries

    # create the cytof object
    fcs_1 = ARTIFACT_OBJ.copy()
    fcs_1["data_format"] = "FCS"
    fcs_2 = ARTIFACT_OBJ.copy()
    fcs_2["data_format"] = "FCS"
    fcs_3 = ARTIFACT_OBJ.copy()
    fcs_3["data_format"] = "FCS"
    assignment = ARTIFACT_OBJ.copy()
    assignment["data_format"] = "CSV"
    compartment = ARTIFACT_OBJ.copy()
    compartment["data_format"] = "CSV"
    profiling = ARTIFACT_OBJ.copy()
    profiling["data_format"] = "CSV"
    cell_count_assignment = ARTIFACT_OBJ.copy()
    cell_count_assignment["data_format"] = "CSV"
    cell_count_compartment = ARTIFACT_OBJ.copy()
    cell_count_compartment["data_format"] = "CSV"
    cell_count_profiling = ARTIFACT_OBJ.copy()
    cell_count_profiling["data_format"] = "CSV"
    report = ARTIFACT_OBJ.copy()
    report["data_format"] = "ZIP"
    analysis = ARTIFACT_OBJ.copy()
    analysis["data_format"] = "ZIP"
    records = {
        "cimac_id": "CTTTPPPSA.00",
        "input_files": {"processed_fcs": fcs_1},
        "output_files": {
            "fcs_file": fcs_1,
            "assignment": assignment,
            "compartment": compartment,
            "profiling": profiling,
            "cell_counts_assignment": assignment,
            "cell_counts_compartment": compartment,
            "cell_counts_profiling": profiling,
        },
    }

    # add a demo sample-level record.
    obj["records"] = [records]

    # create validator and assert the schema is valid.
    validator = _fetch_validator("cytof")
    validator.validate(obj)
Example #21
def prismify(xlsx_path: str,
             template_path: str,
             assay_hint: str = "",
             verb: bool = False) -> (dict, dict):
    """
    Converts an excel file to a json object. It also identifies local files
    which need to be uploaded to a google bucket and provides some logic
    to help build the bucket url.

    e.g. file list
    [
        {
            'local_path': '/path/to/fwd.fastq', 
            'gs_key': '10021/Patient_1/sample_1/aliquot_1/wes_forward.fastq'
        }
    ]


    Args:
        xlsx_path: path on the file system to the excel file.
        template_path: path on the file system, relative to the schema root,
                        of the template.
        assay_hint: string used to help identify properties in the template. Must
                    be the root of the template filename, i.e.
                    wes_template.json would be wes.
        verb: boolean indicating verbosity

    Returns:
        (tuple):
            arg1: clinical trial object with data parsed from spreadsheet
            arg2: list of objects which describe each file identified.
    """

    # get the schema and validator
    validator = load_and_validate_schema("clinical_trial.json",
                                         return_validator=True)
    schema = validator.schema

    # this lets us lookup xlsx-to-schema keys
    key_lu = _load_keylookup(template_path)

    # this helps us identify file paths in xlsx
    fp_lu = _build_fplu(assay_hint)

    # add a special key to track the files
    fp_lu['special'] = list()

    # read the excel file
    t = XlTemplateReader.from_excel(xlsx_path)

    # create the root dictionary.
    root = {}
    data_rows = []

    # loop over spreadsheet
    worksheet_names = t.grouped_rows.keys()
    for name in worksheet_names:

        # get the worksheet.
        ws = t.grouped_rows[name]

        # Compare preamble rows
        for row in ws[RowType.PREAMBLE]:

            # process this property
            _process_property(row, key_lu, schema, root, assay_hint, fp_lu,
                              verb)

        # move to headers
        headers = ws[RowType.HEADER][0]

        # get the data.
        data = ws[RowType.DATA]
        for row in data:

            # create dictionary per row
            curd = copy.deepcopy(root)
            for key, val in zip(headers, row):

                # process this property
                _process_property([key, val], key_lu, schema, curd, assay_hint,
                                  fp_lu, verb)

            # save the entry
            data_rows.append(curd)

    # create the merger
    merger = Merger(schema)

    # iteratively merge.
    cur_obj = data_rows[0]
    for i in range(1, len(data_rows)):
        cur_obj = merger.merge(cur_obj, data_rows[i])

    # return the object.
    return cur_obj, fp_lu['special']