Beispiel #1
0
def test_permissions_revoke_all_iam_permissions(clean_db, monkeypatch):
    """
    Smoke test that Permissions.revoke_all_iam_permissions passes the
    expected arguments through to revoke_download_access.
    """
    gcs_client = mock_gcloud_client(monkeypatch)

    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    # Grant one permission per upload type for this user on this trial
    upload_types = ["wes_bam", "ihc", "rna_fastq", "plasma"]
    for ut in upload_types:
        perm = Permissions(
            granted_to_user=user.id,
            trial_id=trial.trial_id,
            upload_type=ut,
            granted_by_user=user.id,
        )
        perm.insert()

    # Revoking should touch every (email, trial, upload_type) combination
    Permissions.revoke_all_iam_permissions()
    expected_calls = [call(user.email, trial.trial_id, ut) for ut in upload_types]
    gcs_client.revoke_download_access.assert_has_calls(expected_calls)

    # Revocation is a no-op for admins and NCI biobank users
    gcs_client.revoke_download_access.reset_mock()
    for role in (CIDCRole.ADMIN.value, CIDCRole.NCI_BIOBANK_USER.value):
        user.role = role
        user.update()
        Permissions.revoke_all_iam_permissions()
        gcs_client.revoke_download_access.assert_not_called()
Beispiel #2
0
def test_downloadable_files_additional_metadata_default(clean_db):
    """additional_metadata should normalize any nullish value to an empty dict."""
    TrialMetadata.create(TRIAL_ID, METADATA)
    record = DownloadableFiles(
        trial_id=TRIAL_ID,
        upload_type="wes_bam",
        object_url="10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        file_size_bytes=1,
        md5_hash="hash1234",
        uploaded_timestamp=datetime.now(),
    )

    # Inserting without a value falls back to the empty-dict default
    record.insert()
    assert record.additional_metadata == {}

    # Each nullish value is normalized to {} on update
    for nullish in (None, "null", {}):
        record.additional_metadata = nullish
        record.update()
        assert record.additional_metadata == {}

    # A real value is preserved as-is
    real_value = {"foo": "bar"}
    record.additional_metadata = real_value
    record.update()
    assert record.additional_metadata == real_value
Beispiel #3
0
def test_create_downloadable_file_from_metadata(db, monkeypatch):
    """Try to create a downloadable file from artifact_core metadata"""
    # fake file metadata
    file_metadata = {
        "artifact_category": "Assay Artifact from CIMAC",
        "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        "file_name": "wes_forward.fastq",
        "file_size_bytes": 1,
        "md5_hash": "hash1234",
        "uploaded_timestamp": datetime.now(),
        "foo": "bar",  # unsupported column - should be filtered
    }

    # Create the trial (to avoid violating foreign-key constraint)
    TrialMetadata.patch_trial_metadata(TRIAL_ID, METADATA)
    # Create the file
    DownloadableFiles.create_from_metadata(TRIAL_ID, "wes", file_metadata)

    # Check that we created the file
    new_file = (
        db.query(DownloadableFiles)
        .filter_by(file_name=file_metadata["file_name"])
        .first()
    )
    assert new_file
    del file_metadata["foo"]  # the unsupported column should have been dropped
    # Iterate items() instead of keys() so each column's expected value is
    # read once rather than looked up again inside the loop body.
    for column, value in file_metadata.items():
        assert getattr(new_file, column) == value
Beispiel #4
0
def test_permissions_delete(clean_db, monkeypatch, caplog):
    """
    Check Permissions.delete: IAM revocation, admin-action audit logging,
    idempotency, and error handling for unknown users.
    """
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()
    perm = Permissions(
        granted_to_user=user.id,
        trial_id=trial.trial_id,
        upload_type="wes_bam",
        granted_by_user=user.id,
    )
    perm.insert()

    # Deleting a record on behalf of a user id that doesn't exist leads to an error
    gcloud_client.reset_mocks()
    with pytest.raises(NoResultFound, match="no user with id"):
        perm.delete(deleted_by=999999)

    # Deletion of an existing permission leads to no error
    # NOTE(review): deleted_by is an int id here but a Users instance below —
    # presumably delete() accepts both; confirm against the models module.
    gcloud_client.reset_mocks()
    with caplog.at_level(logging.DEBUG):
        perm.delete(deleted_by=user.id)
    gcloud_client.revoke_download_access.assert_called_once()
    gcloud_client.grant_download_access.assert_not_called()
    # The admin-action audit message should have been logged
    assert any(
        log_record.message.strip()
        == f"admin-action: {user.email} removed from {user.email} the permission wes_bam on {trial.trial_id}"
        for log_record in caplog.records
    )

    # Deleting an already-deleted record is idempotent
    gcloud_client.reset_mocks()
    perm.delete(deleted_by=user)
    gcloud_client.revoke_download_access.assert_called_once()
    gcloud_client.grant_download_access.assert_not_called()

    # Deleting a record whose user doesn't exist leads to an error
    gcloud_client.reset_mocks()
    with pytest.raises(NoResultFound, match="no user with id"):
        Permissions(granted_to_user=999999).delete(deleted_by=user)

    gcloud_client.revoke_download_access.assert_not_called()
    gcloud_client.grant_download_access.assert_not_called()

    # If revoking a permission from a "network-viewer", no GCS IAM actions are taken
    gcloud_client.revoke_download_access.reset_mock()
    user.role = CIDCRole.NETWORK_VIEWER.value
    user.update()
    perm = Permissions(
        granted_to_user=user.id,
        trial_id=trial.trial_id,
        upload_type="ihc",
        granted_by_user=user.id,
    )
    perm.insert()
    perm.delete(deleted_by=user)
    gcloud_client.revoke_download_access.assert_not_called()
Beispiel #5
0
def test_create_downloadable_file_from_metadata(clean_db, monkeypatch):
    """Create downloadable files from artifact metadata; check column filtering,
    additional_metadata normalization, and artifact-upload publishing."""
    # fake file metadata
    file_metadata = {
        "object_url": "10021/Patient 1/sample 1/aliquot 1/wes_forward.fastq",
        "file_size_bytes": 1,
        "md5_hash": "hash1234",
        "uploaded_timestamp": datetime.now(),
        "foo": "bar",  # unsupported column - should be filtered
    }
    additional_metadata = {"more": "info"}

    # Mock artifact upload publishing
    publisher = MagicMock()
    monkeypatch.setattr("cidc_api.models.models.publish_artifact_upload", publisher)

    # Create the trial (to avoid violating foreign-key constraint)
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Create files with empty or "null" additional metadata - all normalize to {}
    for nullish_value in ["null", None, {}]:
        df = DownloadableFiles.create_from_metadata(
            TRIAL_ID, "wes_bam", file_metadata, additional_metadata=nullish_value
        )
        clean_db.refresh(df)
        assert df.additional_metadata == {}

    # Create the file
    DownloadableFiles.create_from_metadata(
        TRIAL_ID, "wes_bam", file_metadata, additional_metadata=additional_metadata
    )

    # Check that we created the file
    new_file = (
        clean_db.query(DownloadableFiles)
        .filter_by(object_url=file_metadata["object_url"])
        .first()
    )
    assert new_file
    del file_metadata["foo"]  # the unsupported column should have been dropped
    # Iterate items() instead of keys() to avoid a redundant lookup per column
    for column, value in file_metadata.items():
        assert getattr(new_file, column) == value
    assert new_file.additional_metadata == additional_metadata

    # Check that no artifact upload event was published
    publisher.assert_not_called()

    # Check that artifact upload publishes
    DownloadableFiles.create_from_metadata(
        TRIAL_ID,
        "wes_bam",
        file_metadata,
        additional_metadata=additional_metadata,
        alert_artifact_upload=True,
    )
    publisher.assert_called_once_with(file_metadata["object_url"])
Beispiel #6
0
def test_user_get_data_access_report(clean_db, monkeypatch):
    """Test that user data access info is collected as expected"""
    mock_gcloud_client(monkeypatch)

    admin_user = Users(
        email="*****@*****.**",
        organization="CIDC",
        approval_date=datetime.now(),
        role=CIDCRole.ADMIN.value,
    )
    admin_user.insert()

    cimac_user = Users(
        email="*****@*****.**",
        organization="DFCI",
        approval_date=datetime.now(),
        role=CIDCRole.CIMAC_USER.value,
    )
    cimac_user.insert()

    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    upload_types = ["wes_bam", "ihc"]

    # Note that admins don't need permissions to view data,
    # so we're deliberately issuing unnecessary permissions here.
    for user in [admin_user, cimac_user]:
        for t in upload_types:
            Permissions(
                granted_to_user=user.id,
                granted_by_user=admin_user.id,
                trial_id=trial.trial_id,
                upload_type=t,
            ).insert()

    bio = io.BytesIO()
    result_df = Users.get_data_access_report(bio)
    bio.seek(0)

    # Make sure bytes were written to the BytesIO instance
    assert bio.getbuffer().nbytes > 0

    # Make sure report data has expected info.
    # Set literals replace set([...]) wrappers (flake8-comprehensions C405).
    assert set(result_df.columns) == {
        "email",
        "role",
        "organization",
        "trial_id",
        "permissions",
    }
    for user in [admin_user, cimac_user]:
        user_df = result_df[result_df.email == user.email]
        assert {user.role} == set(user_df.role)
        assert {user.organization} == set(user_df.organization)
        if user == admin_user:
            # Admins implicitly have access to everything
            assert {"*"} == set(user_df.permissions)
        else:
            # The permission string may list upload types in either order
            assert set(user_df.permissions).issubset(["wes_bam,ihc", "ihc,wes_bam"])
Beispiel #7
0
def test_partial_patch_trial_metadata(clean_db):
    """A metadata patch missing required fields should still apply cleanly."""
    # Seed the initial trial record
    clean_db.add(TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA))
    clean_db.commit()

    # Build a patch that omits required fields (no "participants")
    partial_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID, "assays": {}}

    # Applying the partial patch should not raise
    TrialMetadata._patch_trial_metadata(TRIAL_ID, partial_patch)
Beispiel #8
0
def test_downloadable_files_get_related_files(clean_db):
    """Check that get_related_files returns exactly the other files in a file's
    related group (same facet-group family and/or shared cimac_id)."""
    # Create a trial to avoid constraint errors
    TrialMetadata.create(trial_id=TRIAL_ID, metadata_json=METADATA)

    # Convenience function for building file records.
    # BUG FIX: the original used a mutable default argument (`={}`), which is
    # shared across all calls; use None as the sentinel instead.
    def create_df(facet_group, additional_metadata=None) -> DownloadableFiles:
        df = DownloadableFiles(
            facet_group=facet_group,
            additional_metadata={} if additional_metadata is None else additional_metadata,
            trial_id=TRIAL_ID,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=0,
            object_url=facet_group,  # just filler, not relevant to the test
            upload_type="",
        )
        df.insert()
        clean_db.refresh(df)
        return df

    # Set up test data
    cimac_id_1 = "CTTTPPP01.01"
    cimac_id_2 = "CTTTPPP02.01"
    files = [
        create_df(
            "/cytof/normalized_and_debarcoded.fcs", {"some.path.cimac_id": cimac_id_1}
        ),
        create_df(
            "/cytof_analysis/assignment.csv",
            # NOTE: this isn't realistic - assignment files aren't sample-specific - but
            # it serves the purpose of the test.
            {"path.cimac_id": cimac_id_1, "another.path.cimac_id": cimac_id_1},
        ),
        create_df("/cytof_analysis/source.fcs", {"path.to.cimac_id": cimac_id_2}),
        create_df("/cytof_analysis/reports.zip"),
        create_df("/cytof_analysis/analysis.zip"),
        create_df("/wes/r1_L.fastq.gz"),
    ]

    # Based on setup, we expect the following disjoint sets of related files:
    related_file_groups = [
        [files[0], files[1]],
        [files[2]],
        [files[3], files[4]],
        [files[5]],
    ]

    # Check that get_related_files returns what we expect
    for file_group in related_file_groups:
        for file_record in file_group:
            other_ids = [f.id for f in file_group if f.id != file_record.id]
            related_files = file_record.get_related_files()
            assert {f.id for f in related_files} == set(other_ids)
            assert len(related_files) == len(other_ids)
Beispiel #9
0
def test_upload_job_no_file_map(clean_db):
    """An upload job created without a file map should yield no upload URIs."""
    Users.create(PROFILE)
    TrialMetadata.create(TRIAL_ID, METADATA)

    patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    # gcs_file_map=None: no per-file upload URIs should be produced
    created = UploadJobs.create(
        prism.SUPPORTED_MANIFESTS[0], EMAIL, None, patch, xlsx_uri
    )
    assert list(created.upload_uris_with_data_uris_with_uuids()) == []

    # The same holds after a round-trip through the database
    fetched = UploadJobs.find_by_id_and_email(created.id, PROFILE["email"])
    assert list(fetched.upload_uris_with_data_uris_with_uuids()) == []
Beispiel #10
0
def test_assay_upload_ingestion_success(clean_db, monkeypatch, caplog):
    """Check that the ingestion success method works as expected"""
    caplog.set_level(logging.DEBUG)

    new_user = Users.create(PROFILE)
    trial = TrialMetadata.create(TRIAL_ID, METADATA)
    # commit=False lets us adjust the job's status below before persisting
    assay_upload = UploadJobs.create(
        upload_type="ihc",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={PROTOCOL_ID_FIELD_NAME: TRIAL_ID},
        gcs_xlsx_uri="",
        commit=False,
    )

    clean_db.commit()

    # Ensure that success can't be declared from a starting state
    with pytest.raises(Exception, match="current status"):
        assay_upload.ingestion_success(trial)

    # Update assay_upload status to simulate a completed but not ingested upload
    assay_upload.status = UploadJobStatus.UPLOAD_COMPLETED.value
    assay_upload.ingestion_success(trial)

    # Check that status was updated and email wasn't sent by default
    db_record = UploadJobs.find_by_id(assay_upload.id)
    assert db_record.status == UploadJobStatus.MERGE_COMPLETED.value
    assert "Would send email with subject '[UPLOAD SUCCESS]" not in caplog.text

    # Check that email gets sent when specified
    assay_upload.ingestion_success(trial, send_email=True)
    assert "Would send email with subject '[UPLOAD SUCCESS]" in caplog.text
def setup_trial_and_user(cidc_api, monkeypatch) -> int:
    """
    Insert a trial and a cimac-user into the database, and set the user
    as the current user. Returns the inserted user's id.
    """
    # this is necessary for adding/removing permissions from this user
    # without trying to contact GCP
    mock_gcloud_client(monkeypatch)

    user = Users(email=user_email,
                 role=CIDCRole.CIMAC_USER.value,
                 approval_date=datetime.now())
    mock_current_user(user, monkeypatch)

    with cidc_api.app_context():
        # NOTE(review): the record's trial_id is the literal "test_trial" while
        # the protocol identifier comes from the module-level `trial_id` —
        # confirm these are intentionally allowed to differ.
        TrialMetadata(
            trial_id="test_trial",
            metadata_json={
                prism.PROTOCOL_ID_FIELD_NAME: trial_id,
                "participants": [],
                "allowed_cohort_names": ["Arm_Z"],
                "allowed_collection_event_names": [],
            },
        ).insert()

        user.insert()
        return user.id
def setup_permissions(cidc_api, monkeypatch) -> Tuple[int, int]:
    """
    Create two users, one trial, and three permissions in `db`.
    Two permissions will belong to the first user, and the third will
    belong to the second one. Returns the first and second user ids
    as a tuple.
    """
    current_user = Users(
        id=1,
        email="*****@*****.**",
        role=CIDCRole.CIMAC_USER.value,
        approval_date=datetime.now(),
    )
    other_user = Users(id=2, email="*****@*****.**")

    mock_current_user(current_user, monkeypatch)

    with cidc_api.app_context():
        # Create users
        current_user.insert()
        other_user.insert()

        # Create trial
        TrialMetadata.create(
            TRIAL_ID,
            {
                "protocol_identifier": TRIAL_ID,
                "allowed_collection_event_names": [],
                "allowed_cohort_names": [],
                "participants": [],
            },
        )

        # Create permissions; granter and grantee are the same user here,
        # which is sufficient for these tests
        def create_permission(uid, assay):
            Permissions(
                granted_by_user=uid,
                granted_to_user=uid,
                trial_id=TRIAL_ID,
                upload_type=assay,
            ).insert()

        create_permission(current_user.id, "ihc")
        create_permission(current_user.id, "olink")
        create_permission(other_user.id, "olink")

        return current_user.id, other_user.id
Beispiel #13
0
def test_trial_metadata_insert(clean_db):
    """Metadata validation should gate TrialMetadata.insert."""
    record = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    # Well-formed metadata inserts without error
    record.insert()

    # Malformed metadata triggers a validation error on insert
    record.metadata_json = {"foo": "bar"}
    with pytest.raises(ValidationMultiError):
        record.insert()

    # The validation step can be explicitly skipped
    record.insert(validate_metadata=False)
Beispiel #14
0
def test_trial_metadata_patch_manifest(clean_db):
    """Update manifest data in a trial_metadata record"""
    # Build a metadata blob containing one participant
    patched_metadata = METADATA.copy()
    new_participant = {
        "samples": [],
        "cimac_participant_id": "CTSTP01",
        "participant_id": "trial a",
        "cohort_name": "Arm_Z",
    }
    patched_metadata["participants"] = [new_participant]

    # Patching before the trial exists should fail loudly
    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_manifest(TRIAL_ID, patched_metadata)

    # Once the trial exists, the same patch should succeed
    TrialMetadata.create(TRIAL_ID, METADATA)
    TrialMetadata.patch_manifest(TRIAL_ID, patched_metadata)

    # The stored trial should now contain the patched participant list
    stored = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert stored.metadata_json["participants"] == patched_metadata["participants"]
Beispiel #15
0
def test_assay_upload_merge_extra_metadata(clean_db, monkeypatch):
    """Check that per-file "extra metadata" gets merged into an upload job's
    metadata patch via UploadJobs.merge_extra_metadata."""
    new_user = Users.create(PROFILE)

    TrialMetadata.create(TRIAL_ID, METADATA)

    # commit=False so the job's id can be pinned before persisting
    assay_upload = UploadJobs.create(
        upload_type="assay_with_extra_md",
        uploader_email=EMAIL,
        gcs_file_map={},
        metadata={
            PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            "whatever": {
                "hierarchy": [
                    {"we just need a": "uuid-1", "to be able": "to merge"},
                    {"and": "uuid-2"},
                ]
            },
        },
        gcs_xlsx_uri="",
        commit=False,
    )
    # Fixed id so merge_extra_metadata below can address this job
    assay_upload.id = 111
    clean_db.commit()

    # Fake parser: the "extra metadata" is just the decoded file content
    custom_extra_md_parse = MagicMock()
    custom_extra_md_parse.side_effect = lambda f: {"extra": f.read().decode()}
    monkeypatch.setattr(
        "cidc_schemas.prism.merger.EXTRA_METADATA_PARSERS",
        {"assay_with_extra_md": custom_extra_md_parse},
    )

    # Merge one extra-metadata file per uuid placeholder in the patch
    UploadJobs.merge_extra_metadata(
        111,
        {
            "uuid-1": io.BytesIO(b"within extra md file 1"),
            "uuid-2": io.BytesIO(b"within extra md file 2"),
        },
        session=clean_db,
    )

    # Both hierarchy entries should have been enriched with the parsed "extra"
    assert 1 == clean_db.query(UploadJobs).count()
    au = clean_db.query(UploadJobs).first()
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][0]
    assert "extra" in au.metadata_patch["whatever"]["hierarchy"][1]
Beispiel #16
0
def test_permissions_broad_perms(clean_db, monkeypatch):
    """
    Check trial-wide and upload-type-wide ("broad") permissions: inserting a
    broad perm supersedes narrower ones, and lookups fall back to broad perms.
    """
    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**")
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()
    other_trial = TrialMetadata(
        trial_id="other-trial",
        metadata_json={**METADATA, "protocol_identifier": "other-trial"},
    )
    other_trial.insert()
    # One narrow perm per (upload type, trial) pair: 4 in total
    for ut in ["wes_fastq", "olink"]:
        for tid in [trial.trial_id, other_trial.trial_id]:
            Permissions(
                granted_to_user=user.id,
                trial_id=tid,
                upload_type=ut,
                granted_by_user=user.id,
            ).insert()

    # Can't insert a permission for access to all trials and assays
    with pytest.raises(ValueError, match="must have a trial id or upload type"):
        Permissions(granted_to_user=user.id, granted_by_user=user.id).insert()

    # Inserting a trial-level permission should delete other more specific related perms.
    trial_query = clean_db.query(Permissions).filter(
        Permissions.trial_id == trial.trial_id
    )
    assert trial_query.count() == 2
    Permissions(
        trial_id=trial.trial_id, granted_to_user=user.id, granted_by_user=user.id
    ).insert()
    assert trial_query.count() == 1
    perm = trial_query.one()
    assert perm.trial_id == trial.trial_id
    # upload_type=None denotes "all upload types" for this trial
    assert perm.upload_type is None

    # Inserting an upload-level permission should delete other more specific related perms.
    olink_query = clean_db.query(Permissions).filter(Permissions.upload_type == "olink")
    assert olink_query.count() == 1
    assert olink_query.one().trial_id == other_trial.trial_id
    Permissions(
        upload_type="olink", granted_to_user=user.id, granted_by_user=user.id
    ).insert()
    assert olink_query.count() == 1
    perm = olink_query.one()
    # trial_id=None denotes "all trials" for this upload type
    assert perm.trial_id is None
    assert perm.upload_type == "olink"

    # Getting perms for a particular user-trial-type returns broader perms
    perm = Permissions.find_for_user_trial_type(user.id, trial.trial_id, "ihc")
    assert perm is not None and perm.upload_type is None
    perm = Permissions.find_for_user_trial_type(user.id, "some random trial", "olink")
    assert perm is not None and perm.trial_id is None
Beispiel #17
0
def setup_db_records(cidc_api):
    """Insert one record of each core model, all sharing a fixed _etag."""
    extra = {"_etag": ETAG}
    with cidc_api.app_context():
        # Insertion order respects foreign-key dependencies between models
        for model, fixture in [
            (Users, users),
            (TrialMetadata, trial_metadata),
            (DownloadableFiles, downloadable_files),
            (Permissions, permissions),
            (UploadJobs, upload_jobs),
        ]:
            model(**fixture["json"], **extra).insert(compute_etag=False)
 def create_trial(n, grant_perm=False):
     """
     Insert trial "test-trial-{n}" (with one participant/sample unless n == 2)
     and, optionally, olink + ihc permissions for `user_id`. Returns the
     inserted trial's id.
     """
     trial_id = f"test-trial-{n}"
     metadata_json = {
         "protocol_identifier":
         trial_id,
         # Trial 2 is deliberately created with no participants
         "participants": [] if n == 2 else [{
             "cimac_participant_id":
             "CTTTPP1",
             "participant_id":
             "x",
             "samples": [{
                 "cimac_id": f"CTTTPP1SS.01",
                 "sample_location": "",
                 "type_of_primary_container": "Other",
                 "type_of_sample": "Other",
                 "collection_event_name": "",
                 "parent_sample_id": "",
             }],
         }],
         "allowed_collection_event_names": [""],
         "allowed_cohort_names": [],
         "assays": {},
         "analysis": {},
         "shipments": [],
     }
     trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata_json)
     trial.insert()
     # NOTE(review): `user_id` comes from the enclosing scope (not visible
     # here) — presumably set by the surrounding test's setup; confirm.
     if grant_perm and user_id:
         Permissions(
             granted_to_user=user_id,
             trial_id=trial.trial_id,
             upload_type="olink",
             granted_by_user=user_id,
         ).insert()
         Permissions(
             granted_to_user=user_id,
             trial_id=trial.trial_id,
             upload_type="ihc",
             granted_by_user=user_id,
         ).insert()
     return trial.id
Beispiel #19
0
def test_create_assay_upload(clean_db):
    """UploadJobs.create should fail without a trial, then round-trip cleanly."""
    Users.create(PROFILE)

    gcs_file_map = {
        "my/first/wes/blob1/2019-08-30T15:51:38.450978": "test-uuid-1",
        "my/first/wes/blob2/2019-08-30T15:51:38.450978": "test-uuid-2",
    }
    metadata_patch = {PROTOCOL_ID_FIELD_NAME: TRIAL_ID}
    gcs_xlsx_uri = "xlsx/assays/wes/12:0:1.5123095"

    # The trial doesn't exist yet, so the foreign key is violated
    with pytest.raises(IntegrityError):
        UploadJobs.create("wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri)
    clean_db.rollback()

    TrialMetadata.create(TRIAL_ID, METADATA)

    created = UploadJobs.create(
        "wes_bam", EMAIL, gcs_file_map, metadata_patch, gcs_xlsx_uri
    )
    fetched = UploadJobs.find_by_id_and_email(created.id, PROFILE["email"])
    # The fetched job's file map matches what was submitted
    assert len(created.gcs_file_map) == len(fetched.gcs_file_map)
    assert set(created.gcs_file_map) == set(fetched.gcs_file_map)
    assert fetched.status == "started"

    # Each upload URI maps to its data URI (timestamp suffix stripped) and uuid
    expected_uris = [
        (
            "my/first/wes/blob1/2019-08-30T15:51:38.450978",
            "my/first/wes/blob1",
            "test-uuid-1",
        ),
        (
            "my/first/wes/blob2/2019-08-30T15:51:38.450978",
            "my/first/wes/blob2",
            "test-uuid-2",
        ),
    ]
    assert list(fetched.upload_uris_with_data_uris_with_uuids()) == expected_uris
Beispiel #20
0
def test_permissions_grant_iam_permissions(clean_db, monkeypatch):
    """
    Smoke test that Permissions.grant_iam_permissions calls grant_download_access with the right arguments.
    """
    # Stub out intake-bucket access refresh so no GCP calls are made
    refresh_intake_access = MagicMock()
    monkeypatch.setattr(
        "cidc_api.models.models.refresh_intake_access", refresh_intake_access
    )

    gcloud_client = mock_gcloud_client(monkeypatch)
    user = Users(email="*****@*****.**", role=CIDCRole.NETWORK_VIEWER.value)
    user.insert()
    trial = TrialMetadata(trial_id=TRIAL_ID, metadata_json=METADATA)
    trial.insert()

    # One permission per upload type for this user on this trial
    upload_types = ["wes_bam", "ihc", "rna_fastq", "plasma"]
    for upload_type in upload_types:
        Permissions(
            granted_to_user=user.id,
            trial_id=trial.trial_id,
            upload_type=upload_type,
            granted_by_user=user.id,
        ).insert()

    # IAM permissions not granted to network viewers
    Permissions.grant_iam_permissions(user=user)
    gcloud_client.grant_download_access.assert_not_called()

    # IAM permissions should be granted for any other role
    user.role = CIDCRole.CIMAC_USER.value
    Permissions.grant_iam_permissions(user=user)
    # Every (email, trial, upload_type) combination should have been granted
    for upload_type in upload_types:
        assert (
            call(user.email, trial.trial_id, upload_type)
            in gcloud_client.grant_download_access.call_args_list
        )

    refresh_intake_access.assert_called_once_with(user.email)
Beispiel #21
0
def test_update_trial_metadata(db):
    """Update an existing trial_metadata_record"""
    # Create the initial trial
    TrialMetadata.patch_trial_metadata(TRIAL_ID, METADATA)

    # Add a participant to the trial via a metadata patch
    metadata_patch = METADATA.copy()
    metadata_patch["participants"] = [{
        "samples": [],
        "cimac_participant_id": "b",
        "trial_participant_id": "trial a",
        "cohort_id": "cohort_id",
        "arm_id": "arm_id",
    }]
    TrialMetadata.patch_trial_metadata(TRIAL_ID, metadata_patch)

    # Look the trial up and check that it was merged as expected
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)

    # Named helper instead of a lambda assigned to a name (PEP 8, E731):
    # orders participants deterministically for comparison.
    def sort_participants(participant_list):
        return sorted(participant_list, key=lambda d: d["cimac_participant_id"])

    expected_participants = METADATA["participants"] + metadata_patch["participants"]
    actual_participants = trial.metadata_json["participants"]
    assert sort_participants(actual_participants) == sort_participants(
        expected_participants
    )
Beispiel #22
0
def test_create_trial_metadata(clean_db):
    """Insert a trial metadata record if one doesn't exist"""
    TrialMetadata.create(TRIAL_ID, METADATA)
    fetched = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert fetched
    assert fetched.metadata_json == METADATA

    # Invalid metadata is rejected on both creation paths
    bad_metadata = {"buzz": "bazz"}
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata.create("foo", bad_metadata)
    with pytest.raises(ValidationMultiError, match="'buzz' was unexpected"):
        TrialMetadata(trial_id="foo", metadata_json=bad_metadata).insert()
Beispiel #23
0
def test_trial_metadata_patch_assay(clean_db):
    """Update assay data in a trial_metadata record"""
    # Add an assay to the trial
    metadata_with_assay = METADATA.copy()
    metadata_with_assay["assays"] = {"wes": []}

    # NOTE(review): this test is named patch_assay but calls patch_manifest
    # throughout — likely copy-pasted from the manifest test; confirm whether
    # TrialMetadata.patch_assay was intended here.
    with pytest.raises(NoResultFound, match=f"No trial found with id {TRIAL_ID}"):
        TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_assay)

    # Create trial
    TrialMetadata.create(TRIAL_ID, METADATA)

    # Try again
    TrialMetadata.patch_manifest(TRIAL_ID, metadata_with_assay)

    # Look the trial up and check that it has the assay in it
    trial = TrialMetadata.find_by_trial_id(TRIAL_ID)
    assert trial.metadata_json["assays"] == metadata_with_assay["assays"]
def test_create_trial(cidc_api, clean_db, monkeypatch):
    """Check that creating a new trial works as expected"""
    user_id = setup_user(cidc_api, monkeypatch)
    trial_id = "test-trial"
    trial_json = {
        "trial_id": trial_id,
        "metadata_json": {
            "protocol_identifier": trial_id,
            "participants": [],
            "allowed_collection_event_names": [],
            "allowed_cohort_names": [],
        },
    }

    client = cidc_api.test_client()

    # Non-admins can't create trials
    res = client.post("/trial_metadata", json=trial_json)
    assert res.status_code == 401

    # Allowed users can create trials
    for role in trial_modifier_roles:
        make_role(user_id, role, cidc_api)
        res = client.post("/trial_metadata", json=trial_json)
        assert res.status_code == 201
        # The response should echo back every submitted field
        assert {**res.json, **trial_json} == res.json

        # No two trials can have the same trial_id
        res = client.post("/trial_metadata", json=trial_json)
        assert res.status_code == 400

        # No trial can be created with invalid metadata
        bad_trial_json = {"trial_id": "foo", "metadata_json": {"foo": "bar"}}
        res = client.post("/trial_metadata", json=bad_trial_json)
        assert res.status_code == 422
        assert res.json["_error"]["message"] == bad_trial_error_message

        # Clear created trial so the next role iteration can recreate it
        with cidc_api.app_context():
            trial = TrialMetadata.find_by_trial_id(trial_id)
            trial.delete()
def setup_downloadable_files(cidc_api) -> Tuple[int, int]:
    """Insert two downloadable files (one per trial) and return their ids."""
    metadata_json = {
        "protocol_identifier": trial_id_1,
        "allowed_collection_event_names": [],
        "allowed_cohort_names": [],
        "participants": [],
    }
    trial_1 = TrialMetadata(trial_id=trial_id_1, metadata_json=metadata_json)
    trial_2 = TrialMetadata(trial_id=trial_id_2, metadata_json=metadata_json)

    def build_file(trial, url_suffix, upload_type, facet_group) -> DownloadableFiles:
        return DownloadableFiles(
            trial_id=trial,
            upload_type=upload_type,
            object_url=f"{trial}/{url_suffix}",
            facet_group=facet_group,
            uploaded_timestamp=datetime.now(),
            file_size_bytes=int(51 * 1e6),  # 51MB
        )

    wes_file = build_file(
        trial_id_1, "wes/.../reads_123.bam", "wes_bam", "/wes/r1_L.fastq.gz"
    )
    cytof_file = build_file(
        trial_id_2,
        "cytof/.../analysis.zip",
        "cytof_10021_9204",
        "/cytof_analysis/analysis.zip",
    )

    with cidc_api.app_context():
        # Trials first, to satisfy the files' foreign-key constraints
        trial_1.insert()
        trial_2.insert()
        wes_file.insert()
        cytof_file.insert()

        return wes_file.id, cytof_file.id
def test_get_trial_by_trial_id(cidc_api, clean_db, monkeypatch):
    """Check that getting a single trial by trial id works as expected"""
    user_id = setup_user(cidc_api, monkeypatch)
    # BUG FIX: the original unpacked through set(), whose iteration order is
    # arbitrary, so the test could nondeterministically grab either id.
    # Unpack the returned pair directly to keep the choice deterministic.
    trial_id, _ = setup_trial_metadata(cidc_api)
    with cidc_api.app_context():
        trial = TrialMetadata.find_by_id(trial_id)

    client = cidc_api.test_client()

    # Non-admins can't get single trials
    res = client.get(f"/trial_metadata/{trial.trial_id}")
    assert res.status_code == 401

    # Allowed users can get single trials
    for role in trial_modifier_roles:
        make_role(user_id, role, cidc_api)
        res = client.get(f"/trial_metadata/{trial.trial_id}")
        assert res.status_code == 200
        assert res.json == TrialMetadataSchema().dump(trial)

        # Getting non-existent trials yields 404
        res = client.get(f"/trial_metadata/foobar")
        assert res.status_code == 404
Beispiel #27
0
def _derive_files_from_upload(trial_id: str, upload_type: str, session):
    """Run file derivation for ``upload_type`` on a trial and persist results.

    Loads the trial's metadata blob, derives downstream artifacts from it,
    uploads each artifact to the data bucket, records each one as a
    downloadable file, and finally writes back the (possibly modified)
    trial metadata before committing the session.
    """
    # Load the trial whose metadata JSON drives the derivation.
    trial: TrialMetadata = TrialMetadata.find_by_trial_id(trial_id, session=session)

    # Run the file derivation over the current metadata blob.
    result = unprism.derive_files(
        unprism.DeriveFilesContext(trial.metadata_json, upload_type, fetch_artifact)
    )

    # TODO: consider parallelizing this step if necessary
    for art in result.artifacts:
        # Persist the artifact bytes to GCS...
        gcs_blob = upload_to_data_bucket(art.object_url, art.data)

        # ...then register it in the database, keyed by a basic
        # "<data_format>|<file_type>" facet group.
        record = DownloadableFiles.create_from_blob(
            trial.trial_id,
            art.file_type,
            art.data_format,
            f"{art.data_format}|{art.file_type}",
            gcs_blob,
            session=session,
            alert_artifact_upload=True,
        )
        record.additional_metadata = art.metadata
        # Assume that a derived file will be directly useful for data analysis
        record.analysis_friendly = True

    # Update the trial metadata blob (in case the file derivation modified it)
    trial.metadata_json = result.trial_metadata

    session.commit()
Beispiel #28
0
 def insert_trial(trial_id, num_participants, num_samples):
     """Insert a minimal TrialMetadata record with generated participants.

     ``num_samples`` is indexed by participant number, so it is expected to
     be a sequence with one sample count per participant.
     """
     # NOTE(review): sample cimac_ids use the hard-coded prefix "CTTTPP1"
     # rather than f"CTTTPP{p}", so samples of every participant share
     # participant 1's id space (cf. setup_data, which uses the participant
     # index) — confirm whether the duplication is intentional.
     TrialMetadata(
         trial_id=trial_id,
         metadata_json={
             prism.PROTOCOL_ID_FIELD_NAME:
             trial_id,
             "allowed_cohort_names": [""],
             "allowed_collection_event_names": [""],
             "participants": [{
                 "cimac_participant_id":
                 f"CTTTPP{p}",
                 "participant_id":
                 "x",
                 "samples": [{
                     "cimac_id": f"CTTTPP1SS.0{s}",
                     "sample_location": "",
                     "type_of_primary_container": "Other",
                     "type_of_sample": "Other",
                     "collection_event_name": "",
                     "parent_sample_id": "",
                 } for s in range(num_samples[p])],
             } for p in range(num_participants)],
         },
     ).insert()
def test_upload_data_files(cidc_api, monkeypatch):
    """
    Smoke test that upload_data_files creates an upload job and returns the
    expected GCS bookkeeping: url mapping, gcs file map, extra metadata,
    optional files, and the upload job's id/etag/token.
    """
    user = Users(email="*****@*****.**")
    # CONSISTENCY FIX: the protocol identifier inside metadata_json must match
    # the trial_id this TrialMetadata is constructed with ("test_trial");
    # previously it referenced the unrelated module-level `trial_id`.
    trial = TrialMetadata(
        trial_id="test_trial",
        metadata_json={
            prism.PROTOCOL_ID_FIELD_NAME: "test_trial",
            "participants": [],
            "allowed_cohort_names": ["Arm_Z"],
            "allowed_collection_event_names": [],
        },
    )
    template_type = "foo"
    xlsx_file = MagicMock()
    md_patch = {}
    # Cover all four combinations of metadata_availability x allow_empty.
    file_infos = [
        finfo(
            "localfile1.ext",
            "test_trial/url/file1.ext",
            "uuid-1",
            metadata_availability=None,
            allow_empty=None,
        ),
        finfo(
            "localfile2.ext",
            "test_trial/url/file2.ext",
            "uuid-2",
            metadata_availability=True,
            allow_empty=None,
        ),
        finfo(
            "localfile3.ext",
            "test_trial/url/file3.ext",
            "uuid-3",
            metadata_availability=None,
            allow_empty=True,
        ),
        finfo(
            "localfile4.ext",
            "test_trial/url/file4.ext",
            "uuid-4",
            metadata_availability=True,
            allow_empty=True,
        ),
    ]

    # Stub out all GCS interactions.
    gcloud_client = MagicMock()
    gcloud_client.grant_upload_access = MagicMock()
    gcloud_client.upload_xlsx_to_gcs = MagicMock()
    gcs_blob = MagicMock()
    gcs_blob.name = "blob"
    gcloud_client.upload_xlsx_to_gcs.return_value = gcs_blob
    monkeypatch.setattr("cidc_api.resources.upload_jobs.gcloud_client",
                        gcloud_client)

    # Stub out upload job creation with a fixed id/etag/token.
    create = MagicMock()
    job = MagicMock()
    job.id = "id"
    job._etag = "_etag"
    job.token = "token"
    create.return_value = job
    monkeypatch.setattr("cidc_api.resources.upload_jobs.UploadJobs.create",
                        create)

    with cidc_api.app_context():
        response = upload_data_files(user, trial, template_type, xlsx_file,
                                     md_patch, file_infos)
    json = response.get_json()

    assert "job_id" in json and json["job_id"] == "id"
    assert "job_etag" in json and json["job_etag"] == "_etag"
    assert "url_mapping" in json
    # Each mapped URL gets a trailing unique component appended; strip it
    # before comparing against the requested object URLs.
    url_mapping = {
        k: v.rsplit("/", 1)[0]
        for k, v in json["url_mapping"].items()
    }
    assert url_mapping == {
        "localfile1.ext": "test_trial/url/file1.ext",
        "localfile2.ext": "test_trial/url/file2.ext",
        "localfile3.ext": "test_trial/url/file3.ext",
        "localfile4.ext": "test_trial/url/file4.ext",
    }
    assert "gcs_bucket" in json and json["gcs_bucket"] == "cidc-uploads-staging"
    # Only files flagged metadata_availability=True appear in extra_metadata.
    assert "extra_metadata" in json and json["extra_metadata"] == {
        "localfile2.ext": "uuid-2",
        "localfile4.ext": "uuid-4",
    }
    assert "gcs_file_map" in json
    gcs_file_map = sorted(
        [(k.rsplit("/", 1)[0], v) for k, v in json["gcs_file_map"].items()],
        key=lambda i: i[0],
    )
    assert gcs_file_map == [
        ("test_trial/url/file1.ext", "uuid-1"),
        ("test_trial/url/file2.ext", "uuid-2"),
        ("test_trial/url/file3.ext", "uuid-3"),
        ("test_trial/url/file4.ext", "uuid-4"),
    ]
    # Only files flagged allow_empty=True are reported as optional.
    assert "optional_files" in json and json["optional_files"] == [
        "localfile3.ext",
        "localfile4.ext",
    ]
    assert "token" in json and json["token"] == "token"
Beispiel #30
0
def setup_data(cidc_api, clean_db):
    """
    Insert a user, a trial (with duplicated shipment entries), and a
    MERGE_COMPLETED pbmc upload job into the test database.

    Relies on module-level values for `trial_id`, `assay_type`,
    `manifest_id`, `num_participants`, and `num_samples` — presumably
    defined at this test module's top level; confirm when editing.

    Returns:
        (user, upload_job, trial): the inserted, refreshed records.
    """
    user = Users(email="*****@*****.**", approval_date=datetime.now())
    # A single shipment record, deliberately listed twice in the metadata
    # below to mimic the duplicate shipment uploads we sometimes receive.
    shipment = {
        "courier": "FEDEX",
        "ship_to": "",
        "ship_from": "",
        "assay_type": assay_type,
        "manifest_id": manifest_id,
        "date_shipped": "2020-06-10 00:00:00",
        "date_received": "2020-06-11 00:00:00",
        "account_number": "",
        "assay_priority": "1",
        "receiving_party": "MSSM_Rahman",
        "tracking_number": "",
        "shipping_condition": "Frozen_Dry_Ice",
        "quality_of_shipment": "Specimen shipment received in good condition",
    }
    metadata = {
        "protocol_identifier":
        trial_id,
        "shipments": [
            # we get duplicate shipment uploads sometimes
            shipment,
            shipment,
        ],
        "participants": [{
            "cimac_participant_id":
            f"CTTTPP{p}",
            "participant_id":
            "x",
            "cohort_name":
            "",
            "samples": [{
                "cimac_id": f"CTTTPP{p}SS.0{s}",
                "sample_location": "",
                "type_of_primary_container": "Other",
                "type_of_sample": "Other",
                "collection_event_name": "",
                "parent_sample_id": "",
            } for s in range(num_samples[p])],
        } for p in range(num_participants)],
        "allowed_cohort_names": [""],
        "allowed_collection_event_names": [""],
    }
    trial = TrialMetadata(trial_id=trial_id, metadata_json=metadata)
    # The upload job carries the same metadata as its patch.
    upload_job = UploadJobs(
        uploader_email=user.email,
        trial_id=trial.trial_id,
        upload_type="pbmc",
        gcs_xlsx_uri="",
        metadata_patch=metadata,
        multifile=False,
    )
    # Bypass status-transition validation to land directly in MERGE_COMPLETED.
    upload_job._set_status_no_validation(UploadJobStatus.MERGE_COMPLETED.value)
    with cidc_api.app_context():
        user.insert()
        trial.insert()
        upload_job.insert()

        # Re-read DB state so returned records carry generated fields.
        clean_db.refresh(user)
        clean_db.refresh(upload_job)
        clean_db.refresh(trial)

    return user, upload_job, trial