def test_cytof_clustergrammer_end_to_end(monkeypatch, metadata_df,
                                         upload_type):
    """Test the CyTOF-clustergrammer transform."""
    # Test no file found
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        lambda *args, **kwargs: None)
    with pytest.raises(Exception, match="No downloadable file"):
        vis_preprocessing(make_pubsub_event("foo/bar"), {})

    # Mock a CyTOF summary downloadable file record
    cytof_record = MagicMock()
    cytof_record.object_url = "foo.txt"
    cytof_record.upload_type = upload_type
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = cytof_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    # Mock GCS call
    _get_blob_as_stream = MagicMock()
    fake_cytof = open(CYTOF_PATH, "rb")
    _get_blob_as_stream.return_value = fake_cytof
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        _get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_blob_as_stream.assert_called_once()
    _get_metadata_df.assert_called_once()

    # Check contents of the clustergrammer output
    row_names = [
        row["name"] for row in cytof_record.clustergrammer["row_nodes"]
    ]
    col_names = [
        col["name"] for col in cytof_record.clustergrammer["col_nodes"]
    ]
    col_cats = [(col["cat-0"], col["cat-1"], col["cat-2"])
                for col in cytof_record.clustergrammer["col_nodes"]]

    # Based on the contents of fake_cytof_summary.csv...
    assert row_names == ["cell1", "cell2"]
    assert col_names == ["CIMAC Id: CTTTTPPS1.01", "CIMAC Id: CTTTTPPS2.01"]

    # Based on the construction of metadata_df...
    assert col_cats == [
        ("Participant Id: CTTTTPP", "Cohort: Arm_A",
         "Collection Event: Event1"),
        ("Participant Id: CTTTTPP", "Cohort: Arm_A",
         "Collection Event: Event2"),
    ]

    fake_cytof.close()
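

# NOTE: the `metadata_df` fixture used throughout these tests is defined in
# shared fixtures not shown in this excerpt. Judging from the assertions here
# and in test_ihc_combined_end_to_end below, a minimal sketch of what it
# presumably returns (the real fixture may differ) would be:
#
#     @pytest.fixture
#     def metadata_df():
#         import pandas as pd
#         return pd.DataFrame(
#             {
#                 "cimac_participant_id": ["CTTTTPP", "CTTTTPP"],
#                 "cohort_name": ["Arm_A", "Arm_A"],
#                 "collection_event_name": ["Event1", "Event2"],
#             },
#             index=pd.Index(["CTTTTPPS1.01", "CTTTTPPS2.01"], name="cimac_id"),
#         )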


def test_cytof_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for cytof files"""
    # Mock a CyTOF downloadable file record
    cytof_record = MagicMock()
    cytof_record.object_url = "foo.txt"
    cytof_record.upload_type = "cytof"
    cytof_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = cytof_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "cytof": [{
                "cytof_antibodies": [
                    {
                        "usage": "Ignored"
                    },
                    {
                        "usage": "Used",
                        "stain_type": "Surface Stain",
                        "isotope": "000Foo",
                        "antibody": "Bar",
                        "clone": "Nx/xxx",
                    },
                    {
                        "usage": "Analysis Only",
                        "stain_type": "Intracellular",
                        "isotope": "001Foo",
                        "antibody": "Baz",
                    },
                ],
                "object_url":
                "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

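    # The expected value below implies the formatting rule: antibodies with
    # usage "Ignored" are dropped; each remaining entry is rendered as
    # "<stain type> <isotope>-<antibody>" (stain type lowercased, with any
    # trailing " Stain" removed), plus the clone in parentheses when present.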
    assert cytof_record.additional_metadata == {
        "foo":
        "bar",
        "cytof.antibodies":
        "surface 000Foo-Bar (Nx/xxx), intracellular 001Foo-Baz",
    }


def test_mif_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for MIF files"""
    # Mock a MIF downloadable file record
    mif_record = MagicMock()
    mif_record.object_url = "foo.txt"
    mif_record.upload_type = "mif"
    mif_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = mif_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "mif": [{
                "antibodies": [
                    {
                        "export_name": "Foo"
                    },
                    {
                        "antibody": "Bar",
                        "clone": "Nx/xxx",
                        "fluor_wavelength": 500
                    },
                    {
                        "antibody": "Baz",
                        "fluor_wavelength": 500
                    },
                ],
                "object_url":
                "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

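    # The expected value below implies the formatting rule: export_name is
    # used verbatim when present; otherwise the antibody name is annotated
    # with "(<clone> - <fluor_wavelength>)", or just "(<fluor_wavelength>)"
    # when no clone is given.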
    assert mif_record.additional_metadata == {
        "foo": "bar",
        "mif.antibodies": "Foo, Bar (Nx/xxx - 500), Baz (500)",
    }


def test_ihc_combined_end_to_end(monkeypatch, metadata_df):
    """Test the IHC combined transform."""
    # Mock an IHC combined downloadable file record
    ihc_record = MagicMock()
    ihc_record.object_url = "foo.txt"
    ihc_record.upload_type = "ihc marker combined"
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = ihc_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    # Mock GCS call
    _get_blob_as_stream = MagicMock()
    combined_csv = StringIO(
        "cimac_id,foo,bar\nCTTTTPPS1.01,1,2\nCTTTTPPS2.01,3,4")
    _get_blob_as_stream.return_value = combined_csv
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        _get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_blob_as_stream.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert ihc_record.ihc_combined_plot == [
        {
            "cimac_id": "CTTTTPPS1.01",
            "foo": 1,
            "bar": 2,
            "cimac_participant_id": "CTTTTPP",
            "cohort_name": "Arm_A",
            "collection_event_name": "Event1",
        },
        {
            "cimac_id": "CTTTTPPS2.01",
            "foo": 3,
            "bar": 4,
            "cimac_participant_id": "CTTTTPP",
            "cohort_name": "Arm_A",
            "collection_event_name": "Event2",
        },
    ]


def test_ihc_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for IHC files"""
    # Mock an IHC downloadable file record
    ihc_record = MagicMock()
    ihc_record.object_url = "foo.txt"
    ihc_record.upload_type = "ihc"
    ihc_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = ihc_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "ihc": [{
                "antibody": {
                    "antibody": "Bar",
                    "clone": "Nx/xxx"
                },
                "object_url": "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert ihc_record.additional_metadata == {
        "foo": "bar",
        "ihc.antibody": "Bar (Nx/xxx)",
    }


def test_loading_lazily(monkeypatch, metadata_df):
    """Test that files aren't loaded if there are no transformations for them"""
    record = MagicMock()
    record.object_url = "foo.txt"
    record.upload_type = "something"
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        get_by_object_url)

    get_blob_as_stream = MagicMock()
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_blob_as_stream.assert_not_called()


def test_send_email(monkeypatch):
    """Test that the email sending function builds a message as expected."""
    sender = MagicMock()
    sg_client = MagicMock()
    sg_client.return_value = sender
    monkeypatch.setattr(emails, "_get_sg_client", sg_client)

    # Well-formed email
    email = {
        "to_emails": ["*****@*****.**", "*****@*****.**"],
        "subject": "test subject",
        "html_content": "test content",
    }
    event = make_pubsub_event(json.dumps(email))
    emails.send_email(event, None)
    args, _ = sender.send.call_args
    message = args[0]
    # A SendGrid message's string representation is a JSON blob
    # detailing its configuration.
    sendgrid_expects = {
        "from": {
            "email": "*****@*****.**"
        },
        "subject":
        "test subject",
        "personalizations": [{
            "to": [{
                "email": "*****@*****.**"
            }, {
                "email": "*****@*****.**"
            }]
        }],
        "content": [{
            "type": "text/html",
            "value": "test content"
        }],
    }

    assert message == sendgrid_expects

    event = make_pubsub_event(
        json.dumps(
            dict(
                email,
                attachments=[{
                    "file_content": "att/content",
                    "file_name": "att/fname",
                    "file_type": "att/mime",
                }],
            )))
    emails.send_email(event, None)
    args, _ = sender.send.call_args
    message = args[0]
    # A SendGrid message's string representation is a JSON blob
    # detailing its configuration.
    assert message == dict(
        sendgrid_expects,
        attachments=[{
            "file_content": "att/content",
            "file_name": "att/fname",
            "file_type": "att/mime",
        }],
    )

    # Malformed email
    del email["subject"]
    event = make_pubsub_event(json.dumps(email))
    with pytest.raises(AssertionError):
        emails.send_email(event, None)


def test_extract_pubsub_data():
    """Ensure that extract_pubsub_data can do what it claims"""
    data = "hello there"
    event = make_pubsub_event(data)
    assert util.extract_pubsub_data(event) == data
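
# `make_pubsub_event` (from tests.util) isn't shown in this excerpt. Cloud
# Functions deliver Pub/Sub messages with a base64-encoded "data" field, so a
# minimal sketch of the helper, assuming that standard event shape, might be:
#
#     import base64
#
#     def make_pubsub_event(data: str) -> dict:
#         return {"data": base64.b64encode(data.encode("utf-8")).decode("utf-8")}
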
from unittest.mock import MagicMock

import pytest
from cidc_api.models import UploadJobs, UploadJobStatus

from functions import upload_postprocessing

from tests.util import make_pubsub_event

event = make_pubsub_event("1")


def test_manifest_preconditions(monkeypatch):
    """Ensure derive_files_from_manifest_upload blocks derivation under the expected conditions."""
    find_upload_by_id = MagicMock()
    find_upload_by_id.return_value = None  # upload record doesn't exist
    monkeypatch.setattr("cidc_api.models.UploadJobs.find_by_id",
                        find_upload_by_id)

    with pytest.raises(Exception, match="No manifest upload record found"):
        upload_postprocessing.derive_files_from_manifest_upload(event, None)

    # Mock existing upload record
    find_upload_by_id.return_value = MagicMock()

    # Ensure that file derivation happens so long as upload record exists
    _derive_files = MagicMock()
    monkeypatch.setattr(upload_postprocessing, "_derive_files_from_upload",
                        _derive_files)

    upload_postprocessing.derive_files_from_manifest_upload(event, None)


def test_ingest_upload(caplog, monkeypatch):
    """Test upload data transfer functionality"""

    TS_AND_PATH = "/1234/local_path1.txt"
    ARTIFACT = {"test-prop": "test-val"}
    TRIAL_ID = "CIMAC-12345"
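
    # JOB_ID, FILE_MAP, URI1, URI2, and UPLOAD_DATE_PATH are presumably
    # module-level test constants defined alongside this test's other
    # fixtures; they are not shown in this excerpt.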

    job = UploadJobs(
        id=JOB_ID,
        uploader_email="*****@*****.**",
        trial_id=TRIAL_ID,
        gcs_xlsx_uri="test.xlsx",
        gcs_file_map=FILE_MAP,
        metadata_patch={
            prism.PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            "assays": {
                "wes": [{
                    "records": [{
                        "cimac_id": "CIMAC-mock-sa-id",
                        "files": {
                            "r1": {
                                "upload_placeholder": "uuid1"
                            },
                            "r2": {
                                "upload_placeholder": "uuid2"
                            },
                        },
                    }]
                }]
            },
        },
        status=UploadJobStatus.UPLOAD_COMPLETED.value,
        upload_type="wes_bam",
    )

    # Since the test database isn't yet set up with migrations,
    # it won't have the correct relations in it, so we can't actually
    # store or retrieve data
    find_by_id = MagicMock()
    find_by_id.return_value = job
    monkeypatch.setattr(UploadJobs, "find_by_id", find_by_id)

    # Mock data transfer functionality
    _gcs_copy = MagicMock()
    _gcs_copy.side_effect = lambda storage_client, source_bucket, source_object, target_bucket, target_object: _gcs_obj_mock(
        target_object,
        100,
        datetime.datetime.now(),
        "gsc_url_mock_md5",
        "gsc_url_mock_crc32c",
    )
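    # Each mocked copy presumably returns a blob-like object for the target
    # path with the size, timestamp, md5, and crc32c values given above
    # (via the `_gcs_obj_mock` helper, defined elsewhere in this module).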
    monkeypatch.setattr("functions.uploads._gcs_copy", _gcs_copy)

    _get_bucket_and_blob = MagicMock()
    xlsx_blob = MagicMock()
    _get_bucket_and_blob.return_value = None, xlsx_blob
    monkeypatch.setattr("functions.uploads._get_bucket_and_blob",
                        _get_bucket_and_blob)

    monkeypatch.setattr(
        "functions.uploads.GOOGLE_ANALYSIS_PERMISSIONS_GROUPS_DICT",
        {"wes": "analysis-group@email"},
    )

    # mocking `google.cloud.storage.Client()` to not actually create a client
    _storage_client = MagicMock(name="_storage_client")
    monkeypatch.setattr("functions.uploads.storage.Client",
                        lambda *a, **kw: _storage_client)

    _bucket = MagicMock(name="_bucket")
    _storage_client.get_bucket = lambda *a, **kw: _bucket

    _storage_client._connection = _connection = MagicMock(name="_connection")

    _api_request = _connection.api_request = MagicMock(
        name="_connection.api_request")
    _api_request.return_value = {"bindings": []}

    _bucket.set_iam_policy = _set_iam_policy = MagicMock(
        name="_bucket.set_iam_policy")
    _bucket.get_iam_policy = _get_iam_policy = MagicMock(
        name="_bucket.get_iam_policy")
    _policy = _get_iam_policy.return_value = MagicMock(name="_policy")
    iam_prefix = f'resource.name.startsWith("projects/_/buckets/cidc-data-staging/objects/{TRIAL_ID}/wes/")'
    # This set up checks handling duplicate bindings
    _policy.bindings = [{
        "role": GOOGLE_ANALYSIS_GROUP_ROLE,
        "members": {f"group:analysis-group@email"},
        "condition": {
            "expression": iam_prefix
        },
    }]
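    # Because this pre-existing binding matches what ingest_upload will try to
    # add (same role, member, and condition prefix), the function is expected
    # to deduplicate rather than append -- see len(_policy.bindings) == 1 below.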

    # Mock metadata merging functionality
    _save_file = MagicMock(name="_save_file")
    monkeypatch.setattr(DownloadableFiles, "create_from_metadata", _save_file)

    _save_blob_file = MagicMock(name="_save_blob_file")
    monkeypatch.setattr(DownloadableFiles, "create_from_blob", _save_blob_file)

    _merge_metadata = MagicMock(name="_merge_metadata")
    monkeypatch.setattr(TrialMetadata, "patch_assays", _merge_metadata)

    publish_artifact_upload = MagicMock(name="publish_artifact_upload")
    monkeypatch.setattr(uploads, "publish_artifact_upload",
                        publish_artifact_upload)

    _encode_and_publish = MagicMock(name="_encode_and_publish")
    monkeypatch.setattr(uploads, "_encode_and_publish", _encode_and_publish)

    successful_upload_event = make_pubsub_event(str(job.id))
    response = ingest_upload(successful_upload_event, None).json

    assert response[URI1 + UPLOAD_DATE_PATH] == URI1
    assert response[URI2 + UPLOAD_DATE_PATH] == URI2
    find_by_id.assert_called_once()
    # Check that we copied multiple objects
    assert _gcs_copy.call_count > 1
    # Check that we tried to save multiple files
    assert _save_file.call_count > 1
    # Check that we tried to merge metadata once
    _merge_metadata.assert_called_once()
    # Check that we got the xlsx blob metadata from GCS
    _get_bucket_and_blob.assert_called_with(_storage_client,
                                            GOOGLE_DATA_BUCKET,
                                            job.gcs_xlsx_uri)
    # Check that we created a downloadable file for the xlsx file blob
    assert _save_blob_file.call_args[0] == (
        "CIMAC-12345",
        "wes_bam",
        "Assay Metadata",
        "wes_bam|Assay Metadata",
        xlsx_blob,
    )
    # Check that we tried to update GCS access policy
    _set_iam_policy.assert_called_once()
    # Check that we added GCS access for the biofx team
    assert _policy == _set_iam_policy.call_args[0][0]
    assert len(_policy.bindings) == 1
    assert _policy.bindings[0]["members"] == {"group:analysis-group@email"}
    assert _policy.bindings[0][
        "role"] == "projects/cidc-dfci-staging/roles/CIDC_biofx"
    assert iam_prefix in _policy.bindings[0]["condition"]["expression"]
    _until = datetime.datetime.today() + datetime.timedelta(
        GOOGLE_ANALYSIS_PERMISSIONS_GRANT_FOR_DAYS)
    assert (
        f'request.time < timestamp("{_until.date().isoformat()}T00:00:00Z")'
        in _policy.bindings[0]["condition"]["expression"])

    # Check that the job status was updated to reflect a successful upload
    assert job.status == UploadJobStatus.MERGE_COMPLETED.value
    assert email_was_sent(caplog.text)
    publish_artifact_upload.assert_called()
    _encode_and_publish.assert_called()