def test_cytof_clustergrammer_end_to_end(monkeypatch, metadata_df, upload_type):
    """Test the CyTOF-clustergrammer transform."""
    # Test no file found
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url",
                        lambda *args, **kwargs: None)
    with pytest.raises(Exception, match="No downloadable file"):
        vis_preprocessing(make_pubsub_event("foo/bar"), {})

    # Mock a CyTOF summary downloadable file record
    cytof_record = MagicMock()
    cytof_record.object_url = "foo.txt"
    cytof_record.upload_type = upload_type
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = cytof_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    # Mock GCS call
    gcs_blob = MagicMock()
    _get_blob_as_stream = MagicMock()
    fake_cytof = open(CYTOF_PATH, "rb")
    _get_blob_as_stream.return_value = fake_cytof
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        _get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_blob_as_stream.assert_called_once()
    _get_metadata_df.assert_called_once()

    # Check contents of the clustergrammer output
    row_names = [row["name"] for row in cytof_record.clustergrammer["row_nodes"]]
    col_names = [col["name"] for col in cytof_record.clustergrammer["col_nodes"]]
    col_cats = [(col["cat-0"], col["cat-1"], col["cat-2"])
                for col in cytof_record.clustergrammer["col_nodes"]]

    # Based on the contents of fake_cytof_summary.csv...
    assert row_names == ["cell1", "cell2"]
    assert col_names == ["CIMAC Id: CTTTTPPS1.01", "CIMAC Id: CTTTTPPS2.01"]

    # Based on the construction of metadata_df...
    assert col_cats == [
        ("Participant Id: CTTTTPP", "Cohort: Arm_A", "Collection Event: Event1"),
        ("Participant Id: CTTTTPP", "Cohort: Arm_A", "Collection Event: Event2"),
    ]

    fake_cytof.close()

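# The metadata_df fixture used throughout these tests is defined elsewhere
# (e.g. in a conftest not shown here). Reconstructed from the assertions above
# and below, it likely looks roughly like this sketch; the exact column names
# and index are assumptions, not the actual fixture code:
#
# import pandas as pd
# import pytest
#
# @pytest.fixture
# def metadata_df():
#     df = pd.DataFrame({
#         "cimac_id": ["CTTTTPPS1.01", "CTTTTPPS2.01"],
#         "cimac_participant_id": ["CTTTTPP", "CTTTTPP"],
#         "cohort_name": ["Arm_A", "Arm_A"],
#         "collection_event_name": ["Event1", "Event2"],
#     })
#     return df.set_index("cimac_id")
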
def test_cytof_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for CyTOF files"""
    # Mock a CyTOF downloadable file record
    cytof_record = MagicMock()
    cytof_record.object_url = "foo.txt"
    cytof_record.upload_type = "cytof"
    cytof_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = cytof_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "cytof": [{
                "cytof_antibodies": [
                    {"usage": "Ignored"},
                    {
                        "usage": "Used",
                        "stain_type": "Surface Stain",
                        "isotope": "000Foo",
                        "antibody": "Bar",
                        "clone": "Nx/xxx",
                    },
                    {
                        "usage": "Analysis Only",
                        "stain_type": "Intracellular",
                        "isotope": "001Foo",
                        "antibody": "Baz",
                    },
                ],
                "object_url": "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert cytof_record.additional_metadata == {
        "foo": "bar",
        "cytof.antibodies": "surface 000Foo-Bar (Nx/xxx), intracellular 001Foo-Baz",
    }

def test_mif_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for MIF files"""
    # Mock a MIF downloadable file record
    mif_record = MagicMock()
    mif_record.object_url = "foo.txt"
    mif_record.upload_type = "mif"
    mif_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = mif_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "mif": [{
                "antibodies": [
                    {"export_name": "Foo"},
                    {"antibody": "Bar", "clone": "Nx/xxx", "fluor_wavelength": 500},
                    {"antibody": "Baz", "fluor_wavelength": 500},
                ],
                "object_url": "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert mif_record.additional_metadata == {
        "foo": "bar",
        "mif.antibodies": "Foo, Bar (Nx/xxx - 500), Baz (500)",
    }

def test_ihc_combined_end_to_end(monkeypatch, metadata_df):
    """Test the IHC combined transform."""
    # Mock an IHC combined downloadable file record
    ihc_record = MagicMock()
    ihc_record.object_url = "foo.txt"
    ihc_record.upload_type = "ihc marker combined"
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = ihc_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    # Mock GCS call
    gcs_blob = MagicMock()
    _get_blob_as_stream = MagicMock()
    combined_csv = StringIO(
        "cimac_id,foo,bar\nCTTTTPPS1.01,1,2\nCTTTTPPS2.01,3,4")
    _get_blob_as_stream.return_value = combined_csv
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        _get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_blob_as_stream.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert ihc_record.ihc_combined_plot == [
        {
            "cimac_id": "CTTTTPPS1.01",
            "foo": 1,
            "bar": 2,
            "cimac_participant_id": "CTTTTPP",
            "cohort_name": "Arm_A",
            "collection_event_name": "Event1",
        },
        {
            "cimac_id": "CTTTTPPS2.01",
            "foo": 3,
            "bar": 4,
            "cimac_participant_id": "CTTTTPP",
            "cohort_name": "Arm_A",
            "collection_event_name": "Event2",
        },
    ]

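# The assertion above pins down what the IHC-combined transform must do: parse
# the CSV, join the participant/cohort/collection-event columns from
# metadata_df on cimac_id, and store the rows as a list of records. A hedged
# pandas sketch of that join (the helper name and exact call shape are
# assumptions, not the actual implementation):
#
# import pandas as pd
#
# def _ihc_combined_to_records(csv_stream, metadata_df):
#     data = pd.read_csv(csv_stream)
#     # metadata_df is assumed to be indexed by cimac_id (see fixture sketch above)
#     data = data.merge(metadata_df, how="left",
#                       left_on="cimac_id", right_index=True)
#     return data.to_dict(orient="records")
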
def test_ihc_antibody_metadata_end_to_end(monkeypatch, metadata_df):
    """Test addition of antibody metadata for IHC files"""
    # Mock an IHC downloadable file record
    ihc_record = MagicMock()
    ihc_record.object_url = "foo.txt"
    ihc_record.upload_type = "ihc"
    ihc_record.additional_metadata = {"foo": "bar"}
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = ihc_record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    # Mock trial
    ct = MagicMock()
    ct.metadata_json = {
        "assays": {
            "ihc": [{
                "antibody": {"antibody": "Bar", "clone": "Nx/xxx"},
                "object_url": "foo.txt",  # for DeepSearch
            }]
        }
    }
    find_by_trial_id = MagicMock()
    find_by_trial_id.return_value = ct
    monkeypatch.setattr(TrialMetadata, "find_by_trial_id", find_by_trial_id)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_by_object_url.assert_called_once()
    _get_metadata_df.assert_called_once()

    assert ihc_record.additional_metadata == {
        "foo": "bar",
        "ihc.antibody": "Bar (Nx/xxx)",
    }

def test_loading_lazily(monkeypatch, metadata_df):
    """Test that files aren't loaded if there are no transformations for them"""
    record = MagicMock()
    record.object_url = "foo.txt"
    record.upload_type = "something"
    get_by_object_url = MagicMock()
    get_by_object_url.return_value = record
    monkeypatch.setattr(DownloadableFiles, "get_by_object_url", get_by_object_url)

    get_blob_as_stream = MagicMock()
    monkeypatch.setattr(functions.visualizations, "get_blob_as_stream",
                        get_blob_as_stream)

    # Mock metadata_df
    _get_metadata_df = MagicMock()
    _get_metadata_df.return_value = metadata_df
    monkeypatch.setattr(functions.visualizations, "_get_metadata_df",
                        _get_metadata_df)

    vis_preprocessing(make_pubsub_event("1"), {})
    get_blob_as_stream.assert_not_called()

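# test_loading_lazily pins down the lazy-loading contract: vis_preprocessing
# should only fetch the blob when a transform is registered for the record's
# upload_type. A hedged sketch of that dispatch pattern (the dict and helper
# names below are assumptions, not the actual module code):
#
# _TRANSFORMS = {
#     "ihc marker combined": _ihc_combined_transform,
#     # ...one entry per upload_type that has a visualization...
# }
#
# def _maybe_transform(record, metadata_df):
#     transform = _TRANSFORMS.get(record.upload_type)
#     if transform is None:
#         return  # unrecognized upload_type: the file is never downloaded
#     data = get_blob_as_stream(record.object_url)
#     transform(record, data, metadata_df)
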
def test_send_email(monkeypatch):
    """Test that the email sending function builds a message as expected."""
    sender = MagicMock()
    sg_client = MagicMock()
    sg_client.return_value = sender
    monkeypatch.setattr(emails, "_get_sg_client", sg_client)

    # Well-formed email
    email = {
        "to_emails": ["*****@*****.**", "*****@*****.**"],
        "subject": "test subject",
        "html_content": "test content",
    }
    event = make_pubsub_event(json.dumps(email))
    emails.send_email(event, None)

    args, _ = sender.send.call_args
    message = args[0]

    # A SendGrid message's string representation is a JSON blob
    # detailing its configuration.
    sendgrid_expects = {
        "from": {"email": "*****@*****.**"},
        "subject": "test subject",
        "personalizations": [{
            "to": [{"email": "*****@*****.**"}, {"email": "*****@*****.**"}]
        }],
        "content": [{"type": "text/html", "value": "test content"}],
    }
    assert message == sendgrid_expects

    event = make_pubsub_event(
        json.dumps(
            dict(
                email,
                attachments=[{
                    "file_content": "att/content",
                    "file_name": "att/fname",
                    "file_type": "att/mime",
                }],
            )))
    emails.send_email(event, None)

    args, _ = sender.send.call_args
    message = args[0]

    # A SendGrid message's string representation is a JSON blob
    # detailing its configuration.
    assert message == dict(
        sendgrid_expects,
        attachments=[{
            "file_content": "att/content",
            "file_name": "att/fname",
            "file_type": "att/mime",
        }],
    )

    # Malformed email
    del email["subject"]
    event = make_pubsub_event(json.dumps(email))
    with pytest.raises(AssertionError):
        emails.send_email(event, None)

def test_extract_pubsub_data():
    """Ensure that extract_pubsub_data can do what it claims"""
    data = "hello there"
    event = make_pubsub_event(data)
    assert util.extract_pubsub_data(event) == data

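# For reference, make_pubsub_event (imported from tests.util) wraps a string
# the way Pub/Sub delivers it: base64-encoded under a "data" key. A minimal
# sketch under that assumption; the real helper may differ in detail:
#
# import base64
#
# def make_pubsub_event(data: str) -> dict:
#     """Build a Pub/Sub-style event with a base64-encoded payload."""
#     return {"data": base64.b64encode(data.encode("utf-8"))}
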
from unittest.mock import MagicMock

import pytest
from cidc_api.models import UploadJobs, UploadJobStatus

from functions import upload_postprocessing
from tests.util import make_pubsub_event

event = make_pubsub_event("1")


def test_manifest_preconditions(monkeypatch):
    """Ensure derive_files_from_manifest_upload blocks derivation under the expected conditions."""
    find_upload_by_id = MagicMock()
    find_upload_by_id.return_value = None  # upload record doesn't exist
    monkeypatch.setattr("cidc_api.models.UploadJobs.find_by_id", find_upload_by_id)

    with pytest.raises(Exception, match="No manifest upload record found"):
        upload_postprocessing.derive_files_from_manifest_upload(event, None)

    # Mock existing upload record
    find_upload_by_id.return_value = MagicMock()

    # Ensure that file derivation happens so long as the upload record exists
    _derive_files = MagicMock()
    monkeypatch.setattr(upload_postprocessing, "_derive_files_from_upload",
                        _derive_files)

    upload_postprocessing.derive_files_from_manifest_upload(event, None)
    _derive_files.assert_called_once()

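# The preconditions above imply roughly this control flow inside
# derive_files_from_manifest_upload (a hedged sketch; the helper names and
# arguments are assumptions, and the real function likely does more):
#
# def derive_files_from_manifest_upload(event, context):
#     upload_id = extract_pubsub_data(event)
#     upload = UploadJobs.find_by_id(upload_id)
#     if not upload:
#         raise Exception(f"No manifest upload record found with id {upload_id}")
#     _derive_files_from_upload(upload)
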
def test_ingest_upload(caplog, monkeypatch):
    """Test upload data transfer functionality"""
    TS_AND_PATH = "/1234/local_path1.txt"
    ARTIFACT = {"test-prop": "test-val"}
    TRIAL_ID = "CIMAC-12345"

    job = UploadJobs(
        id=JOB_ID,
        uploader_email="*****@*****.**",
        trial_id=TRIAL_ID,
        gcs_xlsx_uri="test.xlsx",
        gcs_file_map=FILE_MAP,
        metadata_patch={
            prism.PROTOCOL_ID_FIELD_NAME: TRIAL_ID,
            "assays": {
                "wes": [{
                    "records": [{
                        "cimac_id": "CIMAC-mock-sa-id",
                        "files": {
                            "r1": {"upload_placeholder": "uuid1"},
                            "r2": {"upload_placeholder": "uuid2"},
                        },
                    }]
                }]
            },
        },
        status=UploadJobStatus.UPLOAD_COMPLETED.value,
        upload_type="wes_bam",
    )

    # Since the test database isn't yet set up with migrations,
    # it won't have the correct relations in it, so we can't actually
    # store or retrieve data
    find_by_id = MagicMock()
    find_by_id.return_value = job
    monkeypatch.setattr(UploadJobs, "find_by_id", find_by_id)

    # Mock data transfer functionality
    _gcs_copy = MagicMock()
    _gcs_copy.side_effect = lambda storage_client, source_bucket, source_object, target_bucket, target_object: _gcs_obj_mock(
        target_object,
        100,
        datetime.datetime.now(),
        "gsc_url_mock_md5",
        "gsc_url_mock_crc32c",
    )
    monkeypatch.setattr("functions.uploads._gcs_copy", _gcs_copy)

    _get_bucket_and_blob = MagicMock()
    xlsx_blob = MagicMock()
    _get_bucket_and_blob.return_value = None, xlsx_blob
    monkeypatch.setattr("functions.uploads._get_bucket_and_blob",
                        _get_bucket_and_blob)

    monkeypatch.setattr(
        "functions.uploads.GOOGLE_ANALYSIS_PERMISSIONS_GROUPS_DICT",
        {"wes": "analysis-group@email"},
    )

    # Mock `google.cloud.storage.Client()` so that no real client is created
    _storage_client = MagicMock("_storage_client")
    monkeypatch.setattr("functions.uploads.storage.Client",
                        lambda *a, **kw: _storage_client)

    _bucket = MagicMock("_bucket")
    _storage_client.get_bucket = lambda *a, **kw: _bucket
    _storage_client._connection = _connection = MagicMock("_connection")
    _api_request = _connection.api_request = MagicMock("_connection.api_request")
    _api_request.return_value = {"bindings": []}

    _bucket.set_iam_policy = _set_iam_policy = MagicMock("_bucket.set_iam_policy")
    _bucket.get_iam_policy = _get_iam_policy = MagicMock("_bucket.get_iam_policy")
    _policy = _get_iam_policy.return_value = MagicMock("_policy")

    iam_prefix = f'resource.name.startsWith("projects/_/buckets/cidc-data-staging/objects/{TRIAL_ID}/wes/")'
    # This setup checks the handling of duplicate bindings
    _policy.bindings = [{
        "role": GOOGLE_ANALYSIS_GROUP_ROLE,
        "members": {"group:analysis-group@email"},
        "condition": {"expression": iam_prefix},
    }]

    # Mock metadata merging functionality
    _save_file = MagicMock("_save_file")
    monkeypatch.setattr(DownloadableFiles, "create_from_metadata", _save_file)
    _save_blob_file = MagicMock("_save_blob_file")
    monkeypatch.setattr(DownloadableFiles, "create_from_blob", _save_blob_file)
    _merge_metadata = MagicMock("_merge_metadata")
    monkeypatch.setattr(TrialMetadata, "patch_assays", _merge_metadata)
    publish_artifact_upload = MagicMock("publish_artifact_upload")
    monkeypatch.setattr(uploads, "publish_artifact_upload", publish_artifact_upload)
    _encode_and_publish = MagicMock("_encode_and_publish")
    monkeypatch.setattr(uploads, "_encode_and_publish", _encode_and_publish)

    successful_upload_event = make_pubsub_event(str(job.id))
    response = ingest_upload(successful_upload_event, None).json

    assert response[URI1 + UPLOAD_DATE_PATH] == URI1
    assert response[URI2 + UPLOAD_DATE_PATH] == URI2

    find_by_id.assert_called_once()

    # Check that we copied multiple objects
    assert _gcs_copy.call_count > 1

    # Check that we tried to save multiple files
    assert _save_file.call_count > 1

    # Check that we tried to merge metadata once
    _merge_metadata.assert_called_once()

    # Check that we got the xlsx blob metadata from GCS
    _get_bucket_and_blob.assert_called_with(_storage_client, GOOGLE_DATA_BUCKET,
                                            job.gcs_xlsx_uri)

    # Check that we created a downloadable file for the xlsx file blob
    assert _save_blob_file.call_args[:-1][0] == (
        "CIMAC-12345",
        "wes_bam",
        "Assay Metadata",
        "wes_bam|Assay Metadata",
        xlsx_blob,
    )

    # Check that we tried to update the GCS access policy
    _set_iam_policy.assert_called_once()

    # Check that we added GCS access for the biofx team
    assert _policy == _set_iam_policy.call_args[0][0]
    assert len(_policy.bindings) == 1
    assert _policy.bindings[0]["members"] == {"group:analysis-group@email"}
    assert _policy.bindings[0]["role"] == "projects/cidc-dfci-staging/roles/CIDC_biofx"
    assert iam_prefix in _policy.bindings[0]["condition"]["expression"]

    _until = datetime.datetime.today() + datetime.timedelta(
        GOOGLE_ANALYSIS_PERMISSIONS_GRANT_FOR_DAYS)
    assert (f'request.time < timestamp("{_until.date().isoformat()}T00:00:00Z")'
            in _policy.bindings[0]["condition"]["expression"])

    # Check that the job status was updated to reflect a successful upload
    assert job.status == UploadJobStatus.MERGE_COMPLETED.value

    assert email_was_sent(caplog.text)
    publish_artifact_upload.assert_called()
    _encode_and_publish.assert_called()