def test_finalizer_update(self, mock_feature_transformer,
                          mock_fetch_annotations,
                          mock_analysis_transformer,
                          mock_specimen_library_transformer,
                          mock_project_publication_contributor_transformer,
                          mock_log_error, mock_upload_and_load,
                          mock_swagger_spec):
    """finalizer_update runs every transformer against the staged bundles,
    uploads/loads as an update, and logs (rather than raises) transformer
    failures while still performing the load step."""
    mock_swagger_spec.return_value = self.stub_swagger_spec
    extractor = DSSExtractor(staging_directory="test_dir",
                             content_type_patterns=[],
                             filename_patterns=[],
                             dss_client=get_dss_client("dev"))

    finalizer_update(extractor)

    bundles_dir = "test_dir/bundles"
    transformer_mocks = (
        mock_feature_transformer,
        mock_analysis_transformer,
        mock_specimen_library_transformer,
        mock_project_publication_contributor_transformer,
    )
    for transformer_mock in transformer_mocks:
        transformer_mock.assert_called_once_with(bundles_dir)
    mock_upload_and_load.assert_called_once_with("test_dir", is_update=True)

    # A transformer raising must be logged against its class name, and the
    # upload/load step must still run.
    error = Exception()
    mock_upload_and_load.reset_mock()
    mock_analysis_transformer.side_effect = error
    finalizer_update(extractor)
    mock_log_error.assert_called_once_with("AnalysisTransformer", error,
                                           mock.ANY, extractor)
    mock_upload_and_load.assert_called_once_with("test_dir", is_update=True)
def read_bundle(fqid):
    """Fetch a bundle's RSEM gene-level results from the DSS and summarize them.

    Downloads the bundle's ``.genes.results`` file and computes expression
    metrics over its rows.

    :param fqid: fully-qualified bundle id of the form "<uuid>.<version>"
    :return: dict with keys:
        - "expression_sum": sum of ``expected_count`` over all rows
        - "expression_nonzero": number of distinct (unversioned) gene ids
          with a nonzero expected count
        - "cell_count": always 1 (one cell per bundle)
    """
    dss_client = get_dss_client(os.environ['DEPLOYMENT_STAGE'])
    # fqid is "<uuid>.<version>"; the version itself may contain dots,
    # so split only on the first one.
    bundle_uuid, bundle_version = fqid.split(".", 1)
    bundle = dss_client.get_bundle(uuid=bundle_uuid,
                                   version=bundle_version,
                                   replica="aws")
    # Locate the RSEM gene-level results file in the bundle manifest.
    rsem_file = [
        f for f in bundle['bundle']['files']
        if f["name"].endswith(".genes.results")
    ][0]
    rsem_contents = dss_client.get_file(uuid=rsem_file["uuid"],
                                        version=rsem_file["version"],
                                        replica="aws")
    rsem_reader = csv.DictReader(io.StringIO(rsem_contents.decode()),
                                 delimiter="\t")

    bundle_expression_sum = 0
    bundle_expression_nonzeros = set()
    for row in rsem_reader:
        # Parse once per row (the original parsed the same field twice).
        expected_count = float(row["expected_count"])
        bundle_expression_sum += expected_count
        if expected_count != 0.0:
            # Drop the version suffix from the gene id (e.g. "ENSG...​.12").
            bundle_expression_nonzeros.add(row["gene_id"].split(".", 1)[0])

    return {
        "expression_sum": bundle_expression_sum,
        "expression_nonzero": len(bundle_expression_nonzeros),
        "cell_count": 1
    }
def test_get_dss_client(self, mock_swagger_spec):
    """Each deployment stage resolves to the expected DSS API host."""
    mock_swagger_spec.return_value = self.stub_swagger_spec

    # Stages with their own dedicated DSS deployment.
    dedicated_stages = [
        ("integration", "dss.integration.data.humancellatlas.org"),
        ("staging", "dss.staging.data.humancellatlas.org"),
    ]
    for stage, host in dedicated_stages:
        self.stub_swagger_spec['host'] = host
        self.assertEqual(get_dss_client(stage).host, f"https://{host}/v1")

    # predev, dev and prod all share the production DSS host.
    self.stub_swagger_spec['host'] = "dss.data.humancellatlas.org"
    for stage in ("predev", "dev", "prod"):
        self.assertEqual(get_dss_client(stage).host,
                         "https://dss.data.humancellatlas.org/v1")
def _verify_load(es_query):
    """Verify that every bundle matching es_query produced an analysis row.

    Queries the DSS for the expected bundle fqids, then counts matching rows
    in the Redshift ``analysis`` table and asserts the counts agree.
    """
    stage = os.environ['DEPLOYMENT_STAGE']
    dss_client = etl.get_dss_client(deployment_stage=stage)
    search_results = dss_client.post_search.iterate(es_query=es_query,
                                                    replica='aws',
                                                    per_page=500)
    expected_bundles = [result['bundle_fqid'] for result in search_results]
    print(f"Loading {len(expected_bundles)} bundles to {stage} complete.\n"
          f"Verifying row counts in Redshift...")

    redshift = RedshiftHandler()
    count_bundles_query = (f"SELECT COUNT(*) FROM analysis WHERE "
                           f"bundle_fqid IN {format_str_list(expected_bundles)}")
    results = redshift.transaction(queries=[count_bundles_query],
                                   return_results=True)
    print(f"Found {results[0][0]} analysis rows for "
          f"{len(expected_bundles)} expected bundles.")
    assert (results[0][0] == len(expected_bundles))
def test_transform_bundle(self, mock_cell_expression_transform,
                          mock_log_error, mock_swagger_spec):
    """transform_bundle runs the cell/expression transform on the bundle
    path, and logs (not raises) failures keyed by the bundle fqid."""
    mock_swagger_spec.return_value = self.stub_swagger_spec
    extractor = DSSExtractor(staging_directory="test_dir",
                             content_type_patterns=[],
                             filename_patterns=[],
                             dss_client=get_dss_client("dev"))

    transform_bundle("test_uuid", "test_version", "test_path",
                     "test_manifest_path", extractor)
    mock_cell_expression_transform.assert_called_once_with("test_path")

    # A transform failure is recorded via _log_error with "<uuid>.<version>".
    error = Exception()
    mock_cell_expression_transform.side_effect = error
    transform_bundle("test_uuid", "test_version", "test_path",
                     "test_manifest_path", extractor)
    mock_log_error.assert_called_once_with("test_uuid.test_version", error,
                                           mock.ANY, extractor)
def test_log_error(self, mock_error, mock_datetime_now, mock_swagger_spec):
    """_log_error writes a timestamped failure record plus the bundle id to
    file, and emits an error log entry."""
    mock_swagger_spec.return_value = self.stub_swagger_spec
    mock_datetime_now.return_value = "timestamp"
    ex = Exception("msg")
    extractor = DSSExtractor(staging_directory="test_dir",
                             content_type_patterns=[],
                             filename_patterns=[],
                             dss_client=get_dss_client("dev"))

    with mock.patch("builtins.open", mock.mock_open()) as mock_open:
        _log_error("test_bundle", ex, "test_trace", extractor)
        handle = mock_open()
        handle.write.assert_has_calls([
            mock.call(
                "[timestamp] test_bundle failed with exception: msg\ntest_trace\n"
            ),
            mock.call("test_bundle\n"),
        ])
        self.assertTrue(mock_error.called)
import json import mock import os import boto3 from matrix.common import constants from matrix.common.etl import get_dss_client from scripts.dss_subscription import (recreate_dss_subscription, _generate_metadata_schema_version_clause, _regenerate_and_set_hmac_secret_key, DSS_SUBSCRIPTION_HMAC_SECRET_ID) from tests.unit import MatrixTestCaseUsingMockAWS DSS_CLIENT = get_dss_client("dev") class TestDssSubscription(MatrixTestCaseUsingMockAWS): def setUp(self): super(TestDssSubscription, self).setUp() self.swagger_spec_stub = { 'info': { 'description': "test_description" }, 'host': "test_host", 'basePath': "/v1", 'paths': {} } @mock.patch("secrets.token_hex") def test_regenerate_and_set_hmac_secret(self, mock_token_hex):