def test_finalizer_update(
            self, mock_feature_transformer, mock_fetch_annotations,
            mock_analysis_transformer, mock_specimen_library_transformer,
            mock_project_publication_contributor_transformer, mock_log_error,
            mock_upload_and_load, mock_swagger_spec):
        mock_swagger_spec.return_value = self.stub_swagger_spec
        extractor = DSSExtractor(staging_directory="test_dir",
                                 content_type_patterns=[],
                                 filename_patterns=[],
                                 dss_client=get_dss_client("dev"))

        finalizer_update(extractor)
        mock_feature_transformer.assert_called_once_with("test_dir/bundles")
        mock_analysis_transformer.assert_called_once_with("test_dir/bundles")
        mock_specimen_library_transformer.assert_called_once_with(
            "test_dir/bundles")
        mock_project_publication_contributor_transformer.assert_called_once_with(
            "test_dir/bundles")
        mock_upload_and_load.assert_called_once_with("test_dir",
                                                     is_update=True)

        e = Exception()
        mock_upload_and_load.reset_mock()
        mock_analysis_transformer.side_effect = e
        finalizer_update(extractor)
        mock_log_error.assert_called_once_with("AnalysisTransformer", e,
                                               mock.ANY, extractor)
        mock_upload_and_load.assert_called_once_with("test_dir",
                                                     is_update=True)
Example #2
    def read_bundle(fqid):
        # Resolve the bundle from the DSS, locate its RSEM ".genes.results"
        # output, and aggregate expected counts into per-bundle expression metrics.
        dss_client = get_dss_client(os.environ['DEPLOYMENT_STAGE'])

        bundle_uuid, bundle_version = fqid.split(".", 1)
        bundle = dss_client.get_bundle(uuid=bundle_uuid,
                                       version=bundle_version,
                                       replica="aws")

        rsem_file = [
            f for f in bundle['bundle']['files']
            if f["name"].endswith(".genes.results")
        ][0]

        rsem_contents = dss_client.get_file(uuid=rsem_file["uuid"],
                                            version=rsem_file["version"],
                                            replica="aws")

        rsem_reader = csv.DictReader(io.StringIO(rsem_contents.decode()),
                                     delimiter="\t")

        bundle_expression_sum = 0
        bundle_expression_nonzeros = set()
        for row in rsem_reader:
            expected_count = float(row["expected_count"])
            bundle_expression_sum += expected_count
            if expected_count != 0.0:
                bundle_expression_nonzeros.add(row["gene_id"].split(".", 1)[0])
        return {
            "expression_sum": bundle_expression_sum,
            "expression_nonzero": len(bundle_expression_nonzeros),
            "cell_count": 1
        }
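
A hypothetical call to the helper above (hedged sketch: the FQID value, the "dev"
stage, and the import are illustrative assumptions, not taken from the original
source):

    import os

    os.environ.setdefault("DEPLOYMENT_STAGE", "dev")  # placeholder stage
    # Bundle FQIDs have the form "<uuid>.<version>"; this value is a placeholder.
    metrics = read_bundle("aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee.2019-01-01T000000.000000Z")
    print(metrics["expression_sum"], metrics["expression_nonzero"], metrics["cell_count"])
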
    def test_get_dss_client(self, mock_swagger_spec):
        mock_swagger_spec.return_value = self.stub_swagger_spec

        self.stub_swagger_spec[
            'host'] = "dss.integration.data.humancellatlas.org"
        self.assertEqual(
            get_dss_client("integration").host,
            "https://dss.integration.data.humancellatlas.org/v1")

        self.stub_swagger_spec['host'] = "dss.staging.data.humancellatlas.org"
        self.assertEqual(
            get_dss_client("staging").host,
            "https://dss.staging.data.humancellatlas.org/v1")

        self.stub_swagger_spec['host'] = "dss.data.humancellatlas.org"
        self.assertEqual(
            get_dss_client("predev").host,
            "https://dss.data.humancellatlas.org/v1")
        self.assertEqual(
            get_dss_client("dev").host,
            "https://dss.data.humancellatlas.org/v1")
        self.assertEqual(
            get_dss_client("prod").host,
            "https://dss.data.humancellatlas.org/v1")
def _verify_load(es_query):
    # Confirm that every bundle matched by es_query has a corresponding
    # analysis row in Redshift after the load completes.
    dss_client = etl.get_dss_client(deployment_stage=os.environ['DEPLOYMENT_STAGE'])
    response = dss_client.post_search.iterate(es_query=es_query,
                                              replica='aws',
                                              per_page=500)
    expected_bundles = [result['bundle_fqid'] for result in response]

    print(f"Loading {len(expected_bundles)} bundles to {os.environ['DEPLOYMENT_STAGE']} complete.\n"
          f"Verifying row counts in Redshift...")
    redshift = RedshiftHandler()
    count_bundles_query = f"SELECT COUNT(*) FROM analysis WHERE bundle_fqid IN {format_str_list(expected_bundles)}"
    results = redshift.transaction(queries=[count_bundles_query],
                                   return_results=True)
    print(f"Found {results[0][0]} analysis rows for {len(expected_bundles)} expected bundles.")
    assert results[0][0] == len(expected_bundles)
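
A hypothetical invocation of _verify_load (hedged: the match-all Elasticsearch
query is illustrative only, and DEPLOYMENT_STAGE must already be set in the
environment):

    es_query = {"query": {"match_all": {}}}  # placeholder query
    _verify_load(es_query)  # raises AssertionError if the Redshift count differs
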
    def test_transform_bundle(self, mock_cell_expression_transform,
                              mock_log_error, mock_swagger_spec):
        mock_swagger_spec.return_value = self.stub_swagger_spec
        extractor = DSSExtractor(staging_directory="test_dir",
                                 content_type_patterns=[],
                                 filename_patterns=[],
                                 dss_client=get_dss_client("dev"))

        transform_bundle("test_uuid", "test_version", "test_path",
                         "test_manifest_path", extractor)
        mock_cell_expression_transform.assert_called_once_with("test_path")

        e = Exception()
        mock_cell_expression_transform.side_effect = e
        transform_bundle("test_uuid", "test_version", "test_path",
                         "test_manifest_path", extractor)
        mock_log_error.assert_called_once_with("test_uuid.test_version", e,
                                               mock.ANY, extractor)
    def test_log_error(self, mock_error, mock_datetime_now, mock_swagger_spec):
        mock_swagger_spec.return_value = self.stub_swagger_spec
        mock_datetime_now.return_value = "timestamp"

        ex = Exception("msg")
        extractor = DSSExtractor(staging_directory="test_dir",
                                 content_type_patterns=[],
                                 filename_patterns=[],
                                 dss_client=get_dss_client("dev"))

        with mock.patch("builtins.open", mock.mock_open()) as mock_open:
            _log_error("test_bundle", ex, "test_trace", extractor)

            handle = mock_open()
            expected_calls = [
                mock.call(
                    "[timestamp] test_bundle failed with exception: msg\ntest_trace\n"
                ),
                mock.call("test_bundle\n")
            ]
            handle.write.assert_has_calls(expected_calls)
            self.assertTrue(mock_error.called)
Example #7
import json
import mock
import os

import boto3

from matrix.common import constants
from matrix.common.etl import get_dss_client
from scripts.dss_subscription import (recreate_dss_subscription,
                                      _generate_metadata_schema_version_clause,
                                      _regenerate_and_set_hmac_secret_key,
                                      DSS_SUBSCRIPTION_HMAC_SECRET_ID)
from tests.unit import MatrixTestCaseUsingMockAWS

DSS_CLIENT = get_dss_client("dev")


class TestDssSubscription(MatrixTestCaseUsingMockAWS):
    def setUp(self):
        super(TestDssSubscription, self).setUp()
        self.swagger_spec_stub = {
            'info': {
                'description': "test_description"
            },
            'host': "test_host",
            'basePath': "/v1",
            'paths': {}
        }

    @mock.patch("secrets.token_hex")
    def test_regenerate_and_set_hmac_secret(self, mock_token_hex):