def test_errors(self):
        # Malformed inputs must surface as the dedicated query_constructor
        # exception types.

        # An empty filter dict is expected to be rejected as a malformed filter.
        empty_filter = {}
        with self.assertRaises(query_constructor.MalformedMatrixFilter):
            query_constructor.create_matrix_request_queries(empty_filter, ["specimen.organ"], "gene")

        # A simple equality filter combined with the feature name "baz" is
        # expected to be rejected as a malformed feature.
        equality_filter = {"op": "=", "field": "foo", "value": "bar"}
        with self.assertRaises(query_constructor.MalformedMatrixFeature):
            query_constructor.create_matrix_request_queries(equality_filter, ["specimen.organ"], "baz")
    def test_filter_conversion(self):
        filter_ = \
            {
                "op": "and",
                "value": [
                    {
                        "op": "=",
                        "field": "project.project_core.project_short_name",
                        "value": "project1"
                    },
                    {
                        "op": ">",
                        "field": "genes_detected",
                        "value": 1000
                    }
                ]
            }

        feature = "transcript"
        queries = query_constructor.create_matrix_request_queries(
            filter_, query_constructor.DEFAULT_FIELDS, feature)

        expected_cell_query = (
            """
UNLOAD($$SELECT cell.cellkey, cell.cell_suspension_id, cell.genes_detected, specimen.*, library_preparation.*, project.*, analysis.*"""
            # noqa: E501
            """
FROM cell
  LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey)
  LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  LEFT OUTER JOIN project on (cell.projectkey = project.projectkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
WHERE ((project.short_name = 'project1') AND (cell.genes_detected > 1000))$$)
TO 's3://{results_bucket}/{request_id}/cell_metadata_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
""")
        self.assertEqual(queries["cell_query"], expected_cell_query)

        expected_exp_query = ("""
UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue
FROM expression
  LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey)
  INNER JOIN cell on (expression.cellkey = cell.cellkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
  INNER JOIN specimen on (cell.specimenkey = specimen.specimenkey)
  INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  INNER JOIN project on (cell.projectkey = project.projectkey)
WHERE (NOT feature.isgene)
  AND expression.exprtype = 'Count'
  AND ((project.short_name = 'project1') AND (cell.genes_detected > 1000))$$)
TO 's3://{results_bucket}/{request_id}/expression_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
""")
        self.assertEqual(queries["expression_query"], expected_exp_query)
    def test_field_conversion(self):
        filter_ = \
            {
                "op": "not",
                "value": [{
                    "op": "in",
                    "field": "foo",
                    "value": ["bar", "baz"]
                }]
            }
        fields = [
            "dss_bundle_fqid", "genes_detected",
            "library_preparation_protocol.strand"
        ]
        feature = "gene"
        queries = query_constructor.create_matrix_request_queries(
            filter_, fields, feature)

        expected_cell_query = ("""
UNLOAD($$SELECT cell.cellkey, analysis.bundle_fqid, cell.genes_detected, library_preparation.strand
FROM cell
  LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey)
  LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  LEFT OUTER JOIN project on (cell.projectkey = project.projectkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
WHERE NOT (foo IN ('bar', 'baz'))$$)
TO 's3://{results_bucket}/{request_id}/cell_metadata_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
""")
        self.assertEqual(queries["cell_query"], expected_cell_query)
    def run(
            self,
            filter_: typing.Dict[str, typing.Any],
            fields: typing.List[str],
            feature: str,
            genus_species: str):
        """
        Build the queries for a matrix request and hand them off for execution.

        The filter is passed through ``query_constructor.speciesify_filter`` together with
        ``genus_species``; the result, ``fields`` and ``feature`` are then given to
        ``query_constructor.create_matrix_request_queries``. The resulting queries go to
        ``_format_and_store_queries_in_s3``, and ``_add_request_query_to_sqs`` is called once
        per entry of what that returns. Finally the DRIVER subtask is marked complete on the
        request tracker.

        :param filter_: Filter dict describing which cells to get expression data for
        :param fields: Which metadata fields to return
        :param feature: Which feature (gene vs transcript) to include in output
        :param genus_species: Species value forwarded to ``speciesify_filter`` and to
            ``_format_and_store_queries_in_s3``
        :raises query_constructor.MalformedMatrixFilter: re-raised after being logged on the request tracker
        :raises query_constructor.MalformedMatrixFeature: re-raised after being logged on the request tracker
        """
        logger.debug(f"Driver running with parameters: filter={filter_}, fields={fields}, feature={feature}")

        try:
            # Kept inside the try so that errors raised while adapting the filter
            # are logged on the request tracker as well.
            queries = query_constructor.create_matrix_request_queries(
                query_constructor.speciesify_filter(filter_, genus_species), fields, feature)
        except (query_constructor.MalformedMatrixFilter, query_constructor.MalformedMatrixFeature) as exc:
            # Record the failure against the request before propagating it.
            self.request_tracker.log_error(f"Query construction failed with error: {exc!s}")
            raise

        stored = self._format_and_store_queries_in_s3(queries, genus_species)
        for entry in stored:
            self._add_request_query_to_sqs(entry, stored[entry])

        self.request_tracker.complete_subtask_execution(Subtask.DRIVER)
Exemple #5
0
    def test_transcript(self):
        filter_ = \
            {
                "op": "in",
                "field": "foo",
                "value": ["bar", "baz"]
            }

        fields = ["test.field"]
        feature = "transcript"

        queries = query_constructor.create_matrix_request_queries(
            filter_, fields, feature)
        expected_exp_query = """
UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue
FROM expression
  LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey)
  INNER JOIN cell on (expression.cellkey = cell.cellkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
  INNER JOIN cell_suspension on (cell.cellsuspensionkey = cell_suspension.cellsuspensionkey)
  INNER JOIN specimen on (cell_suspension.specimenkey = specimen.specimenkey)
  INNER JOIN donor on (specimen.donorkey = donor.donorkey)
  INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  INNER JOIN project on (cell.projectkey = project.projectkey)
WHERE (NOT feature.isgene)
  AND expression.exprtype = 'Count'
  AND foo IN ('bar', 'baz')$$)
TO 's3://{results_bucket}/{request_id}/expression_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
"""
        self.assertEqual(queries[QueryType.EXPRESSION], expected_exp_query)

        expected_feature_query = """
UNLOAD ($$SELECT *
FROM feature
WHERE (NOT feature.isgene)
  AND feature.genus_species = '{genus_species}'$$)
to 's3://{results_bucket}/{request_id}/gene_metadata_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE;
"""
        self.assertEqual(queries[QueryType.FEATURE], expected_feature_query)
    def test_nested(self):
        filter_ = \
            {
                "op": "or",
                "value": [
                    {
                        "op": "and",
                        "value": [
                            {
                                "op": "in",
                                "field": "num_field_1",
                                "value": [1, 2, 3, 4]
                            },
                            {
                                "op": ">",
                                "field": "num_field_2",
                                "value": 50
                            },
                            {
                                "op": "=",
                                "field": "quuz",
                                "value": "thud"
                            }
                        ]
                    },
                    {
                        "op": "not",
                        "value": [
                            {
                                "op": "in",
                                "field": "foo",
                                "value": ["bar", "baz"]
                            },
                        ]
                    },
                    {
                        "op": ">",
                        "field": "qux",
                        "value": 5
                    },
                    {
                        "op": "=",
                        "field": "quuz",
                        "value": "thud"
                    }
                ]
            }
        fields = ["test.field1", "test.field2"]
        feature = "gene"

        queries = query_constructor.create_matrix_request_queries(
            filter_, fields, feature)
        expected_cell_query = (
            """
UNLOAD($$SELECT cell.cellkey, test.field1, test.field2
FROM cell
  LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey)
  LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  LEFT OUTER JOIN project on (cell.projectkey = project.projectkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
WHERE ((((num_field_1 IN (1, 2, 3, 4)) AND (num_field_2 > 50) AND (quuz = 'thud'))) OR (NOT (foo IN ('bar', 'baz'))) OR (qux > 5) OR (quuz = 'thud'))$$)"""

            # noqa: E501
            """
TO 's3://{results_bucket}/{request_id}/cell_metadata_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
""")
        self.assertEqual(queries["cell_query"], expected_cell_query)

        expected_exp_query = (
            """
UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue
FROM expression
  LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey)
  INNER JOIN cell on (expression.cellkey = cell.cellkey)
  INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey)
  INNER JOIN specimen on (cell.specimenkey = specimen.specimenkey)
  INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey)
  INNER JOIN project on (cell.projectkey = project.projectkey)
WHERE feature.isgene
  AND expression.exprtype = 'Count'
  AND ((((num_field_1 IN (1, 2, 3, 4)) AND (num_field_2 > 50) AND (quuz = 'thud'))) OR (NOT (foo IN ('bar', 'baz'))) OR (qux > 5) OR (quuz = 'thud'))$$)"""

            # noqa: E501
            """
TO 's3://{results_bucket}/{request_id}/expression_'
IAM_ROLE '{iam_role}'
GZIP
MANIFEST VERBOSE
;
""")
        self.assertEqual(queries["expression_query"], expected_exp_query)