def test_errors(self):
    """Query construction rejects an empty filter and an unrecognised feature.

    An empty filter dict is expected to raise ``MalformedMatrixFilter``;
    a well-formed comparison filter combined with the feature ``"baz"`` is
    expected to raise ``MalformedMatrixFeature``.
    """
    build_queries = query_constructor.create_matrix_request_queries

    # Case 1: the filter is an empty dict.
    self.assertRaises(
        query_constructor.MalformedMatrixFilter,
        build_queries,
        {},
        ["specimen.organ"],
        "gene",
    )

    # Case 2: the filter is well-formed, but the feature name is "baz".
    comparison_filter = {"op": "=", "field": "foo", "value": "bar"}
    self.assertRaises(
        query_constructor.MalformedMatrixFeature,
        build_queries,
        comparison_filter,
        ["specimen.organ"],
        "baz",
    )
def test_filter_conversion(self):
    """Check the queries built from a two-clause ``and`` filter.

    Builds queries for the ``transcript`` feature with
    ``query_constructor.DEFAULT_FIELDS`` and compares the ``cell_query`` and
    ``expression_query`` entries against the expected UNLOAD statements.
    """
    # NOTE(review): assertEqual compares the expected SQL literals exactly, so
    # whitespace inside them is significant -- confirm the literal layout
    # against the templates in query_constructor before reformatting.
    filter_ = \
        {
            "op": "and",
            "value": [
                {
                    "op": "=",
                    "field": "project.project_core.project_short_name",
                    "value": "project1"
                },
                {
                    "op": ">",
                    "field": "genes_detected",
                    "value": 1000
                }
            ]
        }
    feature = "transcript"
    queries = query_constructor.create_matrix_request_queries(
        filter_, query_constructor.DEFAULT_FIELDS, feature)
    # The expected WHERE clause uses project.short_name and
    # cell.genes_detected in place of the field names given in the filter.
    expected_cell_query = (
        """ UNLOAD($$SELECT cell.cellkey, cell.cell_suspension_id, cell.genes_detected, specimen.*, library_preparation.*, project.*, analysis.*"""  # noqa: E501
        """ FROM cell LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey) LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) LEFT OUTER JOIN project on (cell.projectkey = project.projectkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) WHERE ((project.short_name = 'project1') AND (cell.genes_detected > 1000))$$) TO 's3://{results_bucket}/{request_id}/cell_metadata_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """)
    self.assertEqual(queries["cell_query"], expected_cell_query)
    # For the transcript feature the expected expression query filters on
    # (NOT feature.isgene).
    expected_exp_query = (""" UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue FROM expression LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey) INNER JOIN cell on (expression.cellkey = cell.cellkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) INNER JOIN specimen on (cell.specimenkey = specimen.specimenkey) INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) INNER JOIN project on (cell.projectkey = project.projectkey) WHERE (NOT feature.isgene) AND expression.exprtype = 'Count' AND ((project.short_name = 'project1') AND (cell.genes_detected > 1000))$$) TO 's3://{results_bucket}/{request_id}/expression_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """)
    self.assertEqual(queries["expression_query"], expected_exp_query)
def test_field_conversion(self):
    """Check the cell query's SELECT list for three requested fields.

    Builds queries for the ``gene`` feature with a ``not``/``in`` filter and
    compares the ``cell_query`` entry against the expected UNLOAD statement.
    """
    # NOTE(review): assertEqual compares the expected SQL literal exactly, so
    # whitespace inside it is significant -- confirm the literal layout
    # against the templates in query_constructor before reformatting.
    filter_ = \
        {
            "op": "not",
            "value": [{
                "op": "in",
                "field": "foo",
                "value": ["bar", "baz"]
            }]
        }
    fields = [
        "dss_bundle_fqid",
        "genes_detected",
        "library_preparation_protocol.strand"
    ]
    feature = "gene"
    queries = query_constructor.create_matrix_request_queries(
        filter_, fields, feature)
    # The expected SELECT list names analysis.bundle_fqid,
    # cell.genes_detected and library_preparation.strand for the three
    # requested fields; the field "foo" appears unchanged in the WHERE clause.
    expected_cell_query = (""" UNLOAD($$SELECT cell.cellkey, analysis.bundle_fqid, cell.genes_detected, library_preparation.strand FROM cell LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey) LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) LEFT OUTER JOIN project on (cell.projectkey = project.projectkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) WHERE NOT (foo IN ('bar', 'baz'))$$) TO 's3://{results_bucket}/{request_id}/cell_metadata_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """)
    self.assertEqual(queries["cell_query"], expected_cell_query)
def run(self,
        filter_: typing.Dict[str, typing.Any],
        fields: typing.List[str],
        feature: str,
        genus_species: str) -> None:
    """
    Initialize a matrix service request and spawn redshift queries.

    :param filter_: Filter dict describing which cells to get expression data for
    :param fields: Which metadata fields to return
    :param feature: Which feature (gene vs transcript) to include in output
    :param genus_species: Species value applied to the filter via
        ``query_constructor.speciesify_filter`` and forwarded when the
        queries are formatted and stored
    :raises query_constructor.MalformedMatrixFilter: re-raised after the
        failure has been recorded on the request tracker
    :raises query_constructor.MalformedMatrixFeature: re-raised after the
        failure has been recorded on the request tracker
    """
    logger.debug(f"Driver running with parameters: filter={filter_}, "
                 f"fields={fields}, feature={feature}")

    try:
        species_filter = query_constructor.speciesify_filter(filter_, genus_species)
        matrix_request_queries = query_constructor.create_matrix_request_queries(
            species_filter, fields, feature)
    except (query_constructor.MalformedMatrixFilter,
            query_constructor.MalformedMatrixFeature) as exc:
        # Record the failure on the request before propagating it to the caller.
        self.request_tracker.log_error(f"Query construction failed with error: {exc!s}")
        raise

    # NOTE(review): presumably a dict of key -> stored S3 object key, since
    # the original indexed it by the keys it iterated over -- confirm against
    # _format_and_store_queries_in_s3.
    s3_obj_keys = self._format_and_store_queries_in_s3(matrix_request_queries, genus_species)
    for key, s3_obj_key in s3_obj_keys.items():
        self._add_request_query_to_sqs(key, s3_obj_key)

    self.request_tracker.complete_subtask_execution(Subtask.DRIVER)
def test_transcript(self):
    """Check the expression and feature queries for the ``transcript`` feature.

    Builds queries with an ``in`` filter and a single requested field, then
    compares the ``QueryType.EXPRESSION`` and ``QueryType.FEATURE`` entries
    against the expected UNLOAD statements.
    """
    # NOTE(review): assertEqual compares the expected SQL literals exactly, so
    # whitespace inside them is significant -- confirm the literal layout
    # against the templates in query_constructor before reformatting.
    filter_ = \
        {
            "op": "in",
            "field": "foo",
            "value": ["bar", "baz"]
        }
    fields = ["test.field"]
    feature = "transcript"
    queries = query_constructor.create_matrix_request_queries(
        filter_, fields, feature)
    # Both expected statements restrict rows with (NOT feature.isgene).
    expected_exp_query = """ UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue FROM expression LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey) INNER JOIN cell on (expression.cellkey = cell.cellkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) INNER JOIN cell_suspension on (cell.cellsuspensionkey = cell_suspension.cellsuspensionkey) INNER JOIN specimen on (cell_suspension.specimenkey = specimen.specimenkey) INNER JOIN donor on (specimen.donorkey = donor.donorkey) INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) INNER JOIN project on (cell.projectkey = project.projectkey) WHERE (NOT feature.isgene) AND expression.exprtype = 'Count' AND foo IN ('bar', 'baz')$$) TO 's3://{results_bucket}/{request_id}/expression_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """
    self.assertEqual(queries[QueryType.EXPRESSION], expected_exp_query)
    # The expected feature query still contains the unformatted
    # '{genus_species}' placeholder.
    expected_feature_query = """ UNLOAD ($$SELECT * FROM feature WHERE (NOT feature.isgene) AND feature.genus_species = '{genus_species}'$$) to 's3://{results_bucket}/{request_id}/gene_metadata_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE; """
    self.assertEqual(queries[QueryType.FEATURE], expected_feature_query)
def test_nested(self):
    """Check the queries built from a nested ``or``/``and``/``not`` filter.

    Builds queries for the ``gene`` feature with two requested fields and
    compares the ``cell_query`` and ``expression_query`` entries against the
    expected UNLOAD statements.
    """
    # NOTE(review): assertEqual compares the expected SQL literals exactly, so
    # whitespace inside them is significant -- confirm the literal layout
    # against the templates in query_constructor before reformatting.
    filter_ = \
        {
            "op": "or",
            "value": [
                {
                    "op": "and",
                    "value": [
                        {
                            "op": "in",
                            "field": "num_field_1",
                            "value": [1, 2, 3, 4]
                        },
                        {
                            "op": ">",
                            "field": "num_field_2",
                            "value": 50
                        },
                        {
                            "op": "=",
                            "field": "quuz",
                            "value": "thud"
                        }
                    ]
                },
                {
                    "op": "not",
                    "value": [
                        {
                            "op": "in",
                            "field": "foo",
                            "value": ["bar", "baz"]
                        },
                    ]
                },
                {
                    "op": ">",
                    "field": "qux",
                    "value": 5
                },
                {
                    "op": "=",
                    "field": "quuz",
                    "value": "thud"
                }
            ]
        }
    fields = ["test.field1", "test.field2"]
    feature = "gene"
    queries = query_constructor.create_matrix_request_queries(
        filter_, fields, feature)
    # Each sub-clause of the filter appears parenthesised in the expected
    # WHERE clause, joined by the AND / OR / NOT of its enclosing operator.
    expected_cell_query = (
        """ UNLOAD($$SELECT cell.cellkey, test.field1, test.field2 FROM cell LEFT OUTER JOIN specimen on (cell.specimenkey = specimen.specimenkey) LEFT OUTER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) LEFT OUTER JOIN project on (cell.projectkey = project.projectkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) WHERE ((((num_field_1 IN (1, 2, 3, 4)) AND (num_field_2 > 50) AND (quuz = 'thud'))) OR (NOT (foo IN ('bar', 'baz'))) OR (qux > 5) OR (quuz = 'thud'))$$)"""  # noqa: E501
        """ TO 's3://{results_bucket}/{request_id}/cell_metadata_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """)
    self.assertEqual(queries["cell_query"], expected_cell_query)
    # For the gene feature the expected expression query filters on
    # feature.isgene.
    expected_exp_query = (
        """ UNLOAD ($$SELECT cell.cellkey, expression.featurekey, expression.exrpvalue FROM expression LEFT OUTER JOIN feature on (expression.featurekey = feature.featurekey) INNER JOIN cell on (expression.cellkey = cell.cellkey) INNER JOIN analysis on (cell.analysiskey = analysis.analysiskey) INNER JOIN specimen on (cell.specimenkey = specimen.specimenkey) INNER JOIN library_preparation on (cell.librarykey = library_preparation.librarykey) INNER JOIN project on (cell.projectkey = project.projectkey) WHERE feature.isgene AND expression.exprtype = 'Count' AND ((((num_field_1 IN (1, 2, 3, 4)) AND (num_field_2 > 50) AND (quuz = 'thud'))) OR (NOT (foo IN ('bar', 'baz'))) OR 
(qux > 5) OR (quuz = 'thud'))$$)"""  # noqa: E501
        """ TO 's3://{results_bucket}/{request_id}/expression_' IAM_ROLE '{iam_role}' GZIP MANIFEST VERBOSE ; """)
    self.assertEqual(queries["expression_query"], expected_exp_query)