def test_load_results(self, mock_parse_manifest, mock_read_csv):
    """Check how often ``load_results`` reads CSV data for a three-part manifest.

    The mocked manifest lists three part URLs; the test expects the mocked
    ``read_csv`` to have been called exactly three times afterwards.
    """
    manifest = {
        "columns": ["a", "b", "c"],
        "part_urls": ["A", "B", "C"],
        "record_count": 5
    }
    mock_parse_manifest.return_value = manifest
    # An empty frame is sufficient: only the number of reads is asserted.
    mock_read_csv.return_value = pandas.DataFrame()

    feature_reader = FeatureQueryResultsReader("test_manifest_key")
    feature_reader.load_results()

    self.assertEqual(mock_read_csv.call_count, 3)
def test_empty_results(self, mock_parse_manifest, mock_upload_converted_matrix,
                       mock_complete_subtask_execution, mock_complete_request,
                       mock_creation_date, mock_remove):
    """Run the converter when every manifest reports zero records.

    Expects a zero-byte output file, the converter subtask to be completed
    exactly once with ``Subtask.CONVERTER`` and the request to be completed
    exactly once.

    ``mock_upload_converted_matrix`` and ``mock_remove`` are not referenced
    in the body; presumably they are injected by patch decorators that are
    not visible here.
    """
    output_filename = "unit_test_empty_loom.loom"
    manifest_key = "test_manifest_key"

    creation_timestamp = date.to_string(datetime.datetime.utcnow())
    mock_creation_date.return_value = creation_timestamp

    # The readers are built before the mocked manifest value is set below,
    # exactly as in the original ordering.
    readers = {
        QueryType.CELL: CellQueryResultsReader(manifest_key),
        QueryType.EXPRESSION: ExpressionQueryResultsReader(manifest_key),
        QueryType.FEATURE: FeatureQueryResultsReader(manifest_key)
    }
    self.matrix_converter.query_results = readers

    mock_parse_manifest.return_value = {"record_count": 0}
    self.matrix_converter.local_output_filename = output_filename

    self.matrix_converter.run()

    output_size = os.path.getsize(output_filename)
    self.assertEqual(output_size, 0)
    mock_complete_subtask_execution.assert_called_once_with(Subtask.CONVERTER)
    mock_complete_request.assert_called_once()

    os.remove(output_filename)
def run(self):
    """Convert the query results to ``self.format`` and upload the output.

    Steps, in order:

    1. Build one results reader per query type from the manifest keys in
       ``self.args`` and store them on ``self.query_results``.
    2. Call the conversion method named ``_to_<format>`` and take its return
       value as the local path of the converted file.
    3. Upload that file to ``self.target_path`` and delete the local copy.
    4. Mark the converter subtask and the request as complete on the
       request tracker, reporting the seconds elapsed since the tracker's
       creation date.

    Raises:
        Exception: any error raised by the steps above is logged, recorded
            through ``self.request_tracker.log_error`` and re-raised.
    """
    try:
        LOGGER.debug(f"Beginning matrix conversion run for {self.args.request_id}")
        self.query_results = {
            QueryType.CELL: CellQueryResultsReader(self.args.cell_metadata_manifest_key),
            QueryType.EXPRESSION: ExpressionQueryResultsReader(self.args.expression_manifest_key),
            QueryType.FEATURE: FeatureQueryResultsReader(self.args.gene_metadata_manifest_key)
        }

        LOGGER.debug(f"Beginning conversion to {self.format}")
        # Dispatch by name: the converter method is looked up as "_to_<format>".
        convert = getattr(self, f"_to_{self.format}")
        local_converted_path = convert()
        LOGGER.debug(f"Conversion to {self.format} completed")

        LOGGER.debug("Beginning upload to S3")
        self._upload_converted_matrix(local_converted_path, self.target_path)
        LOGGER.debug("Upload to S3 complete, job finished")

        os.remove(local_converted_path)

        self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
        # Elapsed time is measured only after the subtask has been marked complete.
        elapsed = (date.get_datetime_now()
                   - date.to_datetime(self.request_tracker.creation_date))
        self.request_tracker.complete_request(duration=elapsed.total_seconds())
    except Exception as e:
        LOGGER.info(f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}")
        self.request_tracker.log_error(str(e))
        # Bare ``raise`` re-raises the active exception as-is, without adding
        # a redundant traceback entry for this handler.
        raise
def test__write_out_gene_dataframe__without_compression(self, mock_parse_manifest,
                                                        mock_load_results, mock_to_csv):
    """Write the gene dataframe with ``compression=False`` and check the CSV call.

    Expects a ``DataFrame`` to be returned, the feature results to be loaded
    exactly once and ``to_csv`` to be called once with the uncompressed
    target path and the ``featurekey`` index label.
    """
    converter = self.matrix_converter
    converter.query_results = {
        QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
    }
    output_dir = converter._make_directory()
    mock_load_results.return_value = pandas.DataFrame()

    written = converter._write_out_gene_dataframe(
        output_dir, 'genes.csv', compression=False)

    returned_type_name = type(written).__name__
    self.assertEqual(returned_type_name, 'DataFrame')
    mock_load_results.assert_called_once()
    mock_to_csv.assert_called_once_with(
        './test_target/genes.csv', index_label='featurekey')

    shutil.rmtree(output_dir)
def test__to_loom(self, mock_parse_manifest, mock_load_gene_results,
                  mock_load_cell_results, mock_generate_dfs):
    """Convert generated test data to a loom file and check its expression sums.

    The gene, cell and expression results are mocked from
    ``self._create_test_data()``. The resulting loom file is opened and the
    total expression, plus the per-cell totals of the first two cells and
    the last cell, are compared against the source expression dataframes.
    """
    working_dir = "unit_test__to_loom"
    self.matrix_converter.working_dir = working_dir
    test_data = self._create_test_data()

    self.matrix_converter.query_results = {
        QueryType.CELL: CellQueryResultsReader("test_manifest_key"),
        QueryType.EXPRESSION: ExpressionQueryResultsReader("test_manifest_key"),
        QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
    }
    self.matrix_converter.query_results[QueryType.CELL].manifest = {
        "record_count": test_data["cells_df"].shape[0]
    }

    mock_load_gene_results.return_value = test_data["genes_df"]
    mock_load_cell_results.return_value = test_data["cells_df"]

    expression_manifest = {
        "record_count": sum(d.shape[0] for d in test_data["expr_dfs"])
    }
    mock_parse_manifest.return_value = expression_manifest
    mock_generate_dfs.return_value = iter(test_data["expr_dfs"])

    self.matrix_converter.local_output_filename = "unit_test__to_loom.loom"
    loom_path = self.matrix_converter._to_loom()

    # Use the connection as a context manager so the loom file is closed
    # even when an assertion fails, and before the directory is removed.
    with loompy.connect(loom_path) as ds:
        self.assertAlmostEqual(
            ds[:, :].sum(),
            sum(d["exprvalue"].sum() for d in test_data["expr_dfs"]),
            -1)

        # Every cell has 20 genes with non-zero expression. Check the first
        # two cells and the last cell to make sure that the expression matches.
        self.assertAlmostEqual(
            ds[:, 0].sum(),
            test_data["expr_dfs"][0]['exprvalue'][:20].sum(),
            1)
        self.assertAlmostEqual(
            ds[:, 1].sum(),
            test_data["expr_dfs"][0]['exprvalue'][20:40].sum(),
            1)
        self.assertAlmostEqual(
            ds[:, -1].sum(),
            test_data["expr_dfs"][-1]['exprvalue'][-20:].sum(),
            1)

    shutil.rmtree(working_dir)
def test__write_out_gene_dataframe_10x(self, mock_parse_manifest,
                                       mock_load_results, mock_to_csv):
    """Write the gene dataframe via the 10x writer and check the CSV call.

    Expects a ``DataFrame`` whose second column is ``featuretype_10x``, a
    single load of the feature results and a single ``to_csv`` call with
    gzip compression, tab separator, no header and the ``featurekey`` index
    label.
    """
    converter = self.matrix_converter
    converter.query_results = {
        QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
    }
    output_dir = converter._make_directory()
    genes_df = self._create_test_data()['genes_df']
    mock_load_results.return_value = genes_df

    written = converter._write_out_gene_dataframe_10x(output_dir, 'genes.csv.gz')

    self.assertEqual(type(written).__name__, 'DataFrame')
    column_names = written.columns.tolist()
    self.assertEqual(column_names[1], 'featuretype_10x')
    mock_load_results.assert_called_once()
    mock_to_csv.assert_called_once_with(
        './test_target/genes.csv.gz',
        compression='gzip',
        index_label='featurekey',
        header=False,
        sep='\t')

    shutil.rmtree(output_dir)
def test_load_slice(self, mock_parse_manifest):
    """Check that ``FeatureQueryResultsReader.load_slice`` raises NotImplementedError."""
    feature_reader = FeatureQueryResultsReader("test_manifest_key")
    # Callable form of assertRaises: invokes load_slice(0) and checks the error.
    self.assertRaises(NotImplementedError, feature_reader.load_slice, 0)