예제 #1
0
 def test_load_results(self, mock_parse_manifest, mock_read_csv):
     """Check how many times read_csv is called by ``load_results``.

     The mocked manifest parser returns a manifest listing three part URLs,
     and the test expects exactly three calls to the mocked ``read_csv``.
     """
     fake_manifest = {
         "columns": ["a", "b", "c"],
         "part_urls": ["A", "B", "C"],
         "record_count": 5,
     }
     mock_parse_manifest.return_value = fake_manifest
     # An empty frame is enough: only the number of reads is checked.
     mock_read_csv.return_value = pandas.DataFrame()

     feature_reader = FeatureQueryResultsReader("test_manifest_key")
     feature_reader.load_results()

     expected_reads = 3
     self.assertEqual(mock_read_csv.call_count, expected_reads)
예제 #2
0
    def test_empty_results(self, mock_parse_manifest,
                           mock_upload_converted_matrix,
                           mock_complete_subtask_execution,
                           mock_complete_request, mock_creation_date,
                           mock_remove):
        """Check that ``run`` writes a zero-byte loom file for empty results.

        The mocked manifest parser reports ``record_count == 0``. After
        ``run()`` the test expects the local output file to have size 0, the
        converter subtask to be completed exactly once, and the request to be
        completed exactly once.

        The ``mock_*`` parameters are presumably injected by ``mock.patch``
        decorators that are not visible in this excerpt.
        """
        output_filename = "unit_test_empty_loom.loom"

        mock_creation_date.return_value = date.to_string(
            datetime.datetime.utcnow())

        self.matrix_converter.query_results = {
            QueryType.CELL: CellQueryResultsReader("test_manifest_key"),
            QueryType.EXPRESSION:
            ExpressionQueryResultsReader("test_manifest_key"),
            QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
        }

        mock_parse_manifest.return_value = {"record_count": 0}

        self.matrix_converter.local_output_filename = output_filename
        self.matrix_converter.run()

        # Clean up in ``finally`` so that a failing assertion does not leave
        # the output file behind for later test runs.
        try:
            self.assertEqual(os.path.getsize(output_filename), 0)

            mock_complete_subtask_execution.assert_called_once_with(
                Subtask.CONVERTER)
            mock_complete_request.assert_called_once()
        finally:
            # NOTE(review): the ``mock_remove`` argument suggests os.remove
            # may be patched during this test; if so this call does not
            # delete anything -- confirm against the decorators.
            os.remove(output_filename)
예제 #3
0
    def run(self):
        """Convert the query results, upload the output and mark the request complete.

        Steps, in order:

        1. Build one results reader per query type from the manifest keys in
           ``self.args`` and store them in ``self.query_results``.
        2. Call the ``_to_<format>`` method selected by ``self.format``; it
           returns the path of the locally written output.
        3. Pass that path and ``self.target_path`` to
           ``_upload_converted_matrix``.
        4. Delete the local output file (also when the upload fails).
        5. Report completion of the converter subtask and of the request,
           with the elapsed seconds since the tracker's creation date.

        Raises:
            Exception: any error raised by the steps above is logged, recorded
                through ``self.request_tracker.log_error`` and re-raised.
        """
        try:
            LOGGER.debug(f"Beginning matrix conversion run for {self.args.request_id}")
            self.query_results = {
                QueryType.CELL: CellQueryResultsReader(self.args.cell_metadata_manifest_key),
                QueryType.EXPRESSION: ExpressionQueryResultsReader(self.args.expression_manifest_key),
                QueryType.FEATURE: FeatureQueryResultsReader(self.args.gene_metadata_manifest_key)
            }

            LOGGER.debug(f"Beginning conversion to {self.format}")
            # Dispatch by name: format "x" is handled by the method "_to_x".
            local_converted_path = getattr(self, f"_to_{self.format}")()
            LOGGER.debug(f"Conversion to {self.format} completed")

            LOGGER.debug("Beginning upload to S3")
            try:
                self._upload_converted_matrix(local_converted_path, self.target_path)
                LOGGER.debug("Upload to S3 complete, job finished")
            finally:
                # Remove the local copy even if the upload raised, so a failed
                # run does not leave the converted file on disk.
                os.remove(local_converted_path)

            self.request_tracker.complete_subtask_execution(Subtask.CONVERTER)
            elapsed = date.get_datetime_now() - date.to_datetime(self.request_tracker.creation_date)
            self.request_tracker.complete_request(duration=elapsed.total_seconds())
        except Exception as e:
            LOGGER.info(f"Matrix Conversion failed on {self.args.request_id} with error {str(e)}")
            self.request_tracker.log_error(str(e))
            # Bare ``raise`` re-raises the active exception as-is.
            raise
예제 #4
0
    def test__write_out_gene_dataframe__without_compression(self, mock_parse_manifest, mock_load_results, mock_to_csv):
        """Check ``_write_out_gene_dataframe`` when called with ``compression=False``.

        The mocked ``load_results`` returns an empty DataFrame. The test
        expects a DataFrame back, exactly one ``load_results`` call, and one
        ``to_csv`` call with the target path and ``index_label='featurekey'``
        and no other arguments.
        """
        self.matrix_converter.query_results = {
            QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
        }
        results_dir = self.matrix_converter._make_directory()
        # Remove the directory in ``finally`` so that a failing assertion does
        # not leave it behind for later tests.
        try:
            mock_load_results.return_value = pandas.DataFrame()

            results = self.matrix_converter._write_out_gene_dataframe(results_dir, 'genes.csv', compression=False)

            self.assertIsInstance(results, pandas.DataFrame)
            mock_load_results.assert_called_once()
            mock_to_csv.assert_called_once_with('./test_target/genes.csv', index_label='featurekey')
        finally:
            shutil.rmtree(results_dir)
예제 #5
0
    def test__to_loom(self, mock_parse_manifest, mock_load_gene_results,
                      mock_load_cell_results, mock_generate_dfs):
        """Check that ``_to_loom`` writes the expected expression values.

        Gene, cell and expression data come from ``self._create_test_data()``
        and are fed in through the mocks. The resulting loom file is opened
        and its total sum, plus the sums of the first, second and last
        columns, are compared with the test expression data.
        """
        working_dir = "unit_test__to_loom"
        self.matrix_converter.working_dir = working_dir

        test_data = self._create_test_data()

        self.matrix_converter.query_results = {
            QueryType.CELL: CellQueryResultsReader("test_manifest_key"),
            QueryType.EXPRESSION:
            ExpressionQueryResultsReader("test_manifest_key"),
            QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
        }
        self.matrix_converter.query_results[QueryType.CELL].manifest = {
            "record_count": test_data["cells_df"].shape[0]
        }

        mock_load_gene_results.return_value = test_data["genes_df"]
        mock_load_cell_results.return_value = test_data["cells_df"]

        expression_manifest = {
            "record_count": sum(d.shape[0] for d in test_data["expr_dfs"])
        }
        mock_parse_manifest.return_value = expression_manifest

        mock_generate_dfs.return_value = iter(test_data["expr_dfs"])

        self.matrix_converter.local_output_filename = "unit_test__to_loom.loom"
        loom_path = self.matrix_converter._to_loom()

        # Outer ``finally`` removes the working directory even when an
        # assertion fails; inner ``finally`` closes the loom connection so the
        # file handle is released before the directory is deleted.
        try:
            ds = loompy.connect(loom_path)
            try:
                self.assertAlmostEqual(
                    ds[:, :].sum(),
                    sum(d["exprvalue"].sum() for d in test_data["expr_dfs"]),
                    -1)

                # Every cell has 20 genes with non-zero expression. Check the
                # first and last cells to make sure that the expression
                # matches.
                self.assertAlmostEqual(
                    ds[:, 0].sum(),
                    test_data["expr_dfs"][0]['exprvalue'][:20].sum(), 1)
                self.assertAlmostEqual(
                    ds[:, 1].sum(),
                    test_data["expr_dfs"][0]['exprvalue'][20:40].sum(), 1)
                self.assertAlmostEqual(
                    ds[:, -1].sum(),
                    test_data["expr_dfs"][-1]['exprvalue'][-20:].sum(), 1)
            finally:
                ds.close()
        finally:
            shutil.rmtree(working_dir)
예제 #6
0
    def test__write_out_gene_dataframe_10x(self, mock_parse_manifest,
                                           mock_load_results, mock_to_csv):
        """Check ``_write_out_gene_dataframe_10x`` output and its ``to_csv`` call.

        The mocked ``load_results`` returns the ``genes_df`` frame from
        ``self._create_test_data()``. The test expects a DataFrame whose
        second column is ``featuretype_10x``, exactly one ``load_results``
        call, and one ``to_csv`` call with gzip compression, a tab separator,
        no header and ``index_label='featurekey'``.
        """
        self.matrix_converter.query_results = {
            QueryType.FEATURE: FeatureQueryResultsReader("test_manifest_key")
        }
        results_dir = self.matrix_converter._make_directory()
        # Remove the directory in ``finally`` so that a failing assertion does
        # not leave it behind for later tests.
        try:
            mock_load_results.return_value = self._create_test_data()['genes_df']

            results = self.matrix_converter._write_out_gene_dataframe_10x(
                results_dir, 'genes.csv.gz')

            self.assertEqual(type(results).__name__, 'DataFrame')
            self.assertEqual(results.columns.tolist()[1], 'featuretype_10x')
            mock_load_results.assert_called_once()
            mock_to_csv.assert_called_once_with('./test_target/genes.csv.gz',
                                                compression='gzip',
                                                index_label='featurekey',
                                                header=False,
                                                sep='\t')
        finally:
            shutil.rmtree(results_dir)
예제 #7
0
 def test_load_slice(self, mock_parse_manifest):
     """Check that ``load_slice(0)`` on a feature reader raises NotImplementedError."""
     feature_reader = FeatureQueryResultsReader("test_manifest_key")
     # Callable form of assertRaises: it invokes load_slice(0) itself and
     # verifies the exception type.
     self.assertRaises(NotImplementedError, feature_reader.load_slice, 0)