Esempio n. 1
0
    def test_filter_by_call_rate_in_memory(self):
        """Chain two call-rate variant filters on in-memory variations.

        The variations loaded from ``test.zarr`` are computed into memory
        first, so the filter results are inspected straight from the
        pipeline dict, without a final ``compute`` call.
        """
        lazy_variations = load_zarr(TEST_DATA_DIR / 'test.zarr')
        computed = compute({'vars': lazy_variations},
                           store_variation_to_memory=True)
        variations = computed['vars']

        pipeline = {}
        first_pass = remove_low_call_rate_vars(variations, min_call_rate=0.5)
        _add_task_to_pipeline(pipeline, first_pass)

        # The second filter runs on the output of the first one, with the
        # same threshold, so it is expected to filter out nothing.
        second_pass = remove_low_call_rate_vars(
            first_pass[FLT_VARS], min_call_rate=0.5, filter_id='call_rate2')
        _add_task_to_pipeline(pipeline, second_pass)

        # (filter id, expected kept, expected filtered out); the first call
        # passed no filter_id and its stats are looked up under 'call_rate'.
        expectations = (('call_rate', 5, 2), ('call_rate2', 5, 0))
        for filter_id, n_kept, n_filtered_out in expectations:
            stats = pipeline[FLT_STATS][filter_id]
            self.assertEqual(stats[N_KEPT], n_kept)
            self.assertEqual(stats[N_FILTERED_OUT], n_filtered_out)

        filtered = pipeline[FLT_VARS]
        self.assertEqual(filtered[GT_FIELD].shape, (5, 3, 2))
        # Filtering variants must leave samples and metadata untouched.
        same_samples = np.all(filtered.samples == variations.samples)
        self.assertTrue(same_samples)
        self.assertEqual(filtered.metadata, variations.metadata)
Esempio n. 2
0
    def test_filter_by_call_rate(self):
        """Chain two call-rate variant filters and compute them together.

        Both filter tasks are registered in one pipeline dict, which is
        then evaluated with a single ``compute`` call.
        """
        variations = create_dask_variations()
        pipeline = {}

        first_pass = remove_low_call_rate_vars(variations, min_call_rate=0.5)
        _add_task_to_pipeline(pipeline, first_pass)

        # Second filter consumes the variations produced by the first one.
        second_pass = remove_low_call_rate_vars(
            first_pass[FLT_VARS], min_call_rate=0.5, filter_id='call_rate2')
        _add_task_to_pipeline(pipeline, second_pass)

        processed = compute(pipeline, store_variation_to_memory=True)

        first_stats = processed[FLT_STATS]['call_rate']
        self.assertEqual(first_stats[N_KEPT], 5)
        self.assertEqual(first_stats[N_FILTERED_OUT], 2)

        # Same threshold applied again: nothing else should be filtered out.
        second_stats = processed[FLT_STATS]['call_rate2']
        self.assertEqual(second_stats[N_KEPT], 5)
        self.assertEqual(second_stats[N_FILTERED_OUT], 0)

        filtered = processed[FLT_VARS]
        self.assertEqual(filtered[GT_FIELD].shape, (5, 3, 2))
        # The input samples are still lazy here, so they are computed
        # before being compared with the in-memory result.
        expected_samples = variations.samples.compute()
        self.assertTrue(np.all(filtered.samples == expected_samples))
        self.assertEqual(filtered.metadata, variations.metadata)
Esempio n. 3
0
    def _test_filter_samples_by_call_rate(self, variations, do_computation):
        """Shared checks for chaining two low-call-rate sample filters.

        ``variations`` is the input to filter. When ``do_computation`` is
        true the task dict is evaluated with ``compute`` before the
        assertions; otherwise the task dict itself is inspected.
        """
        pipeline = {}
        first_pass = remove_low_call_rate_samples(
            variations, min_call_rate=0.5, filter_id='call_rate')
        _add_task_to_pipeline(pipeline, first_pass)

        # The second filter works on the samples that survived the first.
        second_pass = remove_low_call_rate_samples(
            first_pass[FLT_VARS], min_call_rate=0.5, filter_id='call_rate2')
        _add_task_to_pipeline(pipeline, second_pass)

        processed = (compute(pipeline, store_variation_to_memory=True)
                     if do_computation else pipeline)

        # (filter id, expected samples kept, expected samples filtered out)
        expectations = (('call_rate', 2, 1), ('call_rate2', 2, 0))
        for filter_id, n_kept, n_filtered_out in expectations:
            stats = processed[FLT_STATS][filter_id]
            self.assertEqual(stats[N_SAMPLES_KEPT], n_kept)
            self.assertEqual(stats[N_SAMPLES_FILTERED_OUT], n_filtered_out)

        filtered = processed[FLT_VARS]
        self.assertEqual(filtered[GT_FIELD].shape, (7, 2, 2))
        surviving_samples = ['mu16', 'upv196']
        self.assertTrue(np.all(filtered.samples == surviving_samples))
        self.assertEqual(filtered.metadata, variations.metadata)
Esempio n. 4
0
    def test_filter_and_hist_by_call_rate(self):
        """Filter variants by call rate and request the histogram as well.

        The variations come from ``test.zarr`` and the pipeline is evaluated
        with ``compute`` before the histogram stats are checked.
        """
        variations = load_zarr(TEST_DATA_DIR / 'test.zarr')
        pipeline = {}

        task = remove_low_call_rate_vars(
            variations, min_call_rate=0.5, calc_histogram=True)
        _add_task_to_pipeline(pipeline, task)

        processed = compute(pipeline, store_variation_to_memory=True)
        hist_stats = processed[FLT_STATS]['call_rate']

        # One count per bin, and one more edge than there are bins.
        self.assertEqual(len(hist_stats[COUNT]), DEF_NUM_BINS)
        self.assertEqual(len(hist_stats[BIN_EDGES]), DEF_NUM_BINS + 1)
        # The threshold used for filtering is reported back as the limits.
        self.assertEqual(hist_stats['limits'], [0.5])
Esempio n. 5
0
    def test_filter_and_hist_by_call_rate_in_memory(self):
        """Filter in-memory variations by call rate, with the histogram.

        The dask variations are computed into memory first, so the
        histogram stats are read straight from the pipeline dict, without
        a final ``compute`` call.
        """
        computed = compute({'vars': create_dask_variations()},
                           store_variation_to_memory=True)
        variations = computed['vars']
        pipeline = {}

        task = remove_low_call_rate_vars(
            variations, min_call_rate=0.5, calc_histogram=True)
        _add_task_to_pipeline(pipeline, task)

        hist_stats = pipeline[FLT_STATS]['call_rate']

        # One count per bin, and one more edge than there are bins.
        self.assertEqual(len(hist_stats[COUNT]), DEF_NUM_BINS)
        self.assertEqual(len(hist_stats[BIN_EDGES]), DEF_NUM_BINS + 1)
        # The threshold used for filtering is reported back as the limits.
        self.assertEqual(hist_stats['limits'], [0.5])