def test_empty_gt_allele_count(self):
    """Check how count_alleles behaves when there are no genotypes.

    Two cases are exercised:

    * an empty numpy array is expected to raise EmptyVariationsError;
    * the genotypes taken from _create_empty_dask_variations() are
      expected to yield, once computed, a result of shape (0, 4) for
      max_alleles=3.
    """
    # Case 1: an empty in-memory array must be rejected.
    empty_gts = np.array([])
    with self.assertRaises(EmptyVariationsError):
        count_alleles(empty_gts, max_alleles=3)

    # Case 2: the empty fixture does not raise; the result is only
    # materialised by the explicit compute() call.
    lazy_gts = _create_empty_dask_variations()[GT_FIELD]
    computed = count_alleles(lazy_gts, max_alleles=3).compute()
    self.assertEqual(computed.shape, (0, 4))
def keep_variable_variations(variations, max_alleles,
                             filter_id='variable_variations'):
    """Keep only the variations in which more than one allele is observed.

    A variation is selected when both conditions hold:

    * at least one of its genotype values differs from MISSING_INT;
    * more than one allele has a count above zero, according to
      count_alleles called with count_missing=False.

    Args:
        variations: container indexable by GT_FIELD and providing
            get_vars(mask). The genotype array is reduced over axis 2 and
            then axis 1, so it is presumably laid out as
            (variations, samples, ploidy) -- TODO confirm.
        max_alleles: forwarded unchanged to count_alleles.
        filter_id: identifier stored under FLT_ID in the result.

    Returns:
        A dict with three entries: FLT_VARS (the selected variations),
        FLT_ID (filter_id) and FLT_STATS (a dict holding N_KEPT and
        N_FILTERED_OUT, as returned by va.count_nonzero; these may be
        lazy values depending on the array backend -- verify against
        callers).
    """
    genotypes = variations[GT_FIELD]

    # True for every variation holding at least one non-missing value.
    has_called_gt = va.any(va.any(genotypes != MISSING_INT, axis=2), axis=1)

    # True for every variation with more than one allele actually seen.
    counts_per_allele = count_alleles(genotypes,
                                      max_alleles=max_alleles,
                                      count_missing=False)
    is_variable = va.sum(counts_per_allele > 0, axis=1) > 1

    keep_mask = va.logical_and(has_called_gt, is_variable)
    kept_variations = variations.get_vars(keep_mask)

    stats = {
        N_KEPT: va.count_nonzero(keep_mask),
        N_FILTERED_OUT: va.count_nonzero(va.logical_not(keep_mask)),
    }
    return {
        FLT_VARS: kept_variations,
        FLT_ID: filter_id,
        FLT_STATS: stats,
    }
def test_allele_count(self):
    """Check count_alleles on a small in-memory genotype array.

    The input holds four variations with two diploid genotypes each,
    where -1 marks a missing call. With max_alleles=3 the expected table
    has four columns; in this fixture the first three match the number
    of 0, 1 and 2 calls per variation and the last one matches the
    number of -1 calls.
    """
    gts = np.array([[[0, 2], [-1, -1]],
                    [[0, 2], [1, -1]],
                    [[0, 0], [1, 1]],
                    [[-1, -1], [-1, -1]]])
    counts = count_alleles(gts, max_alleles=3)
    expected = np.array([[1, 0, 1, 2],
                         [1, 1, 1, 1],
                         [2, 2, 0, 0],
                         [0, 0, 0, 4]])
    # np.array_equal requires identical shapes as well as identical
    # values; a plain `counts == expected` broadcasts, so a result with a
    # wrong but broadcast-compatible shape could slip through np.all().
    self.assertTrue(np.array_equal(counts, expected))
def test_allele_count_dask(self):
    """Check count_alleles on the genotypes of the dask test fixture.

    The genotypes come from _create_dask_variations(); the result of
    count_alleles is materialised with compute() and compared against a
    table of seven rows and four columns (max_alleles=3).
    """
    variations = _create_dask_variations()
    gts = variations[GT_FIELD]
    counts = count_alleles(gts, max_alleles=3)
    expected = [[2, 2, 0, 2],
                [2, 2, 0, 2],
                [2, 2, 0, 2],
                [3, 1, 0, 2],
                [0, 0, 0, 6],
                [1, 1, 0, 4],
                [1, 3, 0, 2]]
    # np.array_equal requires identical shapes as well as identical
    # values; a plain `expected == computed` broadcasts, so a result with
    # a wrong but broadcast-compatible shape could slip through np.all().
    self.assertTrue(np.array_equal(counts.compute(), expected))