コード例 #1
0
    def test_filter_chi2_gt_sample_sets(self):
        """Exercise Chi2GtFreqs2SampleSetsFilter with two three-sample sets.

        The first two variations carry the same genotype in all six samples,
        while the last two carry different genotypes in samples 1-3 and in
        samples 4-6. The test expects only the first two variations to be
        kept, and checks the optional selection / discarded outputs.
        """
        same_in_all = [[0, 0]] * 6
        split_by_set = [[0, 0], [0, 0], [0, 1], [1, 1], [1, 1], [1, 1]]
        gts = numpy.array([same_in_all, same_in_all,
                           split_by_set, split_by_set])

        variations = VariationsArrays()
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5, 6]
        samples1 = [1, 2, 3]
        samples2 = [4, 5, 6]

        # First run: also ask the filter to report which variations it chose.
        selecting_filter = Chi2GtFreqs2SampleSetsFilter(
            samples1, samples2, min_pval=0.05, n_bins=2,
            report_selection=True)
        result = selecting_filter(variations)

        assert list(result[COUNTS]) == [2, 2]
        kept_gts = result[FLT_VARS][GT_FIELD]
        assert numpy.all(kept_gts == gts[:2, ...])
        stats = result[FLT_STATS]
        assert stats[N_KEPT] == 2
        assert stats[TOT] == 4
        assert stats[N_FILTERED_OUT] == 2
        assert result[SELECTED_VARS].shape

        # Second run: ask for the discarded variations to be returned.
        discarding_filter = Chi2GtFreqs2SampleSetsFilter(
            samples1, samples2, min_pval=0.05, n_bins=2,
            return_discarded=True)
        result = discarding_filter(variations)
        assert result[DISCARDED_VARS].num_variations
コード例 #2
0
ファイル: test_filter.py プロジェクト: JoseBlanca/variation
    def test_filter_or(self):
        """Combine two ObsHetFilter instances with OrFilter.

        The test expects the combined filter to keep all four variations.
        """
        gts = numpy.array([[[0, 0], [1, 1], [0, 1], [1, 1], [0, 0]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1]],
                           [[0, 0], [0, 0], [0, 1], [0, 0], [1, 1]]])
        variations = VariationsArrays()
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5]

        # One filter without a heterozygosity threshold, one with min_het.
        het_filters = [ObsHetFilter(min_num_genotypes=0),
                       ObsHetFilter(min_het=0.2, min_num_genotypes=0)]
        or_filter = OrFilter(het_filters)
        result = or_filter(variations)

        assert numpy.all(result[FLT_VARS][GT_FIELD] == gts)
        stats = result[FLT_STATS]
        assert stats[N_KEPT] == 4
        assert stats[TOT] == 4
        assert stats[N_FILTERED_OUT] == 0
コード例 #3
0
    def test_filter_or(self):
        """Check OrFilter built from two ObsHetFilter instances.

        Every one of the four variations is expected to pass the combined
        filter, so nothing is reported as filtered out.
        """
        genotype_rows = [
            [[0, 0], [1, 1], [0, 1], [1, 1], [0, 0]],
            [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]],
            [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1]],
            [[0, 0], [0, 0], [0, 1], [0, 0], [1, 1]],
        ]
        gts = numpy.array(genotype_rows)

        variations = VariationsArrays()
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5]

        no_threshold = ObsHetFilter(min_num_genotypes=0)
        with_min_het = ObsHetFilter(min_het=0.2, min_num_genotypes=0)
        combined = OrFilter([no_threshold, with_min_het])
        outcome = combined(variations)

        assert numpy.all(outcome[FLT_VARS][GT_FIELD] == gts)
        flt_stats = outcome[FLT_STATS]
        assert flt_stats[N_KEPT] == 4
        assert flt_stats[TOT] == 4
        assert flt_stats[N_FILTERED_OUT] == 0
コード例 #4
0
ファイル: test_filter.py プロジェクト: JoseBlanca/variation
    def test_filter_chi2_gt_sample_sets(self):
        """Check Chi2GtFreqs2SampleSetsFilter on samples 1-3 versus 4-6.

        Variations 0 and 1 have identical genotypes in every sample;
        variations 2 and 3 have different genotypes in the two sample sets.
        The test expects the filter to keep the former and drop the latter.
        """
        uniform_row = [[0, 0]] * 6
        divergent_row = [[0, 0], [0, 0], [0, 1], [1, 1], [1, 1], [1, 1]]
        gts = numpy.array([uniform_row, uniform_row,
                           divergent_row, divergent_row])

        variations = VariationsArrays()
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5, 6]
        samples1 = [1, 2, 3]
        samples2 = [4, 5, 6]

        # Run once with the selection report enabled.
        reporting_flt = Chi2GtFreqs2SampleSetsFilter(
            samples1, samples2, min_pval=0.05, n_bins=2,
            report_selection=True)
        outcome = reporting_flt(variations)

        assert list(outcome[COUNTS]) == [2, 2]
        assert numpy.all(outcome[FLT_VARS][GT_FIELD] == gts[:2, ...])
        flt_stats = outcome[FLT_STATS]
        assert flt_stats[N_KEPT] == 2
        assert flt_stats[TOT] == 4
        assert flt_stats[N_FILTERED_OUT] == 2
        assert outcome[SELECTED_VARS].shape

        # Run again asking for the discarded variations instead.
        discarding_flt = Chi2GtFreqs2SampleSetsFilter(
            samples1, samples2, min_pval=0.05, n_bins=2,
            return_discarded=True)
        outcome = discarding_flt(variations)
        assert outcome[DISCARDED_VARS].num_variations
コード例 #5
0
ファイル: test_filter.py プロジェクト: JoseBlanca/variation
    def test_filter_obs_het(self):
        """Check ObsHetFilter on a small in-memory genotype matrix.

        The matrix has four variations and five samples. Variations 0, 2 and
        3 each contain one heterozygous call ([0, 1]) out of five; variation
        1 contains none.

        NOTE(review): the bare ``return`` part-way through ends the test
        early, so the max_het and HDF5-based checks after it never run.
        """
        variations = VariationsArrays()
        gts = numpy.array([[[0, 0], [1, 1], [0, 1], [1, 1], [0, 0]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1]],
                           [[0, 0], [0, 0], [0, 1], [0, 0], [1, 1]]])
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5]

        # No heterozygosity threshold given: all four variations are expected
        # to be kept, and the selection report must be present.
        filtered = ObsHetFilter(min_num_genotypes=0,
                                report_selection=True)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts)
        assert filtered[FLT_STATS][N_KEPT] == 4
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 0
        assert filtered[SELECTED_VARS].shape

        # min_het=0.2: variation 1 (no heterozygous calls) is expected to be
        # dropped; the other three are kept.
        filtered = ObsHetFilter(min_het=0.2, min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[0, 2, 3]])
        assert filtered[FLT_STATS][N_KEPT] == 3
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 1

        # min_num_genotypes=10 exceeds the five genotypes available per
        # variation; the test expects every variation to be filtered out.
        filtered = ObsHetFilter(min_het=0.2, min_num_genotypes=10)(variations)
        assert filtered[FLT_STATS][N_KEPT] == 0
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 4

        # Same thresholds with keep_missing=True: the test expects every
        # variation to be kept instead.
        filtered = ObsHetFilter(min_het=0.2, min_num_genotypes=10,
                                keep_missing=True)(variations)
        assert filtered[FLT_STATS][N_KEPT] == 4
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 0

        # NOTE(review): this bare return ends the test here, so everything
        # below is unreachable and never executed. The reason is not visible
        # in this file; confirm whether these checks should be re-enabled,
        # moved to their own (skipped) test, or removed.
        return
        filtered = ObsHetFilter(max_het=0.1, min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[1]])

        filtered = ObsHetFilter(min_het=0.2, max_het=0.3,
                                min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[0, 2, 3]])

        # The remaining (unreachable) checks read the ril.hdf5 fixture from
        # TEST_DATA_DIR.
        hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        filtered = ObsHetFilter(min_het=0.6, max_het=0.9)(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {}

        filtered = ObsHetFilter(min_het=0.6, max_het=0.9, min_call_dp=5)(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {0: 978, -1: 910, 1: 774, 2: 92}

        filtered = ObsHetFilter(min_het=0.6, max_het=0.9, min_call_dp=5,
                                n_bins=3, range_=(0, 1))(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {0: 978, -1: 910, 1: 774, 2: 92}
        assert numpy.all(filtered[COUNTS] == [391, 14, 10])
        assert numpy.all(filtered[EDGES] == [0, 1 / 3, 2 / 3, 1])

        samples = hdf5.samples[:50]
        filtered = ObsHetFilter(min_het=0.6, max_het=0.9, min_call_dp=5,
                                n_bins=3, range_=(0, 1), samples=samples)(hdf5)
        # NOTE(review): counts is computed here but not used by the final
        # assertion.
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert sum(filtered[COUNTS]) == sum([339, 14, 6])
コード例 #6
0
    def test_filter_obs_het(self):
        """Check ObsHetFilter on a small in-memory genotype matrix.

        The matrix has four variations and five samples. Variations 0, 2 and
        3 each contain one heterozygous call ([0, 1]) out of five; variation
        1 contains none.

        NOTE(review): the bare ``return`` part-way through ends the test
        early, so the max_het and HDF5-based checks after it never run.
        """
        variations = VariationsArrays()
        gts = numpy.array([[[0, 0], [1, 1], [0, 1], [1, 1], [0, 0]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [1, 1]],
                           [[0, 0], [0, 0], [0, 0], [0, 0], [0, 1]],
                           [[0, 0], [0, 0], [0, 1], [0, 0], [1, 1]]])
        variations[GT_FIELD] = gts
        variations.samples = [1, 2, 3, 4, 5]

        # No heterozygosity threshold given: all four variations are expected
        # to be kept, and the selection report must be present.
        filtered = ObsHetFilter(min_num_genotypes=0,
                                report_selection=True)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts)
        assert filtered[FLT_STATS][N_KEPT] == 4
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 0
        assert filtered[SELECTED_VARS].shape

        # min_het=0.2: variation 1 (no heterozygous calls) is expected to be
        # dropped; the other three are kept.
        filtered = ObsHetFilter(min_het=0.2, min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[0, 2, 3]])
        assert filtered[FLT_STATS][N_KEPT] == 3
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 1

        # min_num_genotypes=10 exceeds the five genotypes available per
        # variation; the test expects every variation to be filtered out.
        filtered = ObsHetFilter(min_het=0.2, min_num_genotypes=10)(variations)
        assert filtered[FLT_STATS][N_KEPT] == 0
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 4

        # Same thresholds with keep_missing=True: the test expects every
        # variation to be kept instead.
        filtered = ObsHetFilter(min_het=0.2,
                                min_num_genotypes=10,
                                keep_missing=True)(variations)
        assert filtered[FLT_STATS][N_KEPT] == 4
        assert filtered[FLT_STATS][TOT] == 4
        assert filtered[FLT_STATS][N_FILTERED_OUT] == 0

        # NOTE(review): this bare return ends the test here, so everything
        # below is unreachable and never executed. The reason is not visible
        # in this file; confirm whether these checks should be re-enabled,
        # moved to their own (skipped) test, or removed.
        return
        filtered = ObsHetFilter(max_het=0.1, min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[1]])

        filtered = ObsHetFilter(min_het=0.2, max_het=0.3,
                                min_num_genotypes=0)(variations)
        assert numpy.all(filtered[FLT_VARS][GT_FIELD] == gts[[0, 2, 3]])

        # The remaining (unreachable) checks read the ril.hdf5 fixture from
        # TEST_DATA_DIR.
        hdf5 = VariationsH5(join(TEST_DATA_DIR, 'ril.hdf5'), mode='r')
        filtered = ObsHetFilter(min_het=0.6, max_het=0.9)(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {}

        filtered = ObsHetFilter(min_het=0.6, max_het=0.9, min_call_dp=5)(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {0: 978, -1: 910, 1: 774, 2: 92}

        filtered = ObsHetFilter(min_het=0.6,
                                max_het=0.9,
                                min_call_dp=5,
                                n_bins=3,
                                range_=(0, 1))(hdf5)
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert counts == {0: 978, -1: 910, 1: 774, 2: 92}
        assert numpy.all(filtered[COUNTS] == [391, 14, 10])
        assert numpy.all(filtered[EDGES] == [0, 1 / 3, 2 / 3, 1])

        samples = hdf5.samples[:50]
        filtered = ObsHetFilter(min_het=0.6,
                                max_het=0.9,
                                min_call_dp=5,
                                n_bins=3,
                                range_=(0, 1),
                                samples=samples)(hdf5)
        # NOTE(review): counts is computed here but not used by the final
        # assertion.
        counts = Counter(filtered[FLT_VARS][GT_FIELD].flat)
        assert sum(filtered[COUNTS]) == sum([339, 14, 6])