def test_heterozygosity_observed(self):
        """Compare ``allel.heterozygosity_observed`` with hand-written rates.

        The same eleven expected values are checked against a diploid and a
        triploid genotype array. ``fill=-1`` is passed, and the final
        variant (all calls ``-1``) is expected to come back as ``-1``.
        """
        # one expected rate per variant, identical for both ploidies below
        expected_rates = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]

        # two alleles per call
        diploid_calls = [
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ]
        diploid = GenotypeArray(diploid_calls, dtype='i1')
        observed = allel.heterozygosity_observed(diploid, fill=-1)
        aeq(expected_rates, observed)

        # three alleles per call
        triploid_calls = [
            [[0, 0, 0], [0, 0, 0]],
            [[1, 1, 1], [1, 1, 1]],
            [[1, 1, 1], [2, 2, 2]],
            [[0, 0, 0], [0, 0, 1]],
            [[0, 0, 0], [0, 0, 2]],
            [[1, 1, 1], [0, 1, 2]],
            [[0, 0, 1], [0, 1, 1]],
            [[0, 1, 1], [0, 1, 2]],
            [[0, 0, 0], [-1, -1, -1]],
            [[0, 0, 1], [-1, -1, -1]],
            [[-1, -1, -1], [-1, -1, -1]],
        ]
        triploid = GenotypeArray(triploid_calls, dtype='i1')
        observed = allel.heterozygosity_observed(triploid, fill=-1)
        aeq(expected_rates, observed)
    def test_slice_types(self):
        """Check which type each kind of indexing on a GenotypeArray returns."""
        genotypes = GenotypeArray(diploid_genotype_data, dtype='i1')

        # slicing along rows or columns must still give a GenotypeArray
        for sliced in (genotypes[1:], genotypes[:, 1:]):
            assert isinstance(sliced, GenotypeArray)

        # a single row or column index must give a GenotypeVector instead
        for picked in (genotypes[0], genotypes[:, 0]):
            assert isinstance(picked, GenotypeVector)
            assert not isinstance(picked, GenotypeArray)

        # indexing the ploidy dimension must give a plain numpy array
        first_alleles = genotypes[:, :, 0]
        assert isinstance(first_alleles, np.ndarray)
        assert not isinstance(first_alleles, GenotypeArray)

        # indexing all three dimensions must give a numpy int8 scalar
        single_allele = genotypes[0, 0, 0]
        assert isinstance(single_allele, np.int8)
        assert not isinstance(single_allele, GenotypeArray)
    def test_pairwise_distance_multidim(self):
        """Run ``allel.pairwise_distance`` with a custom metric on allele counts.

        The metric sums ``allel.mean_pairwise_difference_between`` over its
        two inputs; the expected result is that same sum computed directly
        for columns 0 and 1 of the allele counts array.
        """
        calls = [
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ]
        allele_counts = GenotypeArray(calls, dtype='i1').to_allele_counts()

        def summed_mpd(ac_a, ac_b):
            # collapse the mean pairwise differences into a single number
            return allel.mean_pairwise_difference_between(
                ac_a, ac_b, fill=0
            ).sum()

        expected = [summed_mpd(allele_counts[:, 0], allele_counts[:, 1])]
        observed = allel.pairwise_distance(allele_counts, summed_mpd)
        aeq(expected, observed)
    def test_haploidify_samples(self):
        """Check ``haploidify_samples`` on a diploid and a triploid array.

        For each input the result must be 2-dimensional with 3 variants,
        2 haplotypes and dtype int8, and every output value must be one of
        the alleles of the corresponding genotype call.
        """
        diploid_calls = [[[0, 1], [2, 3]],
                         [[4, 5], [6, 7]],
                         [[8, 9], [10, 11]]]
        triploid_calls = [[[0, 1, 2], [3, 4, 5]],
                          [[6, 7, 8], [9, 10, 11]],
                          [[12, 13, 14], [15, 16, 17]]]

        # diploid first, then triploid
        for calls in (diploid_calls, triploid_calls):
            genotypes = GenotypeArray(calls, dtype='i1')
            haplotypes = genotypes.haploidify_samples()

            assert 2 == haplotypes.ndim
            assert 3 == haplotypes.n_variants
            assert 2 == haplotypes.n_haplotypes
            assert np.int8 == haplotypes.dtype

            # each chosen allele has to come from the call it replaces
            for variant in range(genotypes.n_variants):
                for sample in range(genotypes.n_samples):
                    candidates = set(genotypes[variant, sample])
                    self.assertIn(haplotypes[variant, sample], candidates)
Example #5
0
    def test_heterozygosity_expected(self):
        """Check ``allel.heterozygosity_expected`` for diploid and triploid data.

        Diploid results are compared with hand-written values (for
        ``fill=-1`` and ``fill=0``) and with a local reference
        implementation; triploid results are compared with the reference
        implementation only.
        """

        def refimpl(f, ploidy, fill=0):
            """Limited reference implementation for testing purposes."""
            # per-variant total, used to spot frequencies not summing to 1
            total = np.sum(f, axis=1)

            # only the first three allele columns are considered
            freq_a, freq_b, freq_c = f[:, 0], f[:, 1], f[:, 2]
            result = 1 - freq_a**ploidy - freq_b**ploidy - freq_c**ploidy

            # comparing NaN totals is done under ignore_invalid()
            with ignore_invalid():
                incomplete = (total < 1) | np.isnan(total)
                result[incomplete] = fill

            return result

        # diploid
        diploid = GenotypeArray(
            [[[0, 0], [0, 0]],
             [[1, 1], [1, 1]],
             [[1, 1], [2, 2]],
             [[0, 0], [0, 1]],
             [[0, 0], [0, 2]],
             [[1, 1], [1, 2]],
             [[0, 1], [0, 1]],
             [[0, 1], [1, 2]],
             [[0, 0], [-1, -1]],
             [[0, 1], [-1, -1]],
             [[-1, -1], [-1, -1]]],
            dtype='i1')
        freqs = diploid.count_alleles().to_frequencies()

        by_hand_fill_neg = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
        by_reference = refimpl(freqs, ploidy=diploid.ploidy, fill=-1)
        observed = allel.heterozygosity_expected(
            freqs, ploidy=diploid.ploidy, fill=-1)
        assert_array_almost_equal(by_hand_fill_neg, observed)
        assert_array_almost_equal(by_reference, observed)

        # same input with fill=0: only the last expected value changes
        by_hand_fill_zero = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, 0]
        observed = allel.heterozygosity_expected(
            freqs, ploidy=diploid.ploidy, fill=0)
        assert_array_almost_equal(by_hand_fill_zero, observed)

        # polyploid
        triploid = GenotypeArray(
            [[[0, 0, 0], [0, 0, 0]],
             [[1, 1, 1], [1, 1, 1]],
             [[1, 1, 1], [2, 2, 2]],
             [[0, 0, 0], [0, 0, 1]],
             [[0, 0, 0], [0, 0, 2]],
             [[1, 1, 1], [0, 1, 2]],
             [[0, 0, 1], [0, 1, 1]],
             [[0, 1, 1], [0, 1, 2]],
             [[0, 0, 0], [-1, -1, -1]],
             [[0, 0, 1], [-1, -1, -1]],
             [[-1, -1, -1], [-1, -1, -1]]],
            dtype='i1')
        freqs = triploid.count_alleles().to_frequencies()
        by_reference = refimpl(freqs, ploidy=triploid.ploidy, fill=-1)
        observed = allel.heterozygosity_expected(
            freqs, ploidy=triploid.ploidy, fill=-1)
        assert_array_almost_equal(by_reference, observed)
Example #6
0
def build_genotype_array(genotypes, pop_samples, markers):
    """Build a GenotypeArray with one row per marker and one entry per sample.

    Args:
        genotypes: Nested lookup indexed as ``genotypes[sample][marker]``.
        pop_samples: Mapping whose values are collections of sample names;
            the keys are not used here.
        markers: Iterable of marker identifiers, one output row each.

    Returns:
        The ``GenotypeArray`` built from the collected rows. A one-line
        summary of its dimensions is also written to stderr.
    """
    # The sample order does not depend on the marker, so work it out once:
    # groups in mapping order, samples sorted within each group.
    sample_order = [
        sample
        for sample_list in pop_samples.values()
        for sample in sorted(sample_list)
    ]
    rows = [
        [genotypes[sample][marker] for sample in sample_order]
        for marker in markers
    ]
    gt = GenotypeArray(rows)
    print('GenotypeArray construction complete:',
          gt.n_variants,
          'markers,',
          gt.n_samples,
          'samples, and a ploidy of',
          gt.ploidy,
          file=sys.stderr)
    return gt
    def test_inbreeding_coefficient(self):
        """Check ``allel.inbreeding_coefficient`` on a diploid genotype array."""
        calls = [
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ]
        genotypes = GenotypeArray(calls, dtype='i1')

        # Expected values follow 1 - (ho / he), using per-variant
        #   ho = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
        #   he = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
        # with the fill value -1 written where he is 0 or is itself -1.
        half_over_375 = 1 - (.5 / .375)
        one_over_half = 1 - (1 / .5)
        expected = [
            -1,
            -1,
            1 - 0,
            half_over_375,
            half_over_375,
            half_over_375,
            one_over_half,
            1 - (1 / .625),
            -1,
            one_over_half,
            -1,
        ]
        observed = allel.inbreeding_coefficient(genotypes, fill=-1)
        assert_array_almost_equal(expected, observed)
    def test_constructor(self):
        """Check which inputs the GenotypeArray constructor rejects and accepts."""
        # calling with no data argument at all must fail
        with pytest.raises(TypeError):
            # noinspection PyArgumentList
            GenotypeArray()

        rejected_inputs = (
            'foo bar',           # wrong dtype
            [4., 5., 3.7],       # wrong dtype
            [1, 2, 3],           # wrong dimensions
            [[1, 2], [3, 4]],    # wrong dimensions; use HaplotypeArray instead
        )
        for bad_data in rejected_inputs:
            with pytest.raises(TypeError):
                GenotypeArray(bad_data)

        # typed construction from the diploid, then the polyploid fixture
        for fixture in (diploid_genotype_data, triploid_genotype_data):
            built = GenotypeArray(fixture, dtype='i1')
            aeq(fixture, built)
            assert np.int8 == built.dtype
 def setup_instance(self, data, dtype=None):
     """Wrap ``data`` in a GenotypeArray, forwarding ``dtype`` unchanged."""
     instance = GenotypeArray(data, dtype=dtype)
     return instance