Example #1
0
    def test_mean_pairwise_diversity(self):

        # start with simplest case, two haplotypes, one pairwise comparison
        h = HaplotypeArray([[0, 0],
                            [1, 1],
                            [0, 1],
                            [1, 2],
                            [0, -1],
                            [-1, -1]])
        ac = h.count_alleles()
        expect = [0, 0, 1, 1, -1, -1]
        actual = allel.mean_pairwise_difference(ac, fill=-1)
        aeq(expect, actual)

        # four haplotypes, 6 pairwise comparison
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        ac = h.count_alleles()
        expect = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
        actual = allel.mean_pairwise_difference(ac, fill=-1)
        assert_array_close(expect, actual)
Example #2
0
    def test_mean_pairwise_diversity(self):

        # start with simplest case, two haplotypes, one pairwise comparison
        h = HaplotypeArray([[0, 0],
                            [1, 1],
                            [0, 1],
                            [1, 2],
                            [0, -1],
                            [-1, -1]])
        ac = h.count_alleles()
        expect = [0, 0, 1, 1, -1, -1]
        actual = allel.stats.mean_pairwise_difference(ac, fill=-1)
        aeq(expect, actual)

        # four haplotypes, 6 pairwise comparison
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        ac = h.count_alleles()
        expect = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
        actual = allel.stats.mean_pairwise_difference(ac, fill=-1)
        assert_array_close(expect, actual)
Example #3
0
    def test_rogers_huff_r(self):

        gn = [[0, 1, 2],
              [0, 1, 2]]
        expect = 1.
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2],
              [2, 1, 0]]
        expect = -1.
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 0, 0],
              [1, 1, 1]]
        actual = allel.stats.rogers_huff_r(gn)
        assert np.isnan(actual)

        gn = [[0, 1, 0, 1],
              [0, 1, 1, 0]]
        expect = 0
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2, -1],
              [0, 1, 2, 2]]
        expect = 1.
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2, 2],
              [0, 1, 2, -1]]
        expect = 1.
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2],
              [0, 1, -1]]
        expect = 1.
        actual = allel.stats.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 2],
              [2, 0],
              [0, 1]]
        expect = [-1, 1, -1]
        actual = allel.stats.rogers_huff_r(gn)
        assert_array_close(expect, actual)

        gn = [[0, 2, 0],
              [0, 2, 0],
              [2, 0, 2],
              [0, 2, -1]]
        expect = [1, -1, 1, -1, 1, -1]
        actual = allel.stats.rogers_huff_r(gn)
        assert_array_close(expect, actual)
Example #4
0
    def test_rogers_huff_r(self):

        gn = [[0, 1, 2],
              [0, 1, 2]]
        expect = 1.
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2],
              [2, 1, 0]]
        expect = -1.
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 0, 0],
              [1, 1, 1]]
        actual = allel.rogers_huff_r(gn)
        assert np.isnan(actual)

        gn = [[0, 1, 0, 1],
              [0, 1, 1, 0]]
        expect = 0
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2, -1],
              [0, 1, 2, 2]]
        expect = 1.
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2, 2],
              [0, 1, 2, -1]]
        expect = 1.
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 1, 2],
              [0, 1, -1]]
        expect = 1.
        actual = allel.rogers_huff_r(gn)
        eq(expect, actual)

        gn = [[0, 2],
              [2, 0],
              [0, 1]]
        expect = [-1, 1, -1]
        actual = allel.rogers_huff_r(gn)
        assert_array_close(expect, actual)

        gn = [[0, 2, 0],
              [0, 2, 0],
              [2, 0, 2],
              [0, 2, -1]]
        expect = [1, -1, 1, -1, 1, -1]
        actual = allel.rogers_huff_r(gn)
        assert_array_close(expect, actual)
Example #5
0
    def test_heterozygosity_expected(self):

        def refimpl(af, ploidy, fill=0):
            """Limited reference implementation for testing purposes."""

            # check allele frequencies sum to 1
            af_sum = np.sum(af, axis=1)

            # assume three alleles
            p = af[:, 0]
            q = af[:, 1]
            r = af[:, 2]

            out = 1 - p**ploidy - q**ploidy - r**ploidy
            with ignore_invalid():
                out[(af_sum < 1) | np.isnan(af_sum)] = fill

            return out

        # diploid
        g = GenotypeArray([[[0, 0], [0, 0]],
                           [[1, 1], [1, 1]],
                           [[1, 1], [2, 2]],
                           [[0, 0], [0, 1]],
                           [[0, 0], [0, 2]],
                           [[1, 1], [1, 2]],
                           [[0, 1], [0, 1]],
                           [[0, 1], [1, 2]],
                           [[0, 0], [-1, -1]],
                           [[0, 1], [-1, -1]],
                           [[-1, -1], [-1, -1]]], dtype='i1')
        expect1 = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
        af = g.count_alleles().to_frequencies()
        expect2 = refimpl(af, ploidy=g.ploidy, fill=-1)
        actual = allel.stats.heterozygosity_expected(af, ploidy=g.ploidy,
                                                     fill=-1)
        assert_array_close(expect1, actual)
        assert_array_close(expect2, actual)
        expect3 = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, 0]
        actual = allel.stats.heterozygosity_expected(af, ploidy=g.ploidy,
                                                     fill=0)
        assert_array_close(expect3, actual)

        # polyploid
        g = GenotypeArray([[[0, 0, 0], [0, 0, 0]],
                           [[1, 1, 1], [1, 1, 1]],
                           [[1, 1, 1], [2, 2, 2]],
                           [[0, 0, 0], [0, 0, 1]],
                           [[0, 0, 0], [0, 0, 2]],
                           [[1, 1, 1], [0, 1, 2]],
                           [[0, 0, 1], [0, 1, 1]],
                           [[0, 1, 1], [0, 1, 2]],
                           [[0, 0, 0], [-1, -1, -1]],
                           [[0, 0, 1], [-1, -1, -1]],
                           [[-1, -1, -1], [-1, -1, -1]]], dtype='i1')
        af = g.count_alleles().to_frequencies()
        expect = refimpl(af, ploidy=g.ploidy, fill=-1)
        actual = allel.stats.heterozygosity_expected(af, ploidy=g.ploidy,
                                                     fill=-1)
        assert_array_close(expect, actual)
Example #6
0
    def test_heterozygosity_expected(self):

        def refimpl(f, ploidy, fill=0):
            """Limited reference implementation for testing purposes."""

            # check allele frequencies sum to 1
            af_sum = np.sum(f, axis=1)

            # assume three alleles
            p = f[:, 0]
            q = f[:, 1]
            r = f[:, 2]

            out = 1 - p**ploidy - q**ploidy - r**ploidy
            with ignore_invalid():
                out[(af_sum < 1) | np.isnan(af_sum)] = fill

            return out

        # diploid
        g = GenotypeArray([[[0, 0], [0, 0]],
                           [[1, 1], [1, 1]],
                           [[1, 1], [2, 2]],
                           [[0, 0], [0, 1]],
                           [[0, 0], [0, 2]],
                           [[1, 1], [1, 2]],
                           [[0, 1], [0, 1]],
                           [[0, 1], [1, 2]],
                           [[0, 0], [-1, -1]],
                           [[0, 1], [-1, -1]],
                           [[-1, -1], [-1, -1]]], dtype='i1')
        expect1 = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
        af = g.count_alleles().to_frequencies()
        expect2 = refimpl(af, ploidy=g.ploidy, fill=-1)
        actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
        assert_array_close(expect1, actual)
        assert_array_close(expect2, actual)
        expect3 = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, 0]
        actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=0)
        assert_array_close(expect3, actual)

        # polyploid
        g = GenotypeArray([[[0, 0, 0], [0, 0, 0]],
                           [[1, 1, 1], [1, 1, 1]],
                           [[1, 1, 1], [2, 2, 2]],
                           [[0, 0, 0], [0, 0, 1]],
                           [[0, 0, 0], [0, 0, 2]],
                           [[1, 1, 1], [0, 1, 2]],
                           [[0, 0, 1], [0, 1, 1]],
                           [[0, 1, 1], [0, 1, 2]],
                           [[0, 0, 0], [-1, -1, -1]],
                           [[0, 0, 1], [-1, -1, -1]],
                           [[-1, -1, -1], [-1, -1, -1]]], dtype='i1')
        af = g.count_alleles().to_frequencies()
        expect = refimpl(af, ploidy=g.ploidy, fill=-1)
        actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
        assert_array_close(expect, actual)
Example #7
0
    def test_windowed_diversity(self):

        # four haplotypes, 6 pairwise comparison
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        ac = h.count_alleles()
        # mean pairwise diversity
        # expect = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
        pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
        expect = [(7/6)/10, (13/6)/10, 1/11]
        actual, _, _, _ = allel.windowed_diversity(pos, ac, size=10, start=1, stop=31)
        assert_array_close(expect, actual)
Example #8
0
    def test_inbreeding_coefficient(self):

        # diploid
        g = GenotypeArray([[[0, 0], [0, 0]],
                           [[1, 1], [1, 1]],
                           [[1, 1], [2, 2]],
                           [[0, 0], [0, 1]],
                           [[0, 0], [0, 2]],
                           [[1, 1], [1, 2]],
                           [[0, 1], [0, 1]],
                           [[0, 1], [1, 2]],
                           [[0, 0], [-1, -1]],
                           [[0, 1], [-1, -1]],
                           [[-1, -1], [-1, -1]]], dtype='i1')
        # ho = np.array([0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1])
        # he = np.array([0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1])
        # expect = 1 - (ho/he)
        expect = [-1, -1, 1-0, 1-(.5/.375), 1-(.5/.375), 1-(.5/.375),
                  1-(1/.5), 1-(1/.625), -1, 1-(1/.5), -1]
        actual = allel.stats.inbreeding_coefficient(g, fill=-1)
        assert_array_close(expect, actual)
Example #9
0
    def test_windowed_diversity(self):

        # four haplotypes, 6 pairwise comparison
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        ac = h.count_alleles()
        # mean pairwise diversity
        # expect = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
        pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
        expect = [(7/6)/10, (13/6)/10, 1/11]
        actual, _, _, _ = allel.stats.windowed_diversity(pos, ac, size=10,
                                                         start=1,
                                                         stop=31)
        assert_array_close(expect, actual)
Example #10
0
    def test_inbreeding_coefficient(self):

        # diploid
        g = GenotypeArray([[[0, 0], [0, 0]],
                           [[1, 1], [1, 1]],
                           [[1, 1], [2, 2]],
                           [[0, 0], [0, 1]],
                           [[0, 0], [0, 2]],
                           [[1, 1], [1, 2]],
                           [[0, 1], [0, 1]],
                           [[0, 1], [1, 2]],
                           [[0, 0], [-1, -1]],
                           [[0, 1], [-1, -1]],
                           [[-1, -1], [-1, -1]]], dtype='i1')
        # ho = np.array([0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1])
        # he = np.array([0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1])
        # expect = 1 - (ho/he)
        expect = [-1, -1, 1-0, 1-(.5/.375), 1-(.5/.375), 1-(.5/.375),
                  1-(1/.5), 1-(1/.625), -1, 1-(1/.5), -1]
        actual = allel.inbreeding_coefficient(g, fill=-1)
        assert_array_close(expect, actual)
Example #11
0
    def test_windowed_divergence(self):

        # simplest case, two haplotypes in each population
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        h1 = h.take([0, 1], axis=1)
        h2 = h.take([2, 3], axis=1)
        ac1 = h1.count_alleles()
        ac2 = h2.count_alleles()
        # mean pairwise divergence
        # expect = [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
        pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
        expect = [(6/4)/10, (9/4)/10, 0/11]
        actual, _, _, _ = allel.stats.windowed_divergence(
            pos, ac1, ac2, size=10, start=1, stop=31
        )
        assert_array_close(expect, actual)
Example #12
0
    def test_windowed_divergence(self):

        # simplest case, two haplotypes in each population
        h = HaplotypeArray([[0, 0, 0, 0],
                            [0, 0, 0, 1],
                            [0, 0, 1, 1],
                            [0, 1, 1, 1],
                            [1, 1, 1, 1],
                            [0, 0, 1, 2],
                            [0, 1, 1, 2],
                            [0, 1, -1, -1],
                            [-1, -1, -1, -1]])
        h1 = h.take([0, 1], axis=1)
        h2 = h.take([2, 3], axis=1)
        ac1 = h1.count_alleles()
        ac2 = h2.count_alleles()
        # mean pairwise divergence
        # expect = [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
        pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
        expect = [(6/4)/10, (9/4)/10, 0/11]
        actual, _, _, _ = allel.windowed_divergence(
            pos, ac1, ac2, size=10, start=1, stop=31
        )
        assert_array_close(expect, actual)