Example #1
0
 def test_masked_windowed_divergence(self):
     """Compare masked size-10 divergence windows with unmasked size-5 ones.

     The accessibility mask is True for five entries, then False for five,
     repeated three times. Each size-10 window computed with the mask is
     asserted to equal every other size-5 window computed without it.
     """
     haplotypes = HaplotypeArray([
         [0, 0, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 1, 1],
         [0, 1, 1, 1],
         [1, 1, 1, 1],
         [0, 0, 1, 2],
         [0, 1, 1, 2],
         [0, 1, -1, -1],
         [-1, -1, -1, -1],
     ])
     # columns 0-1 are taken as the first group, columns 2-3 as the second
     counts_first = haplotypes.take([0, 1], axis=1).count_alleles()
     counts_second = haplotypes.take([2, 3], axis=1).count_alleles()
     positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
     accessible = np.tile(np.repeat(np.array([True, False]), 5), 3)

     # reference: unmasked windows of size 5, keeping every other window
     narrow, _, _, _ = allel.windowed_divergence(
         positions, counts_first, counts_second, size=5, start=1, stop=31)
     expect = narrow[::2]

     # under test: windows of size 10 with the accessibility mask applied
     actual, _, _, _ = allel.windowed_divergence(
         positions, counts_first, counts_second,
         size=10, start=1, stop=31, is_accessible=accessible)

     assert_array_almost_equal(expect, actual)
Example #2
0
 def test_patterson_d(self):
     """Check the numerator and denominator returned by ``patterson_d``.

     Four allele-count tables with five rows each are passed in; one
     expected numerator and one expected denominator is given per row,
     with NaN expected for the last row.
     """
     counts_a = [[0, 2], [2, 0], [2, 0], [1, 1], [0, 0]]
     counts_b = [[0, 2], [0, 2], [0, 2], [1, 1], [0, 2]]
     counts_c = [[2, 0], [2, 0], [0, 2], [1, 1], [0, 2]]
     counts_d = [[2, 0], [0, 2], [2, 0], [1, 1], [0, 2]]

     actual_num, actual_den = allel.patterson_d(counts_a, counts_b,
                                                counts_c, counts_d)

     assert_array_almost_equal([0., 1., -1., 0., np.nan], actual_num)
     assert_array_almost_equal([0., 1., 1., 0.25, np.nan], actual_den)
Example #3
0
 def test_masked_windowed_diversity(self):
     """Compare masked size-10 diversity windows with unmasked size-5 ones.

     The accessibility mask is True for five entries, then False for five,
     repeated three times, so the second half of every size-10 window is
     masked out. The masked result is asserted to equal every other
     unmasked size-5 window.
     """
     # four haplotypes, i.e. six pairwise comparisons per variant
     haplotypes = allel.HaplotypeArray([
         [0, 0, 0, 0],
         [0, 0, 0, 1],
         [0, 0, 1, 1],
         [0, 1, 1, 1],
         [1, 1, 1, 1],
         [0, 0, 1, 2],
         [0, 1, 1, 2],
         [0, 1, -1, -1],
         [-1, -1, -1, -1],
     ])
     allele_counts = haplotypes.count_alleles()
     # per-variant mean pairwise diversity would be
     # [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
     positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
     accessible = np.tile(np.repeat(np.array([True, False]), 5), 3)

     # reference: unmasked windows of size 5 ...
     narrow, _, _, _ = allel.windowed_diversity(
         positions, allele_counts, size=5, start=1, stop=31)
     # ... of which only every other one is kept
     expect = narrow[::2]

     # under test: windows of size 10 with the last half masked out
     actual, _, _, _ = allel.windowed_diversity(
         positions, allele_counts,
         size=10, start=1, stop=31, is_accessible=accessible)

     assert_array_almost_equal(expect, actual)
Example #4
0
    def test_windowed_tajima_d(self):
        """Exercise ``windowed_tajima_d`` over position-based windows.

        Covers a case with a calculable value, windows asserted to be all
        NaN ("too few sites" and "too few segregating sites"), and the
        ``min_sites`` override.
        """
        from allel import windowed_tajima_d

        positions = np.array([1, 11, 21, 31, 41])

        # calculable value in each of the three windows
        counts = AlleleCountsArray([[1, 3],
                                    [2, 2],
                                    [3, 1],
                                    [1, 3],
                                    [2, 2]])
        result, _, _ = windowed_tajima_d(positions, counts, size=25, step=10)
        assert_array_almost_equal(np.array([0.168] * 3), result, decimal=3)

        # too few sites per window: four windows, all NaN
        result, _, _ = windowed_tajima_d(positions, counts, size=15, step=10)
        assert len(result) == 4
        assert np.all(np.isnan(result))

        # too few segregating sites: three windows, all NaN
        counts = AlleleCountsArray([[4, 0],
                                    [2, 2],
                                    [3, 1],
                                    [4, 0],
                                    [2, 2]])
        result, _, _ = windowed_tajima_d(positions, counts, size=25, step=10)
        assert len(result) == 3
        assert np.all(np.isnan(result))

        # the caller may override the threshold via min_sites
        result, _, _ = windowed_tajima_d(positions, counts,
                                         size=25, step=10, min_sites=2)
        assert_array_almost_equal(np.array([0.592] * 3), result, decimal=3)
Example #5
0
    def test_moving_tajima_d(self):
        """Exercise ``moving_tajima_d`` over moving windows of variants.

        Covers a case with a calculable value, windows asserted to be all
        NaN ("too few sites" and "too few segregating sites"), and the
        ``min_sites`` override.
        """
        from allel import moving_tajima_d

        # calculable value in each of the three windows
        counts = AlleleCountsArray([[1, 3], [2, 2], [3, 1], [1, 3], [2, 2]])
        result = moving_tajima_d(counts, size=3, step=1)
        assert_array_almost_equal(np.array([0.168] * 3), result, decimal=3)

        # too few sites per window: four windows, all NaN
        result = moving_tajima_d(counts, size=2, step=1)
        assert len(result) == 4
        assert np.all(np.isnan(result))

        # too few segregating sites: three windows, all NaN
        counts = AlleleCountsArray([[4, 0], [2, 2], [3, 1], [4, 0], [2, 2]])
        result = moving_tajima_d(counts, size=3, step=1)
        assert len(result) == 3
        assert np.all(np.isnan(result))

        # the caller may override the threshold via min_sites
        result = moving_tajima_d(counts, size=3, step=1, min_sites=2)
        assert_array_almost_equal(np.array([0.592] * 3), result, decimal=3)
Example #6
0
    def test_mean_pairwise_diversity(self):
        """Check ``mean_pairwise_difference`` for two and four haplotypes.

        ``fill=-1`` is passed, and -1 is the expected value for the rows
        that contain missing calls (-1) in the two-haplotype case and for
        the all-missing row in the four-haplotype case.
        """
        # simplest case: two haplotypes, a single pairwise comparison
        pair = HaplotypeArray([[0, 0], [1, 1], [0, 1],
                               [1, 2], [0, -1], [-1, -1]])
        result = allel.mean_pairwise_difference(pair.count_alleles(), fill=-1)
        aeq([0, 0, 1, 1, -1, -1], result)

        # four haplotypes, six pairwise comparisons
        quad = HaplotypeArray([[0, 0, 0, 0], [0, 0, 0, 1], [0, 0, 1, 1],
                               [0, 1, 1, 1], [1, 1, 1, 1], [0, 0, 1, 2],
                               [0, 1, 1, 2], [0, 1, -1, -1],
                               [-1, -1, -1, -1]])
        result = allel.mean_pairwise_difference(quad.count_alleles(), fill=-1)
        expected = [0, 3 / 6, 4 / 6, 3 / 6, 0, 5 / 6, 5 / 6, 1, -1]
        assert_array_almost_equal(expected, result)
def test_nsl01_scan_b():
    """Check ``nsl01_scan`` when each row has exactly one ``1`` entry.

    The first output is expected to be ``[1, 4/3, 4/3, 4/3]`` and the
    second output all NaN.
    """
    haplotypes = np.array([
        [0, 0, 0, 1],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 0],
    ])

    actual_nsl0, actual_nsl1 = nsl01_scan(haplotypes)

    assert_array_almost_equal([1, 4 / 3, 4 / 3, 4 / 3], actual_nsl0)
    assert_array_almost_equal([np.nan] * 4, actual_nsl1)
def test_nsl01_scan_a():
    """Check ``nsl01_scan`` on four identical rows of ``[0, 0, 0, 1, 1, 1]``.

    Both outputs are expected to be ``[1, 2, 3, 4]``.
    """
    haplotypes = np.array([[0, 0, 0, 1, 1, 1]] * 4)

    actual_nsl0, actual_nsl1 = nsl01_scan(haplotypes)

    assert_array_almost_equal([1, 2, 3, 4], actual_nsl0)
    assert_array_almost_equal([1, 2, 3, 4], actual_nsl1)
Example #9
0
 def test_patterson_f3(self):
     """Check both outputs of ``patterson_f3`` on five rows of allele counts.

     The ``acc`` table is passed first, followed by ``aca`` and ``acb``.
     """
     counts_a = [[0, 2],
                 [2, 0],
                 [0, 2],
                 [0, 2],
                 [0, 0]]
     counts_b = [[2, 0],
                 [0, 2],
                 [0, 2],
                 [0, 2],
                 [0, 2]]
     counts_c = [[1, 1],
                 [1, 1],
                 [0, 2],
                 [2, 0],
                 [1, 1]]

     f3, hzc = allel.patterson_f3(counts_c, counts_a, counts_b)

     assert_array_almost_equal([-.5, -.5, 0., 1., np.nan], f3)
     assert_array_almost_equal([1., 1., 0., 0., 1.], hzc)
Example #10
0
 def test_patterson_f2(self):
     """Check ``patterson_f2`` on four rows of allele counts.

     NaN is expected for the last row, where the first table is ``[0, 0]``.
     """
     counts_a = [[0, 2], [2, 0], [1, 1], [0, 0]]
     counts_b = [[0, 2], [0, 2], [0, 2], [0, 2]]

     result = allel.patterson_f2(counts_a, counts_b)

     assert_array_almost_equal([0., 1., 0., np.nan], result)
def test_ihh_scan_d():
    """Edge case for ``ihh_scan``: start from 0 haplotype homozygosity.

    ``[0, 0]`` is expected both with and without ``include_edges``.
    """
    gap_sizes = np.array([10], dtype='f8')
    haplotypes = np.array([[0, 1],
                           [1, 0]])

    # same expectation with edges excluded, then included
    for with_edges in (False, True):
        result = ihh_scan(haplotypes, gap_sizes, min_ehh=0,
                          include_edges=with_edges)
        assert_array_almost_equal([0, 0], result)
def test_ihh_scan_a():
    """Simple ``ihh_scan`` case: one haplotype pair, homozygous throughout.

    All NaN is expected when edges are excluded, and ``[0, 10, 20]`` when
    they are included.
    """
    gap_sizes = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 0],
                           [0, 0],
                           [0, 0]])

    scenarios = (
        (False, [np.nan, np.nan, np.nan]),  # do not include edges
        (True, [0, 10, 20]),                # include edges
    )
    for with_edges, expected in scenarios:
        result = ihh_scan(haplotypes, gap_sizes, min_ehh=0,
                          include_edges=with_edges)
        assert_array_almost_equal(expected, result)
def test_ihh_scan_c():
    """Simple ``ihh_scan`` case: one haplotype pair, homozygosity decays.

    ``[0, 5, 15]`` is expected whether or not edges are included.
    """
    gap_sizes = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 1],
                           [0, 0],
                           [0, 0]])

    # edges excluded first, then included; the expectation is the same
    for with_edges in (False, True):
        result = ihh_scan(haplotypes, gap_sizes, min_ehh=0,
                          include_edges=with_edges)
        assert_array_almost_equal([0, 5, 15], result)
def test_ihh_scan_b():
    """``ihh_scan`` with a large gap, which is encoded as -1 in ``gaps``.

    One haplotype pair, homozygous over all variants. All NaN is expected
    when edges are excluded; with edges included, ``[0, 10, nan]`` is
    expected.
    """
    gap_sizes = np.array([10, -1], dtype='f8')
    haplotypes = np.array([[0, 0],
                           [0, 0],
                           [0, 0]])

    scenarios = (
        (False, [np.nan, np.nan, np.nan]),  # do not include edges
        (True, [0, 10, np.nan]),            # include edges
    )
    for with_edges, expected in scenarios:
        result = ihh_scan(haplotypes, gap_sizes, min_ehh=0,
                          include_edges=with_edges)
        assert_array_almost_equal(expected, result)
def test_pbs():
    """Sanity and regression check for ``pbs`` on minimal input.

    Verifies that the result is a 1-D float ndarray of length 4 and that
    it matches the recorded regression values.
    """
    # minimal input data: three allele-count tables of five rows each
    counts_1 = [[2, 0], [0, 2], [1, 1], [2, 0], [0, 2]]
    counts_2 = [[1, 1], [2, 0], [0, 2], [2, 0], [0, 2]]
    counts_3 = [[0, 2], [1, 1], [2, 0], [2, 0], [0, 2]]

    result = pbs(counts_1, counts_2, counts_3, window_size=2, window_step=1)

    # output existence and type
    assert isinstance(result, np.ndarray)
    assert result.ndim == 1
    assert result.shape[0] == 4
    assert result.dtype.kind == 'f'

    # regression check
    assert_array_almost_equal([0.52349464, 0., -0.85199356, np.nan], result)

    # final value is nan because variants in final window are non-segregating
    assert np.isnan(result[3])
Example #16
0
    def test_heterozygosity_expected(self):
        """Check ``heterozygosity_expected`` for diploid and triploid data.

        Results are compared with hand-written expectations (diploid only)
        and with a small local reference implementation (both ploidies).
        """

        def refimpl(f, ploidy, fill=0):
            """Limited reference implementation for testing purposes.

            Assumes exactly three alleles (columns 0, 1 and 2 of ``f``).
            Rows whose frequencies sum to less than 1, or to NaN, are set
            to ``fill``.
            """
            # row-wise frequency totals, used below to find invalid rows
            row_totals = np.sum(f, axis=1)

            # one minus each of the three allele frequencies raised to ploidy
            het = 1 - f[:, 0]**ploidy - f[:, 1]**ploidy - f[:, 2]**ploidy

            with ignore_invalid():
                invalid = (row_totals < 1) | np.isnan(row_totals)
                het[invalid] = fill

            return het

        # diploid
        diploid = GenotypeArray([
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ], dtype='i1')
        freqs = diploid.count_alleles().to_frequencies()

        # fill=-1: compare with literal values and with the reference
        by_hand = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
        by_ref = refimpl(freqs, ploidy=diploid.ploidy, fill=-1)
        result = allel.heterozygosity_expected(freqs,
                                               ploidy=diploid.ploidy,
                                               fill=-1)
        assert_array_almost_equal(by_hand, result)
        assert_array_almost_equal(by_ref, result)

        # fill=0: only the final (all-missing) row changes
        by_hand_zero_fill = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, 0]
        result = allel.heterozygosity_expected(freqs,
                                               ploidy=diploid.ploidy,
                                               fill=0)
        assert_array_almost_equal(by_hand_zero_fill, result)

        # polyploid
        triploid = GenotypeArray([
            [[0, 0, 0], [0, 0, 0]],
            [[1, 1, 1], [1, 1, 1]],
            [[1, 1, 1], [2, 2, 2]],
            [[0, 0, 0], [0, 0, 1]],
            [[0, 0, 0], [0, 0, 2]],
            [[1, 1, 1], [0, 1, 2]],
            [[0, 0, 1], [0, 1, 1]],
            [[0, 1, 1], [0, 1, 2]],
            [[0, 0, 0], [-1, -1, -1]],
            [[0, 0, 1], [-1, -1, -1]],
            [[-1, -1, -1], [-1, -1, -1]],
        ], dtype='i1')
        freqs = triploid.count_alleles().to_frequencies()
        by_ref = refimpl(freqs, ploidy=triploid.ploidy, fill=-1)
        result = allel.heterozygosity_expected(freqs,
                                               ploidy=triploid.ploidy,
                                               fill=-1)
        assert_array_almost_equal(by_ref, result)
Example #17
0
    def test_windowed_diversity(self):
        """Check ``windowed_diversity`` with size-10 windows over 1..31.

        Three windows are expected; each expected value is written as a
        sum of per-variant diversities divided by a window width (10, 10
        and 11).
        """
        # four haplotypes, six pairwise comparisons
        haplotypes = HaplotypeArray([
            [0, 0, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 1],
            [0, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [0, 1, -1, -1],
            [-1, -1, -1, -1],
        ])
        allele_counts = haplotypes.count_alleles()
        # per-variant mean pairwise diversity would be
        # [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
        positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

        result, _, _, _ = allel.windowed_diversity(
            positions, allele_counts, size=10, start=1, stop=31)

        expected = [(7 / 6) / 10, (13 / 6) / 10, 1 / 11]
        assert_array_almost_equal(expected, result)
Example #18
0
    def test_inbreeding_coefficient(self):
        """Check ``inbreeding_coefficient`` on diploid genotypes with fill=-1.

        Expected values are written as ``1 - (ho / he)``, using the ``ho``
        and ``he`` values listed in the comment below; -1 is expected in
        the rows where ``he`` is 0 or -1.
        """
        # diploid
        genotypes = GenotypeArray([
            [[0, 0], [0, 0]],
            [[1, 1], [1, 1]],
            [[1, 1], [2, 2]],
            [[0, 0], [0, 1]],
            [[0, 0], [0, 2]],
            [[1, 1], [1, 2]],
            [[0, 1], [0, 1]],
            [[0, 1], [1, 2]],
            [[0, 0], [-1, -1]],
            [[0, 1], [-1, -1]],
            [[-1, -1], [-1, -1]],
        ], dtype='i1')

        # ho = np.array([0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1])
        # he = np.array([0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1])
        # expect = 1 - (ho/he)
        half_over_375 = 1 - (.5 / .375)
        one_over_half = 1 - (1 / .5)
        expected = [
            -1,
            -1,
            1 - 0,
            half_over_375,
            half_over_375,
            half_over_375,
            one_over_half,
            1 - (1 / .625),
            -1,
            one_over_half,
            -1,
        ]

        result = allel.inbreeding_coefficient(genotypes, fill=-1)
        assert_array_almost_equal(expected, result)
Example #19
0
    def test_windowed_divergence(self):
        """Check ``windowed_divergence`` with size-10 windows over 1..31.

        Three windows are expected; each expected value is written as a
        sum of per-variant divergences divided by a window width (10, 10
        and 11).
        """
        # simplest case, two haplotypes in each population
        haplotypes = HaplotypeArray([
            [0, 0, 0, 0],
            [0, 0, 0, 1],
            [0, 0, 1, 1],
            [0, 1, 1, 1],
            [1, 1, 1, 1],
            [0, 0, 1, 2],
            [0, 1, 1, 2],
            [0, 1, -1, -1],
            [-1, -1, -1, -1],
        ])
        counts_first = haplotypes.take([0, 1], axis=1).count_alleles()
        counts_second = haplotypes.take([2, 3], axis=1).count_alleles()
        # per-variant mean pairwise divergence would be
        # [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
        positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

        result, _, _, _ = allel.windowed_divergence(
            positions, counts_first, counts_second,
            size=10, start=1, stop=31)

        expected = [(6 / 4) / 10, (9 / 4) / 10, 0 / 11]
        assert_array_almost_equal(expected, result)
def test_ihh01_scan_a():
    """Check both outputs of ``ihh01_scan`` with and without edges.

    The first call uses ``min_ehh=0.05`` and ``include_edges=False``; the
    second uses ``min_ehh=0`` and ``include_edges=True``. Only the first
    element of the expected ``ihh0`` differs between the two.
    """
    gap_sizes = np.array([10, 10, 10], dtype='f8')
    haplotypes = np.array([
        [0, 0, 1],
        [0, 1, 1],
        [1, 1, 0],
        [1, 0, 0],
    ])

    # edges excluded
    got_ihh0, got_ihh1 = ihh01_scan(haplotypes, gap_sizes,
                                    min_ehh=0.05, include_edges=False)
    assert_array_almost_equal([np.nan, np.nan, np.nan, 5], got_ihh0)
    assert_array_almost_equal([np.nan, 5, 5, np.nan], got_ihh1)

    # edges included
    got_ihh0, got_ihh1 = ihh01_scan(haplotypes, gap_sizes,
                                    min_ehh=0, include_edges=True)
    assert_array_almost_equal([0, np.nan, np.nan, 5], got_ihh0)
    assert_array_almost_equal([np.nan, 5, 5, np.nan], got_ihh1)
Example #21
0
def test_ihh01_scan_e():
    """Check the effect of ``min_maf`` on ``ihh01_scan``.

    With ``min_maf=0`` the expected ``ihh0`` is ``[0, 10, 20]``; with
    ``min_maf=0.4`` both outputs are expected to be all NaN.
    """
    gap_sizes = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 0, 1]] * 3)
    all_nan = [np.nan, np.nan, np.nan]

    # min_maf=0
    got_ihh0, got_ihh1 = ihh01_scan(haplotypes, gap_sizes, min_ehh=0,
                                    min_maf=0, include_edges=True)
    assert_array_almost_equal([0, 10, 20], got_ihh0)
    assert_array_almost_equal(all_nan, got_ihh1)

    # min_maf=0.4
    got_ihh0, got_ihh1 = ihh01_scan(haplotypes, gap_sizes, min_ehh=0,
                                    min_maf=0.4, include_edges=True)
    assert_array_almost_equal(all_nan, got_ihh0)
    assert_array_almost_equal(all_nan, got_ihh1)
Example #22
0
    def test_rogers_huff_r(self):
        """Check ``rogers_huff_r`` on small inputs.

        Two-row inputs are compared against an exact scalar, or asserted
        to be NaN when the expectation is ``None``. Inputs with more than
        two rows are compared against an array of expected values.
        """
        # (input, expected scalar); None means the result must be NaN
        scalar_cases = [
            ([[0, 1, 2], [0, 1, 2]], 1.),
            ([[0, 1, 2], [2, 1, 0]], -1.),
            ([[0, 0, 0], [0, 0, 0]], None),
            ([[0, 0, 0], [1, 1, 1]], None),
            ([[1, 1, 1], [1, 1, 1]], None),
            ([[0, -1, 0], [-1, 1, -1]], None),
            ([[0, 1, 0], [-1, -1, -1]], None),
            ([[0, 1, 0, 1], [0, 1, 1, 0]], 0),
            ([[0, 1, 2, -1], [0, 1, 2, 2]], 1.),
            ([[0, 1, 2, 2], [0, 1, 2, -1]], 1.),
            ([[0, 1, 2], [0, 1, -1]], 1.),
        ]
        for gn, expected in scalar_cases:
            result = allel.rogers_huff_r(gn)
            if expected is None:
                assert np.isnan(result)
            else:
                assert expected == result

        # (input, expected array) for inputs with more than two rows
        array_cases = [
            ([[0, 2], [2, 0], [0, 1]],
             [-1, 1, -1]),
            ([[0, 2, 0], [0, 2, 0], [2, 0, 2], [0, 2, -1]],
             [1, -1, 1, -1, 1, -1]),
        ]
        for gn, expected in array_cases:
            result = allel.rogers_huff_r(gn)
            assert_array_almost_equal(expected, result)
def test_ihh01_scan_d():
    """Check ``ihh01_scan`` under three keyword settings.

    In every setting the same values are expected for both outputs. The
    first two elements are NaN unless ``include_edges=True`` is passed.
    """
    gap_sizes = np.array([10, 10, 10], dtype='f8')
    haplotypes = np.array([
        [0, 0, 1, 1, 1, 0],
        [0, 1, 0, 1, 0, 1],
        [1, 0, 0, 0, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ])

    # value expected at the last two positions in every setting
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)

    # (keyword arguments, expected values for both ihh0 and ihh1)
    scenarios = [
        (dict(min_ehh=0.05), [np.nan, np.nan, x, x]),
        (dict(min_ehh=0), [np.nan, np.nan, x, x]),
        (dict(min_ehh=0, include_edges=True), [0, 10 * 2 / 3, x, x]),
    ]
    for options, expected in scenarios:
        got_ihh0, got_ihh1 = ihh01_scan(haplotypes, gap_sizes, **options)
        assert_array_almost_equal(expected, got_ihh0)
        assert_array_almost_equal(expected, got_ihh1)