def test_masked_windowed_divergence(self):
    """Compare masked size-10 windows against every other size-5 window.

    The accessibility mask repeats five True then five False entries three
    times, so only the first half of each size-10 window is accessible.
    """
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    # allele counts for the two populations (columns 0-1 and columns 2-3)
    ac1 = haplotypes.take([0, 1], axis=1).count_alleles()
    ac2 = haplotypes.take([2, 3], axis=1).count_alleles()
    pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
    half_on_half_off = np.repeat(np.array([True, False]), 5)
    mask = np.tile(half_on_half_off, 3)

    # reference: unmasked windows of size 5, keeping every other one
    small_windows, _, _, _ = allel.windowed_divergence(
        pos, ac1, ac2, size=5, start=1, stop=31
    )
    expect = small_windows[::2]

    # masked windows of size 10
    actual, _, _, _ = allel.windowed_divergence(
        pos, ac1, ac2, size=10, start=1, stop=31, is_accessible=mask
    )
    assert_array_almost_equal(expect, actual)
def test_patterson_d(self):
    """Check the numerator and denominator returned by allel.patterson_d."""
    counts_a = [[0, 2], [2, 0], [2, 0], [1, 1], [0, 0]]
    counts_b = [[0, 2], [0, 2], [0, 2], [1, 1], [0, 2]]
    counts_c = [[2, 0], [2, 0], [0, 2], [1, 1], [0, 2]]
    counts_d = [[2, 0], [0, 2], [2, 0], [1, 1], [0, 2]]

    num, den = allel.patterson_d(counts_a, counts_b, counts_c, counts_d)

    # the last row has no allele calls in the first population -> NaN
    assert_array_almost_equal([0., 1., -1., 0., np.nan], num)
    assert_array_almost_equal([0., 1., 1., 0.25, np.nan], den)
def test_masked_windowed_diversity(self):
    """Compare masked size-10 windows against every other size-5 window.

    Each size-10 window has its last half masked out, so its diversity
    should match the unmasked size-5 window covering its first half.
    """
    # four haplotypes, 6 pairwise comparisons
    haplotypes = allel.HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    ac = haplotypes.count_alleles()
    # per-variant mean pairwise diversity would be
    # [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
    half_on_half_off = np.repeat(np.array([True, False]), 5)
    mask = np.tile(half_on_half_off, 3)

    # expected: size-5 windows, taking only every other element
    small_windows, _, _, _ = allel.windowed_diversity(
        pos, ac, size=5, start=1, stop=31
    )
    expect = small_windows[::2]

    # actual: size-10 windows with the accessibility mask applied
    actual, _, _, _ = allel.windowed_diversity(
        pos, ac, size=10, start=1, stop=31, is_accessible=mask
    )
    assert_array_almost_equal(expect, actual)
def test_windowed_tajima_d(self):
    """Exercise windowed_tajima_d: normal, NaN, and min_sites override cases."""
    from allel import windowed_tajima_d

    pos = np.array([1, 11, 21, 31, 41])
    segregating = AlleleCountsArray([[1, 3], [2, 2], [3, 1], [1, 3], [2, 2]])

    # example with calculable value
    result, _, _ = windowed_tajima_d(pos, segregating, size=25, step=10)
    assert_array_almost_equal(np.array([0.168] * 3), result, decimal=3)

    # too few sites per window
    result, _, _ = windowed_tajima_d(pos, segregating, size=15, step=10)
    assert 4 == len(result)
    assert np.all(np.isnan(result))

    # too few segregating sites
    sparse = AlleleCountsArray([[4, 0], [2, 2], [3, 1], [4, 0], [2, 2]])
    result, _, _ = windowed_tajima_d(pos, sparse, size=25, step=10)
    assert 3 == len(result)
    assert np.all(np.isnan(result))

    # allow people to override if they really want to
    result, _, _ = windowed_tajima_d(pos, sparse, size=25, step=10,
                                     min_sites=2)
    assert_array_almost_equal(np.array([0.592] * 3), result, decimal=3)
def test_moving_tajima_d(self):
    """Exercise moving_tajima_d: normal, NaN, and min_sites override cases."""
    from allel import moving_tajima_d

    segregating = AlleleCountsArray([[1, 3], [2, 2], [3, 1], [1, 3], [2, 2]])

    # example with calculable value
    result = moving_tajima_d(segregating, size=3, step=1)
    assert_array_almost_equal(np.array([0.168] * 3), result, decimal=3)

    # too few sites per window
    result = moving_tajima_d(segregating, size=2, step=1)
    assert 4 == len(result)
    assert np.all(np.isnan(result))

    # too few segregating sites
    sparse = AlleleCountsArray([[4, 0], [2, 2], [3, 1], [4, 0], [2, 2]])
    result = moving_tajima_d(sparse, size=3, step=1)
    assert 3 == len(result)
    assert np.all(np.isnan(result))

    # allow people to override if they really want to
    result = moving_tajima_d(sparse, size=3, step=1, min_sites=2)
    assert_array_almost_equal(np.array([0.592] * 3), result, decimal=3)
def test_mean_pairwise_diversity(self):
    """Check allel.mean_pairwise_difference for two and four haplotypes."""
    # simplest case: two haplotypes, one pairwise comparison
    two_haps = HaplotypeArray([
        [0, 0],
        [1, 1],
        [0, 1],
        [1, 2],
        [0, -1],
        [-1, -1],
    ])
    result = allel.mean_pairwise_difference(two_haps.count_alleles(), fill=-1)
    aeq([0, 0, 1, 1, -1, -1], result)

    # four haplotypes, 6 pairwise comparisons
    four_haps = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    result = allel.mean_pairwise_difference(four_haps.count_alleles(), fill=-1)
    assert_array_almost_equal(
        [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1], result
    )
def test_nsl01_scan_b():
    """Check nsl01_scan when each row carries a single 1 in a different column."""
    haplotypes = np.array([
        [0, 0, 0, 1],
        [0, 0, 1, 0],
        [0, 1, 0, 0],
        [1, 0, 0, 0],
    ])
    nsl0, nsl1 = nsl01_scan(haplotypes)

    assert_array_almost_equal([1, 4 / 3, 4 / 3, 4 / 3], nsl0)
    assert_array_almost_equal([np.nan, np.nan, np.nan, np.nan], nsl1)
def test_nsl01_scan_a():
    """Check nsl01_scan when all four rows are the same [0, 0, 0, 1, 1, 1]."""
    haplotypes = np.array([[0, 0, 0, 1, 1, 1]] * 4)
    nsl0, nsl1 = nsl01_scan(haplotypes)

    # both outputs are expected to grow by one per row
    assert_array_almost_equal([1, 2, 3, 4], nsl0)
    assert_array_almost_equal([1, 2, 3, 4], nsl1)
def test_patterson_f3(self):
    """Check both values returned by allel.patterson_f3(acc, aca, acb)."""
    counts_a = [[0, 2], [2, 0], [0, 2], [0, 2], [0, 0]]
    counts_b = [[2, 0], [0, 2], [0, 2], [0, 2], [0, 2]]
    counts_c = [[1, 1], [1, 1], [0, 2], [2, 0], [1, 1]]

    # note the argument order: counts_c goes first
    f3, hzc = allel.patterson_f3(counts_c, counts_a, counts_b)

    assert_array_almost_equal([-.5, -.5, 0., 1., np.nan], f3)
    assert_array_almost_equal([1., 1., 0., 0., 1.], hzc)
def test_patterson_f2(self):
    """Check allel.patterson_f2 on four rows, the last with no calls in aca."""
    counts_a = [[0, 2], [2, 0], [1, 1], [0, 0]]
    counts_b = [[0, 2], [0, 2], [0, 2], [0, 2]]

    result = allel.patterson_f2(counts_a, counts_b)

    assert_array_almost_equal([0., 1., 0., np.nan], result)
def test_ihh_scan_d():
    """Edge case for ihh_scan: start from 0 haplotype homozygosity."""
    gaps = np.array([10], dtype='f8')
    haplotypes = np.array([[0, 1], [1, 0]])

    # the expected result is the same with and without edges
    for include_edges in (False, True):
        result = ihh_scan(haplotypes, gaps, min_ehh=0,
                          include_edges=include_edges)
        assert_array_almost_equal([0, 0], result)
def test_ihh_scan_a():
    """Simple ihh_scan case: 1 haplotype pair, homozygous over all variants."""
    gaps = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 0], [0, 0], [0, 0]])

    cases = (
        # do not include edges
        (False, [np.nan, np.nan, np.nan]),
        # include edges
        (True, [0, 10, 20]),
    )
    for include_edges, expect in cases:
        result = ihh_scan(haplotypes, gaps, min_ehh=0,
                          include_edges=include_edges)
        assert_array_almost_equal(expect, result)
def test_ihh_scan_c():
    """Simple ihh_scan case: 1 haplotype pair, haplotype homozygosity decays."""
    gaps = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 1], [0, 0], [0, 0]])

    # the expected result is the same with and without edges
    for include_edges in (False, True):
        result = ihh_scan(haplotypes, gaps, min_ehh=0,
                          include_edges=include_edges)
        assert_array_almost_equal([0, 5, 15], result)
def test_ihh_scan_b():
    """ihh_scan with a large gap (encoded as -1) between the last two variants.

    One haplotype pair, haplotype homozygosity over all variants.
    """
    gaps = np.array([10, -1], dtype='f8')
    haplotypes = np.array([[0, 0], [0, 0], [0, 0]])

    cases = (
        # do not include edges
        (False, [np.nan, np.nan, np.nan]),
        # include edges
        (True, [0, 10, np.nan]),
    )
    for include_edges, expect in cases:
        result = ihh_scan(haplotypes, gaps, min_ehh=0,
                          include_edges=include_edges)
        assert_array_almost_equal(expect, result)
def test_pbs():
    """Sanity and regression check for pbs on minimal input data."""
    pop1 = [[2, 0], [0, 2], [1, 1], [2, 0], [0, 2]]
    pop2 = [[1, 1], [2, 0], [0, 2], [2, 0], [0, 2]]
    pop3 = [[0, 2], [1, 1], [2, 0], [2, 0], [0, 2]]

    result = pbs(pop1, pop2, pop3, window_size=2, window_step=1)

    # sanity check for output existence and type
    assert isinstance(result, np.ndarray)
    assert 1 == result.ndim
    assert 4 == result.shape[0]
    assert 'f' == result.dtype.kind

    # regression check
    assert_array_almost_equal([0.52349464, 0., -0.85199356, np.nan], result)

    # final value is nan because variants in final window are non-segregating
    assert np.isnan(result[3])
def test_heterozygosity_expected(self):
    """Check allel.heterozygosity_expected for diploid and triploid genotypes.

    Results are compared against hard-coded values (diploid only) and
    against a small three-allele reference implementation.
    """

    def refimpl(f, ploidy, fill=0):
        """Reference implementation limited to exactly three alleles."""
        # rows whose frequencies do not sum to 1 get the fill value
        af_sum = np.sum(f, axis=1)
        out = 1 - f[:, 0]**ploidy - f[:, 1]**ploidy - f[:, 2]**ploidy
        with ignore_invalid():
            incomplete = (af_sum < 1) | np.isnan(af_sum)
            out[incomplete] = fill
        return out

    # diploid
    diploid = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')
    # expected values for every row except the final all-missing one
    called_rows = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5]
    af = diploid.count_alleles().to_frequencies()

    from_ref = refimpl(af, ploidy=diploid.ploidy, fill=-1)
    result = allel.heterozygosity_expected(af, ploidy=diploid.ploidy, fill=-1)
    assert_array_almost_equal(called_rows + [-1], result)
    assert_array_almost_equal(from_ref, result)

    result = allel.heterozygosity_expected(af, ploidy=diploid.ploidy, fill=0)
    assert_array_almost_equal(called_rows + [0], result)

    # polyploid
    triploid = GenotypeArray([
        [[0, 0, 0], [0, 0, 0]],
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [2, 2, 2]],
        [[0, 0, 0], [0, 0, 1]],
        [[0, 0, 0], [0, 0, 2]],
        [[1, 1, 1], [0, 1, 2]],
        [[0, 0, 1], [0, 1, 1]],
        [[0, 1, 1], [0, 1, 2]],
        [[0, 0, 0], [-1, -1, -1]],
        [[0, 0, 1], [-1, -1, -1]],
        [[-1, -1, -1], [-1, -1, -1]],
    ], dtype='i1')
    af = triploid.count_alleles().to_frequencies()
    from_ref = refimpl(af, ploidy=triploid.ploidy, fill=-1)
    result = allel.heterozygosity_expected(af, ploidy=triploid.ploidy, fill=-1)
    assert_array_almost_equal(from_ref, result)
def test_windowed_diversity(self):
    """Check allel.windowed_diversity over size-10 windows from 1 to 31."""
    # four haplotypes, 6 pairwise comparisons
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    ac = haplotypes.count_alleles()
    pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # per-variant mean pairwise diversity would be
    # [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    expect = [(7 / 6) / 10, (13 / 6) / 10, 1 / 11]

    result, _, _, _ = allel.windowed_diversity(
        pos, ac, size=10, start=1, stop=31
    )
    assert_array_almost_equal(expect, result)
def test_inbreeding_coefficient(self):
    """Check allel.inbreeding_coefficient on diploid genotypes with fill=-1."""
    # diploid
    genotypes = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')

    # expected value is 1 - (ho / he), where
    # ho = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
    # he = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
    one_het_of_two = 1 - (.5 / .375)
    all_het = 1 - (1 / .5)
    expect = [
        -1,
        -1,
        1 - 0,
        one_het_of_two,
        one_het_of_two,
        one_het_of_two,
        all_het,
        1 - (1 / .625),
        -1,
        all_het,
        -1,
    ]

    result = allel.inbreeding_coefficient(genotypes, fill=-1)
    assert_array_almost_equal(expect, result)
def test_windowed_divergence(self):
    """Check allel.windowed_divergence over size-10 windows from 1 to 31."""
    # simplest case, two haplotypes in each population
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    ac1 = haplotypes.take([0, 1], axis=1).count_alleles()
    ac2 = haplotypes.take([2, 3], axis=1).count_alleles()
    pos = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # per-variant mean pairwise divergence would be
    # [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    expect = [(6 / 4) / 10, (9 / 4) / 10, 0 / 11]

    result, _, _, _ = allel.windowed_divergence(
        pos, ac1, ac2, size=10, start=1, stop=31
    )
    assert_array_almost_equal(expect, result)
def test_ihh01_scan_a():
    """Check ihh01_scan on a four-row array, without and then with edges."""
    gaps = np.array([10, 10, 10], dtype='f8')
    haplotypes = np.array([
        [0, 0, 1],
        [0, 1, 1],
        [1, 1, 0],
        [1, 0, 0],
    ])

    # edges excluded, min_ehh=0.05
    ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=0.05,
                            include_edges=False)
    assert_array_almost_equal([np.nan, np.nan, np.nan, 5], ihh0)
    assert_array_almost_equal([np.nan, 5, 5, np.nan], ihh1)

    # edges included, min_ehh=0: only the first ihh0 value changes
    ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=0, include_edges=True)
    assert_array_almost_equal([0, np.nan, np.nan, 5], ihh0)
    assert_array_almost_equal([np.nan, 5, 5, np.nan], ihh1)
def test_ihh01_scan_e():
    """Check the effect of the min_maf argument of ihh01_scan."""
    gaps = np.array([10, 10], dtype='f8')
    haplotypes = np.array([[0, 0, 1]] * 3)
    all_nan = [np.nan, np.nan, np.nan]

    # min_maf=0: ihh0 is computed
    ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=0, min_maf=0,
                            include_edges=True)
    assert_array_almost_equal([0, 10, 20], ihh0)
    assert_array_almost_equal(all_nan, ihh1)

    # min_maf=0.4: both outputs are NaN throughout
    ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=0, min_maf=0.4,
                            include_edges=True)
    assert_array_almost_equal(all_nan, ihh0)
    assert_array_almost_equal(all_nan, ihh1)
def test_rogers_huff_r(self):
    """Check allel.rogers_huff_r on exact, NaN, and multi-row inputs."""

    def check_exact(gn, expect):
        # result must compare exactly equal to the expected scalar
        actual = allel.rogers_huff_r(gn)
        assert expect == actual

    def check_nan(gn):
        actual = allel.rogers_huff_r(gn)
        assert np.isnan(actual)

    def check_close(gn, expect):
        actual = allel.rogers_huff_r(gn)
        assert_array_almost_equal(expect, actual)

    # two rows, no missing calls
    check_exact([[0, 1, 2], [0, 1, 2]], 1.)
    check_exact([[0, 1, 2], [2, 1, 0]], -1.)

    # rows without variation, or with no shared non-missing calls
    check_nan([[0, 0, 0], [0, 0, 0]])
    check_nan([[0, 0, 0], [1, 1, 1]])
    check_nan([[1, 1, 1], [1, 1, 1]])
    check_nan([[0, -1, 0], [-1, 1, -1]])
    check_nan([[0, 1, 0], [-1, -1, -1]])

    check_exact([[0, 1, 0, 1], [0, 1, 1, 0]], 0)

    # a single missing call (-1) in either row
    check_exact([[0, 1, 2, -1], [0, 1, 2, 2]], 1.)
    check_exact([[0, 1, 2, 2], [0, 1, 2, -1]], 1.)
    check_exact([[0, 1, 2], [0, 1, -1]], 1.)

    # more than two rows: one value per pair of rows
    check_close([[0, 2], [2, 0], [0, 1]], [-1, 1, -1])
    check_close(
        [[0, 2, 0], [0, 2, 0], [2, 0, 2], [0, 2, -1]],
        [1, -1, 1, -1, 1, -1],
    )
def test_ihh01_scan_d():
    """Check ihh01_scan on a four-row, six-column array for three settings."""
    gaps = np.array([10, 10, 10], dtype='f8')
    haplotypes = np.array([
        [0, 0, 1, 1, 1, 0],
        [0, 1, 0, 1, 0, 1],
        [1, 0, 0, 0, 1, 1],
        [0, 0, 0, 1, 1, 1],
    ])
    # value expected in the last two positions for every setting
    x = (10 * (1 + 1 / 3) / 2) + (10 * (1 / 3 + 0) / 2)

    # without include_edges, min_ehh of 0.05 and 0 give the same expectation
    for min_ehh in (0.05, 0):
        ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=min_ehh)
        assert_array_almost_equal([np.nan, np.nan, x, x], ihh0)
        assert_array_almost_equal([np.nan, np.nan, x, x], ihh1)

    # with edges, the first two positions are no longer NaN
    ihh0, ihh1 = ihh01_scan(haplotypes, gaps, min_ehh=0, include_edges=True)
    assert_array_almost_equal([0, 10 * 2 / 3, x, x], ihh0)
    assert_array_almost_equal([0, 10 * 2 / 3, x, x], ihh1)