Ejemplo n.º 1
0
def test_ihs_data():
    """Check ``ihs`` against a hand-computed value on the fixture haplotypes.

    ``hap1`` and ``hap2`` are joined with ``np.hstack``, variants are placed
    at positions ``1 .. h.shape[0]``, and the score at index 9 is compared
    with ``log(5.5 / 1.5)`` for every combination of ``use_threads`` and
    ``include_edges``.
    """
    h = np.hstack([hap1, hap2])
    pos = np.arange(1, h.shape[0] + 1)
    expect = np.log(5.5 / 1.5)

    for use_threads in True, False:
        for include_edges in True, False:
            score = ihs(h, pos, include_edges=include_edges,
                        use_threads=use_threads)
            actual = score[9]
            # Floating-point results should not be compared with ``==``:
            # a differently ordered but mathematically equivalent
            # computation may differ in the last bits. A tight relative
            # tolerance keeps the check strict without being brittle.
            np.testing.assert_allclose(
                actual, expect, rtol=1e-12,
                err_msg="use_threads={}, include_edges={}".format(
                    use_threads, include_edges))
Ejemplo n.º 2
0
def test_ihs():
    """Smoke-test ``ihs``: result type, shape, dtype and length validation.

    A random 0/1 haplotype array of 1000 variants by 20 haplotypes is scored
    for every combination of ``use_threads``, ``min_ehh`` and
    ``include_edges``. Each result must be a float64 ndarray with one entry
    per variant. Passing ``pos`` or ``map_pos`` one element short must raise
    ``ValueError``.
    """
    variant_count = 1000
    haplotype_count = 20
    haps = np.random.randint(
        0, 2, size=(variant_count, haplotype_count)).astype('i1')
    positions = np.arange(0, variant_count * 10, 10)

    # Flattened option grid; iteration order matches three nested loops
    # over threads -> min_ehh -> include_edges.
    option_grid = [
        (threads, ehh_floor, edges)
        for threads in (True, False)
        for ehh_floor in (0, 0.05, 0.5)
        for edges in (True, False)
    ]
    for threads, ehh_floor, edges in option_grid:
        result = ihs(haps, positions, min_ehh=ehh_floor,
                     include_edges=edges, use_threads=threads)
        assert isinstance(result, np.ndarray)
        assert result.shape == (variant_count,)
        assert result.dtype == np.dtype('f8')

    # Positions one element shorter than the haplotype array are rejected.
    with pytest.raises(ValueError):
        ihs(haps, positions[1:])

    # The same applies to a map-position vector of the wrong length.
    with pytest.raises(ValueError):
        ihs(haps, positions, map_pos=positions[1:])
def ihs(haplotype, pos_vec, window=None):
    """Return the standardized integrated haplotype score (iHS).

    Raw scores are computed with ``allel.ihs`` (``min_maf=0.01``,
    ``include_edges=True``) and standardized with
    ``allel.standardize_by_allele_count`` using the second column of
    ``haplotype.count_alleles()``.

    Parameters
    ----------
    haplotype
        Haplotype data accepted by ``allel.ihs``; must also provide a
        ``count_alleles()`` method.
    pos_vec
        Variant positions, passed to ``allel.ihs`` and, when ``window`` is
        given, to ``pd.cut``.
    window : int, optional
        Number of bins that ``pd.cut`` splits ``pos_vec`` into. Bins are
        labelled ``1 .. window``. When falsy, no binning is done.

    Returns
    -------
    The per-variant standardized scores when ``window`` is falsy; otherwise
    a pandas Series holding the mean standardized score of each position
    bin, indexed by bin label.
    """
    raw_scores = allel.ihs(haplotype, pos_vec, min_maf=0.01,
                           include_edges=True)
    allele1_counts = haplotype.count_alleles().T[1]
    standardized, _bins = allel.standardize_by_allele_count(
        raw_scores, allele1_counts, diagnostics=False)

    # Guard clause: without a window the per-variant scores are the result.
    if not window:
        return standardized

    frame = pd.DataFrame(standardized, columns=["iHS"])
    frame["pos_cat"] = pd.cut(pos_vec, window, labels=range(1, window + 1))
    return frame.groupby("pos_cat").iHS.mean()
Ejemplo n.º 4
0
import seaborn as sns
import pandas as pd
# Names of the chromosome chunks; each one has its own phased HDF5 call set.
chromlist = ["Wb_Chr1_0", "Wb_Chr1_1", "Wb_Chr2_0", "Wb_Chr2_1", "Wb_Chr2_2",
             "Wb_Chr2_3", "Wb_Chr3_0", "Wb_Chr3_1", "Wb_Chr4_0", "Wb_Chr4_1",
             "Wb_Chr4_2"]
# Maps chunk name -> (standardized iHS, standardized nSL).
seldict = {}
for c in chromlist:
    # Open the call set in a context manager so the HDF5 handle is released
    # after each chunk instead of leaving one open file per iteration.
    with h5py.File("PNG.phased.autosomal.recode.{}.h5".format(c),
                   mode='r') as callset:
        samples = callset['samples'][:]
        sample_name = [sid.decode() for sid in samples.tolist()]
        g = allel.GenotypeChunkedArray(callset["calldata/GT"])
        h = g.to_haplotypes()
        pos = allel.SortedIndex(callset["variants/POS"][:])
        # Second column of the allele counts, used to bin the scores.
        acc = h.count_alleles()[:, 1]
        # ihs
        ihs = allel.ihs(h, pos, include_edges=True)
        ihs_std = allel.standardize_by_allele_count(ihs, acc)
        # NOTE(review): -log10 is NaN wherever the standardized score is
        # negative -- confirm whether np.abs() was intended here.
        plt.plot(pos, -np.log10(ihs_std[0]))
        # Despite its name, ``nan`` is the mask of entries that are NOT NaN.
        nan = ~np.isnan(ihs)
        ihs_real = ihs[nan]
        pos_ihs = pos[nan]
        # nsl
        nsl = allel.nsl(h)
        nsl_std = allel.standardize_by_allele_count(nsl, acc)
        plt.plot(pos, -np.log10(nsl_std[0]))
        # Filter the nSL scores with their own mask; the previous version
        # reused ``ihs`` here, so ``nsl_real`` just duplicated ``ihs_real``.
        nan = ~np.isnan(nsl)
        nsl_real = nsl[nan]
        pos_nsl = pos[nan]
        seldict[c] = (ihs_std[0], nsl_std[0])
        # EHH decay is site dependent, so it is not computed per chunk here
        # (see allel.ehh_decay).
Ejemplo n.º 5
0
    "Wb_Chr1_0", "Wb_Chr1_1", "Wb_Chr2_0", "Wb_Chr2_1", "Wb_Chr2_2",
    "Wb_Chr2_3", "Wb_Chr3_0", "Wb_Chr3_1", "Wb_Chr4_0", "Wb_Chr4_1",
    "Wb_Chr4_2"
]
# Maps chunk name -> (standardized iHS, standardized nSL).
seldict = {}
for c in chromlist:
    # Open the call set in a context manager so the HDF5 handle is released
    # after each chunk instead of leaving one open file per iteration.
    with h5py.File("PNG.phased.autosomal.recode.{}.h5".format(c),
                   mode='r') as callset:
        samples = callset['samples'][:]
        sample_name = [sid.decode() for sid in samples.tolist()]
        g = allel.GenotypeChunkedArray(callset["calldata/GT"])
        h = g.to_haplotypes()
        pos = allel.SortedIndex(callset["variants/POS"][:])
        # Second column of the allele counts, used to bin the scores.
        acc = h.count_alleles()[:, 1]
        # ihs
        ihs = allel.ihs(h, pos, include_edges=True)
        ihs_std = allel.standardize_by_allele_count(ihs, acc)
        # NOTE(review): -log10 is NaN wherever the standardized score is
        # negative -- confirm whether np.abs() was intended here.
        plt.plot(pos, -np.log10(ihs_std[0]))
        # Despite its name, ``nan`` is the mask of entries that are NOT NaN.
        nan = ~np.isnan(ihs)
        ihs_real = ihs[nan]
        pos_ihs = pos[nan]
        # nsl
        nsl = allel.nsl(h)
        nsl_std = allel.standardize_by_allele_count(nsl, acc)
        plt.plot(pos, -np.log10(nsl_std[0]))
        # Filter the nSL scores with their own mask; the previous version
        # reused ``ihs`` here, so ``nsl_real`` just duplicated ``ihs_real``.
        nan = ~np.isnan(nsl)
        nsl_real = nsl[nan]
        pos_nsl = pos[nan]
        seldict[c] = (ihs_std[0], nsl_std[0])
        # EHH decay is site dependent, so it is not computed per chunk here
        # (see allel.ehh_decay).