예제 #1
0
 def test_sfs_scaled(self):
     """Check allel.sfs_scaled on a small count vector for several input types."""
     counts = [0, 1, 2, 1]
     expected = [0, 2, 2]

     # Plain Python list input.
     aeq(expected, allel.sfs_scaled(counts))

     # The same counts as numpy arrays of unsigned/signed 16- and 64-bit
     # integers must give the same spectrum.
     for code in ('u2', 'i2', 'u8', 'i8'):
         typed_counts = np.asarray(counts, dtype=code)
         aeq(expected, allel.sfs_scaled(typed_counts))
예제 #2
0
def test_sfs_scaled():
    """Exercise allel.sfs_scaled with default and explicit chromosome counts."""
    counts = [0, 1, 2, 1]

    # Default `n`: list input, then numpy arrays of several integer dtypes.
    expected = [0, 2, 2]
    assert_array_equal(expected, allel.sfs_scaled(counts))
    for code in ('u2', 'i2', 'u8', 'i8'):
        typed_counts = np.asarray(counts, dtype=code)
        assert_array_equal(expected, allel.sfs_scaled(typed_counts))

    # Explicitly providing the number of chromosomes lengthens the result.
    assert_array_equal([0, 2, 2, 0], allel.sfs_scaled(counts, n=3))

    # An `n` smaller than the largest observed count is rejected.
    with pytest.raises(ValueError):
        allel.sfs_scaled(counts, n=1)
예제 #3
0
def sfs_plot(c, ac_subpops, save=True, fold=True, scale=True):
    """
    Plot one site frequency spectrum per population on a shared axis.

    For every population, only sites for which ``is_segregating()`` is true
    and ``max_allele() == 1`` are retained before the spectrum is computed.
    The spectrum variant (folded/unfolded, scaled/unscaled) and the matching
    scikit-allel plotting function are both selected from `fold` and `scale`.

    Parameters
    ----------
    c : object
        Label interpolated into the plot title and the output file name
        (presumably a chromosome/contig name -- confirm with callers).
    ac_subpops : mapping
        Maps population name to an allele counts array exposing
        ``is_segregating``, ``max_allele`` and ``compress``.
    save : bool, optional
        When true, write the figure to ``ScaledSFS-<c>.pdf``.
    fold : bool, optional
        Use the folded spectrum (computed from the full counts array);
        otherwise the unfolded spectrum of column 1 is used.
    scale : bool, optional
        Use the scaled variant of the spectrum.

    Returns
    -------
    dict
        Population name -> computed spectrum.

    Notes
    -----
    Should filter on segregating sites if only a subset of populations is
    used. The output file name always starts with ``ScaledSFS`` regardless of
    `scale`; it is kept as-is so existing callers still find the file.
    """
    # Pick the spectrum function and its matching plot function together so
    # the plot always corresponds to what was actually computed.
    if fold:
        if scale:
            compute_sfs = allel.sfs_folded_scaled
            draw_sfs = allel.stats.plot_sfs_folded_scaled
        else:
            compute_sfs = allel.sfs_folded
            draw_sfs = allel.stats.plot_sfs_folded
    else:
        if scale:
            compute_sfs = allel.sfs_scaled
            draw_sfs = allel.stats.plot_sfs_scaled
        else:
            compute_sfs = allel.sfs
            draw_sfs = allel.stats.plot_sfs

    sfsdict = {}
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.despine(ax=ax, offset=10)
    for pop, acu in ac_subpops.items():
        flt = acu.is_segregating() & (acu.max_allele() == 1)
        print('SFS : retaining', np.count_nonzero(flt), 'SNPs')
        ac1 = allel.AlleleCountsArray(acu.compress(flt, axis=0))
        # Folded spectra take the whole counts array; unfolded spectra take
        # the counts in column 1 only.
        spectrum = compute_sfs(ac1 if fold else ac1[:, 1])
        sfsdict[pop] = spectrum
        # n is the largest per-site total allele count among retained sites.
        draw_sfs(spectrum, ax=ax, label=pop, n=ac1.sum(axis=1).max())
    ax.legend()
    ax.set_title('{} {} {} site frequency spectra'.format(
        c,
        'Scaled' if scale else 'Unscaled',
        'folded' if fold else 'unfolded'))
    ax.set_xlabel('minor allele frequency' if fold
                  else 'derived allele frequency')
    if save:
        fig.savefig("ScaledSFS-{}.pdf".format(c), bbox_inches='tight')
    return sfsdict
def sfs(haplotype, ac, nindiv=None, folded=False):
    """
    Compute a site frequency spectrum table for a SNP matrix.

    Parameters
    ----------
    haplotype : array-like
        Only consulted when `nindiv` is None, in which case
        ``haplotype.shape[1]`` is used as `nindiv`.
    ac : array-like
        Allele counts. The folded spectrum is computed from `ac` as a whole;
        the unfolded spectrum is computed from ``ac.T[1]``.
    nindiv : int, optional
        Upper bound (exclusive) of the ``N_indiv`` values in the result and
        the denominator of ``freq_indiv``.
    folded : bool, optional
        Compute the folded spectrum instead of the unfolded one.

    Returns
    -------
    pandas.DataFrame
        One row per ``N_indiv`` in ``1 .. nindiv - 1`` with columns
        ``N_indiv``, ``count_SNP`` (spectrum), ``i_xi`` (scaled spectrum) and
        ``freq_indiv`` (``N_indiv / nindiv``). Classes absent from the
        spectrum are filled with 0.
    """
    if nindiv is None:
        nindiv = haplotype.shape[1]

    # Only the inputs to the table differ between the two variants; the
    # table construction below is shared.
    if folded:
        counts = allel.sfs_folded(ac)
        scaled = allel.sfs_folded_scaled(ac)
    else:
        alt_counts = ac.T[1]
        counts = allel.sfs(alt_counts)
        scaled = allel.sfs_scaled(alt_counts)

    df_sfs = pd.DataFrame(counts, columns=["count_SNP"])
    df_sfs["i_xi"] = scaled
    df_sfs.index.name = "N_indiv"
    df_sfs.reset_index(inplace=True)

    # Right-merge onto 1 .. nindiv - 1 so every frequency class has a row
    # (and class 0 is dropped); missing classes become 0.
    # NOTE(review): astype(int) also truncates i_xi, which may hold
    # non-integer values for the folded scaled spectrum -- confirm intended.
    wanted = pd.DataFrame({"N_indiv": range(1, nindiv)})
    df_sfs = df_sfs.merge(wanted, on="N_indiv",
                          how="right").fillna(0).astype(int)

    df_sfs["freq_indiv"] = df_sfs.N_indiv / nindiv
    return df_sfs