Ejemplo n.º 1
0
def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes,
                       chunks):
    """Check windowed ``pbs`` output against scikit-allel for each cohort triple.

    A dataset is simulated, cohorts are attached, and the variants are
    windowed with a window size of 25 before ``pbs`` is called with the given
    ``cohorts`` argument.  For every combination of three cohorts:

    * if ``cohort_indexes`` is given and the triple is not listed in it, the
      corresponding ``stat_pbs`` values must all be NaN;
    * otherwise the values must match ``allel.pbs`` computed from the cohort
      allele counts with the same window size.
    """
    window_size = 25
    cohort_dims = ["cohorts_0", "cohorts_1", "cohorts_2"]

    ts = simulate_ts(sample_size, length=200)
    ds = ts_to_dataset(ts, chunks)  # type: ignore[no-untyped-call]
    ds, _ = add_cohorts(
        ds,
        ts,
        n_cohorts,
        cohort_key_names=cohort_dims)  # type: ignore[no-untyped-call]
    ds = window(ds, size=window_size)

    ds = pbs(ds, cohorts=cohorts)

    # Compare every cohort triple with the scikit-allel reference.
    for triple in itertools.combinations(range(n_cohorts), 3):
        selector = {
            dim: f"co_{index}" for dim, index in zip(cohort_dims, triple)
        }
        observed = ds["stat_pbs"].sel(**selector).values

        # Triples that were not requested are expected to be entirely NaN.
        if cohort_indexes is not None and triple not in cohort_indexes:
            np.testing.assert_array_equal(observed,
                                          np.full_like(observed, np.nan))
            continue

        ac_first, ac_second, ac_third = (
            ds.cohort_allele_count.values[:, index, :] for index in triple
        )
        expected = allel.pbs(ac_first, ac_second, ac_third,
                             window_size=window_size)

        # scikit-allel has final window missing
        np.testing.assert_allclose(observed[:-1], expected)
Ejemplo n.º 2
0
def test_pbs(sample_size, n_cohorts):
    """Check single-window ``pbs`` output against scikit-allel.

    A dataset is simulated, cohorts are attached, and all variants are placed
    in one window (window size equal to the number of variants).  For every
    combination of three cohorts the ``stat_pbs`` values must match
    ``allel.pbs`` computed from the cohort allele counts with the same
    window size.
    """
    cohort_dims = ["cohorts_0", "cohorts_1", "cohorts_2"]

    ts = simulate_ts(sample_size)
    ds = ts_to_dataset(ts)  # type: ignore[no-untyped-call]
    ds, _ = add_cohorts(
        ds,
        ts,
        n_cohorts,
        cohort_key_names=cohort_dims)  # type: ignore[no-untyped-call]
    # ``sizes`` is the name -> length mapping; using ``dims`` as a mapping is
    # deprecated on xarray Datasets.
    n_variants = ds.sizes["variants"]
    ds = window(ds, size=n_variants)  # single window

    ds = pbs(ds)

    # Compare every cohort triple with the scikit-allel reference.
    for triple in itertools.combinations(range(n_cohorts), 3):
        selector = {
            dim: f"co_{index}" for dim, index in zip(cohort_dims, triple)
        }
        observed = ds["stat_pbs"].sel(**selector).values

        ac_first, ac_second, ac_third = (
            ds.cohort_allele_count.values[:, index, :] for index in triple
        )
        expected = allel.pbs(ac_first, ac_second, ac_third,
                             window_size=n_variants)

        np.testing.assert_allclose(observed, expected)
Ejemplo n.º 3
0
def meanPBS(ac1, ac2, ac3, window_size, normalise):
    """Compute PBS with scikit-allel and summarise it.

    ``allel.pbs`` is called on the three allele-count arrays with the given
    ``window_size`` and ``normed=normalise``.

    Returns a 4-tuple ``(meanpbs, se, pbs, stats)``:

    * ``meanpbs`` -- NaN-ignoring mean of the PBS values,
    * ``se`` and ``stats`` -- the second and third values returned by
      ``allel.stats.misc.jackknife`` when run on the PBS values with the
      plain mean as the statistic,
    * ``pbs`` -- the PBS array itself.

    Note: the jackknife uses ``np.mean`` (not ``np.nanmean``), so unlike
    ``meanpbs`` it does not skip NaN entries.
    """
    pbs_values = allel.pbs(ac1, ac2, ac3,
                           window_size=window_size,
                           normed=normalise)

    # Average over all PBS values (will be per gene), ignoring NaNs.
    overall_mean = np.nanmean(pbs_values)

    # The jackknife's own mean estimate is discarded; only the standard
    # error and the per-iteration statistics are kept.
    _jack_mean, std_err, replicates = allel.stats.misc.jackknife(
        pbs_values, statistic=np.mean)

    return (overall_mean, std_err, pbs_values, replicates)
Ejemplo n.º 4
0
def meanPBS(ac1, ac2, ac3, window_size, normalise):
    """
    Calculate PBS on allele count arrays and take the mean of all PBS values.

    PBS is computed by ``allel.pbs`` with the given ``window_size`` and
    ``normed=normalise``.  The return value is the tuple
    ``(meanpbs, se, pbs, stats)`` where ``meanpbs`` is the NaN-ignoring mean
    of the PBS array ``pbs``, and ``se`` / ``stats`` are the second and third
    values returned by ``allel.stats.misc.jackknife`` run on ``pbs`` with the
    plain (not NaN-ignoring) mean as the statistic.
    """
    # PBS values from scikit-allel
    pbs_array = allel.pbs(
        ac1, ac2, ac3, window_size=window_size, normed=normalise
    )

    # get average of all pbs values (will be per gene)
    mean_value = np.nanmean(pbs_array)

    # first jackknife output (its mean estimate) is not used
    _discarded, jack_se, jack_values = allel.stats.misc.jackknife(
        pbs_array, statistic=np.mean
    )

    return (mean_value, jack_se, pbs_array, jack_values)
Ejemplo n.º 5
0
def test_pbs():
    """Sanity and regression check of ``pbs`` on a five-variant toy input."""
    # Minimal allele counts for three populations.
    counts_pop1 = [[2, 0], [0, 2], [1, 1], [2, 0], [0, 2]]
    counts_pop2 = [[1, 1], [2, 0], [0, 2], [2, 0], [0, 2]]
    counts_pop3 = [[0, 2], [1, 1], [2, 0], [2, 0], [0, 2]]

    result = pbs(counts_pop1, counts_pop2, counts_pop3,
                 window_size=2, window_step=1)

    # Output existence and type: a 1-D float array with four windows.
    assert isinstance(result, np.ndarray)
    assert result.ndim == 1
    assert result.shape[0] == 4
    assert result.dtype.kind == 'f'

    # Regression check against previously recorded values.
    expected = [0.52349464, 0., -0.85199356, np.nan]
    assert_array_almost_equal(expected, result)

    # Final value is nan because variants in final window are non-segregating.
    assert np.isnan(result[3])
Ejemplo n.º 6
0
                chrom=chrom,
                ylim=0.5,
                save=True)

    #### Population Branch Statistic (PBS) in windows ####
    if pbs:
        for pbscomp in pbscomps:
            pop1, pop2, outpop = pbscomp.split("_")
            cohortText = f"(({pop1}, {pop2}), {outpop})"
            print(f"Calculating PBS values in sliding window for {pbscomp}\n")

            for wname, size, step in zip(windownames, windowsizes,
                                         windowsteps):
                pbsArray = allel.pbs(acsubpops[pop1],
                                     acsubpops[pop2],
                                     acsubpops[outpop],
                                     window_size=size,
                                     window_step=step,
                                     normed=True)
                midpoint = allel.moving_statistic(pos,
                                                  np.median,
                                                  size=size,
                                                  step=step)

                cohortNoSpaceText = pbscomp + "." + wname
                rnaseqpop.plotWindowed(
                    statName="PBS",
                    cohortText=cohortText,
                    cohortNoSpaceText=cohortNoSpaceText,
                    values=pbsArray,
                    midpoints=midpoint,
                    colour='dodgerblue',