def ExamplePlotFrozenDist():
    """Example of PlotFrozenDist.

    Opens a new figure with a 4x2 grid of subplots and draws one frozen
    ``scipy.stats`` distribution in each: two normals, two gammas, two
    Poissons and two Skellams. The second gamma panel additionally gets a
    red vertical line at the distribution mean.
    """
    # ``vlines`` is imported here as well: it is used below for the mean
    # marker and was previously missing from the local imports.
    from pylab import figure, subplot, vlines
    from scipy.stats import norm, gamma, poisson, skellam

    print("Example: Norm, Norm, Gamma, Gamma, Poisson, Poisson, Skellam, Skellam.")
    figure()

    subplot(421)
    PlotFrozenDist(norm())
    subplot(422)
    PlotFrozenDist(norm(loc=10, scale=0.5))

    subplot(423)
    PlotFrozenDist(gamma(3))
    subplot(424)
    ga = gamma(3, loc=5)
    PlotFrozenDist(ga, q=1e-6, color='g')
    # stats() returns (mean, variance) by default; take the mean.
    mn = float(ga.stats()[0])
    # add a red vline at the mean, from the axis up to the pdf value there
    vlines(mn, 0, ga.pdf(mn), colors='r', linestyles='-')

    subplot(425)
    PlotFrozenDist(poisson(5.3))
    subplot(426)
    # Pass other plotting arguments
    PlotFrozenDist(poisson(10), color='r', ms=3)

    subplot(427)
    PlotFrozenDist(skellam(5.3, 10), marker='*')
    subplot(428)
    PlotFrozenDist(skellam(100, 10), marker='+', no_vlines=True)
def ExamplePlotFrozenDist():
    """Example of PlotFrozenDist.

    Draws eight frozen ``scipy.stats`` distributions (normal, gamma,
    Poisson, Skellam; two of each) on a 4x2 subplot grid of a new figure.

    See :meth:`plotfrozen.PlotFrozenDist`
    """
    from pylab import figure, subplot
    from scipy.stats import norm, gamma, poisson, skellam
    from matplotlib.pyplot import vlines

    figure()

    # Row 1: normal distributions
    subplot(421)
    PlotFrozenDist(norm())
    subplot(422)
    PlotFrozenDist(norm(loc=10, scale=0.5))

    # Row 2: gamma distributions; the shifted one also gets a mean marker
    subplot(423)
    PlotFrozenDist(gamma(3))
    subplot(424)
    shifted_gamma = gamma(3, loc=5)
    PlotFrozenDist(shifted_gamma, q=1e-6, color='g')
    mean_value, _variance = shifted_gamma.stats()
    mean_value = float(mean_value)
    # red vertical line from the axis up to the pdf at the mean
    vlines(mean_value, 0, shifted_gamma.pdf(mean_value),
           colors='r', linestyles='-')

    # Row 3: Poisson distributions; extra keyword arguments are passed through
    subplot(425)
    PlotFrozenDist(poisson(5.3))
    subplot(426)
    PlotFrozenDist(poisson(10), color='r', ms=3)

    # Row 4: Skellam distributions
    subplot(427)
    PlotFrozenDist(skellam(5.3, 10), marker='*')
    subplot(428)
    PlotFrozenDist(skellam(100, 10))
def diff_estimate():
    """Score per-bin read-count differences between two BAM files.

    Reads the run configuration from ``get_options()``, opens the two
    alignment files ``options.tn5`` and ``options.tnH``, and models the
    per-bin difference ``count(tn5) - count(tnH)`` with a Skellam
    distribution whose two rates are the average number of reads per bin
    of each file (mapped reads divided by the number of
    ``options.stepsize``-wide bins in the genome).

    For every bin of every reference except ``options.mito_chrom_name`` a
    signed log tail probability is written to stdout as a tab-separated
    line ``chromosome, start, stop, score``:

    * ``log(cdf(d))`` (negative) when the cdf is at most 0.5;
    * ``-log(sf(d))`` (positive) when the cdf exceeds 0.5, i.e. when there
      is more evidence on the tn5 side.

    Scores are clamped to ``[-_MAXLOG, _MAXLOG]``.

    Raises:
        NotImplementedError: if ``options.accurate`` is set.
    """
    options = get_options()
    if options.accurate:
        # to be moved downward sometime
        raise NotImplementedError("Accurate estimate not yet implemented!")

    step = options.stepsize
    mito = options.mito_chrom_name

    # Context managers guarantee both BAM handles are closed, even when an
    # error interrupts the scan.
    with pysam.AlignmentFile(options.tn5, 'rb') as ftn5, \
            pysam.AlignmentFile(options.tnH, 'rb') as ftnH:
        n_reads5 = mapped_reads(ftn5, mito)
        n_readsH = mapped_reads(ftnH, mito)

        # genome sizes should be the same, but you'll never know
        # at a certain point one should also check that two files
        # have been aligned on the same reference...
        gs5 = genome_size(ftn5, mito)
        gsH = genome_size(ftnH, mito)

        # poisson lambda will be the average coverage per bin
        mu5 = n_reads5 / (gs5 / step)
        muH = n_readsH / (gsH / step)

        # now we can use a Skellam distribution
        # this will model the differences we count on each bin
        skd = sst.skellam(mu5, muH)

        for chromosome in ftn5.references:
            if chromosome == mito:
                continue
            chr_len = ftn5.get_reference_length(chromosome)
            for start in range(0, chr_len, step):
                # the last bin of a chromosome may be shorter than `step`
                stop = min(start + step, chr_len)
                # read_callback='all': per the pysam documentation this
                # skips unmapped, secondary, QC-failed and duplicate reads
                c5 = ftn5.count(contig=chromosome, start=start, stop=stop,
                                read_callback='all')
                cH = ftnH.count(contig=chromosome, start=start, stop=stop,
                                read_callback='all')
                d = c5 - cH
                p = skd.cdf(d)
                # log(0) legitimately yields +/-inf here and is clamped
                # below, so silence the divide-by-zero warning.
                with np.errstate(divide='ignore'):
                    if p > 0.5:
                        # more evidences on the right tail, the tn5 one.
                        # sf(d) equals 1 - cdf(d) but does not cancel to 0
                        # when the cdf is extremely close to 1.
                        score = -np.log(skd.sf(d))
                    else:
                        score = np.log(p)
                if score > _MAXLOG:
                    score = _MAXLOG
                if score < -_MAXLOG:
                    score = -_MAXLOG
                # write to stdout
                # one day I will add direct bigwig support
                sys.stdout.write(
                    f'{chromosome}\t{start}\t{stop}\t{score:.5e}\n')
def main():
    """Run ``test_func`` on two event configurations in a two-process pool.

    A frozen ``skellam(20, 1.2)`` distribution is bound as the first
    argument of ``test_func``; each work item is a dict holding an event
    count and a random integer drawn from ``np.random.randint``. The list
    of results is printed, followed by a completion marker.
    """
    dist = skellam(20, 1.2)

    # One work item per event count; the random integers are drawn in list
    # order (first for 5 events, then for 8).
    events = [
        {'events': n_events, 'rndm': np.random.randint(0, 2147483647)}
        for n_events in (5, 8)
    ]

    worker = partial(test_func, dist)
    with Pool(2) as pool:
        results = pool.map(worker, events)

    print(results)
    print('donzo')
# Overlay, for several values of Ns, the distribution returned by
# dist_num_anc (markers) and a shifted Skellam pmf (dashed lines).
fig, ax = plt.subplots(figsize=(6, 6))
rmin, rmax = max(1, n - 10), n + 10

for i, Ns in enumerate([1 / 1000, 5, 10, 50]):
    color = f"C{i}"  # same colour cycle entry for both curves of this Ns
    s = Ns / N
    n_range = np.arange(rmin, rmax + 1)

    # Exact distribution (as labelled by the author; computed by dist_num_anc)
    dist = [dist_num_anc(a, n, x, s, N, rocc) for a in n_range]
    # NOTE(review): kwargs is not used anywhere in the visible code.
    kwargs = {"n": n, "N": N, "s": Ns / N}
    ax.plot(n_range, dist, ls="", marker="o", color=color)

    # Skellam
    mu1 = n * s
    mu2 = n * (n - 1) / (2 * N)
    # NOTE: `s` is rebound here from the scalar Ns / N to the frozen
    # Skellam distribution.
    s = skellam(mu1, mu2)
    s_range = np.arange(-n, rmax)
    # ATTN: note that +n is added to the support of the distribution here
    ax.plot(
        s_range + n,
        s.pmf(s_range),
        label=Ns,
        ls="--",
        color=color,
    )

ax.legend(title="Ns", loc="upper left")

shared_args = {
    "xlabel": "Number of contributing lineages",
    "xlim": (rmin, rmax),
    "ylim": (-0.005, 1),
    "xticks": list(range(rmin, rmax, 5)),
}
ax.set(ylabel="Probability", title="Skellam approximation", **shared_args)

# Light red band covering x in [n, rmax]; the y extent (-10 to 10) reaches
# well beyond the ylim set above.
ax.fill_between([n, rmax], [-10, -10], [10, 10], color="red", alpha=0.05)
def main():
    """Set up a Skellam toy model and pass it to ``emulate_data``.

    Builds the frozen distribution ``skellam(20.9, 0.2)``, unit-width bin
    edges, a fixed list of ``cent_edges`` and a table that maps each
    statistic name to its measurement function and reference ("true")
    value, then calls ``emulate_data`` with those settings.
    """
    plt.rcParams['figure.autolayout'] = True

    threads = 15
    cent_edges = [
        6, 9, 12, 17, 24, 32, 42, 54,
        69, 86, 106, 129, 156, 188, 226, 271,
    ]
    percentiles = []

    mu1, mu2 = 20.9, 0.2
    mu_bar = (mu1 + mu2) / 2
    mu_delta = mu1 - mu2
    dist = skellam(mu1, mu2)
    binning = np.arange(-0.5, mu_bar * 2 * 10 + 1.5, 1)
    trials = 1000

    # Reference values used below: even orders use 2 * mu_bar, odd orders
    # use mu_delta, and every ratio entry uses 1.
    even_true = 2 * mu_bar
    odd_true = mu_delta

    # (name, measurement function, reference value)
    # NOTE(review): the two "... - ..." entries carry 'true': 1; if those
    # helpers return a difference of two ratios that are each 1, the
    # reference would presumably be 0 - confirm against the helpers.
    stat_table = (
        ('c2', get_c2_meas, even_true),
        ('c3', get_c3_meas, odd_true),
        ('c4', get_c4_meas, even_true),
        ('c5', get_c5_meas, odd_true),
        ('c6', get_c6_meas, even_true),
        ('k2', get_k2_meas, even_true),
        ('k3', get_k3_meas, odd_true),
        ('k4', get_k4_meas, even_true),
        ('k5', get_k5_meas, odd_true),
        ('k6', get_k6_meas, even_true),
        ('c4/c2', get_c4_div_c2_meas, 1),
        ('k4/k2', get_k4_div_k2_meas, 1),
        ('c6/c2', get_c6_div_c2_meas, 1),
        ('k6/k2', get_k6_div_k2_meas, 1),
        ('c4/c2 - k4/k2', get_c4_div_c2_sub_k4_div_k2_meas, 1),
        ('c6/c2 - k6/k2', get_c6_div_c2_sub_k6_div_k2_meas, 1),
    )
    moment_pars = {
        name: {'method': method, 'true': true_value}
        for name, method, true_value in stat_table
    }

    save_path = '/home/dylan/Desktop/'

    emulate_data(cent_edges, dist, binning, trials, moment_pars,
                 percentiles, threads, save_path)

    print('donzo')
def main():
    """Set up a Skellam toy model and run ``sim_single_trial`` on it.

    Builds the frozen distribution ``skellam(20.9, 0.2)``, the list of
    event counts to simulate (10 to 990 in steps of 10), unit-width bin
    edges and a table that maps each statistic name to its two evaluation
    functions (``method`` and ``method_single``) and its reference
    ("true") value.
    """
    plt.rcParams['figure.autolayout'] = True

    num_events = np.asarray(np.arange(10, 1000, 10))
    threads = 13
    percentiles = [16, 84]

    mu1, mu2 = 20.9, 0.2
    mu_bar = (mu1 + mu2) / 2
    mu_delta = mu1 - mu2
    dist = skellam(mu1, mu2)
    binning = np.arange(-0.5, mu_bar * 2 * 10 + 1.5, 1)
    trials = 10000

    # Reference values used below: even orders use 2 * mu_bar, odd orders
    # use mu_delta, and every ratio entry uses 1.
    even_true = 2 * mu_bar
    odd_true = mu_delta

    # (name, method, method_single, reference value)
    # NOTE(review): the two "... - ..." entries carry 'true': 1; if those
    # helpers return a difference of two ratios that are each 1, the
    # reference would presumably be 0 - confirm against the helpers.
    stat_table = (
        ('c2', get_c2, get_c2_meas, even_true),
        ('c3', get_c3, get_c3_meas, odd_true),
        ('c4', get_c4, get_c4_meas, even_true),
        ('c5', get_c5, get_c5_meas, odd_true),
        ('c6', get_c6, get_c6_meas, even_true),
        ('k2', get_k2, get_k2_meas, even_true),
        ('k3', get_k3, get_k3_meas, odd_true),
        ('k4', get_k4, get_k4_meas, even_true),
        ('k5', get_k5, get_k5_meas, odd_true),
        ('k6', get_k6, get_k6_meas, even_true),
        ('c4/c2', get_c4_div_c2, get_c4_div_c2_meas, 1),
        ('k4/k2', get_k4_div_k2, get_k4_div_k2_meas, 1),
        ('c6/c2', get_c6_div_c2, get_c6_div_c2_meas, 1),
        ('k6/k2', get_k6_div_k2, get_k6_div_k2_meas, 1),
        ('c4/c2 - k4/k2', get_c4_div_c2_sub_k4_div_k2,
         get_c4_div_c2_sub_k4_div_k2_meas, 1),
        ('c6/c2 - k6/k2', get_c6_div_c2_sub_k6_div_k2,
         get_c6_div_c2_sub_k6_div_k2_meas, 1),
    )
    moment_pars = {
        name: {
            'method': method,
            'method_single': method_single,
            'true': true_value,
        }
        for name, method, method_single, true_value in stat_table
    }

    # Not passed to the active call below.
    save_path = '/home/dylan/Desktop/'

    # Alternative entry points, currently disabled:
    # demo_plots(dist)
    # sim_trials(dist, num_events, trials, binning, percentiles, moment_pars, threads)
    sim_single_trial(dist, num_events, binning, moment_pars, threads)

    print('donzo')
from math import exp, factorial

from scipy.stats import skellam


def poisson(l, k):
    """Return the Poisson probability of exactly ``k`` events at rate ``l``.

    Evaluates ``l**k * exp(-l) / k!``.
    """
    return l ** k * exp(-l) / factorial(k)


print("Consider the game is the combination of the superposition of two independent poission processes N1 and N2.")
print("Assuming team i is the home team while team j is the away.")

# Rates passed to the Skellam distribution of N1 - N2 below.
mu1, mu2 = 1.55, 1.05
print(f"\nThe HAD pool can be modeled by N1 - N2, which is a skellam distribution with mu1 = {mu1} and mu2 = {mu2}.")
print("Alternatively, we can do Sum(N1 * N2) over all cases that have k_n1 > k_n2, k_n1 = k_n2 or k_n1 < k_n2 with a cutoff at some threshold.")

# Outcome probabilities from the distribution of the difference:
# draw = P(diff == 0), away = P(diff <= -1), home = the remainder.
sk = skellam(mu1, mu2)
had_draw = sk.pmf(0)
had_away = sk.cdf(-1)
had_home = 1.0 - had_draw - had_away

probability_line = "The probablity of HAD HOME|AWAY|DRAW: {:.2f}%|{:.2f}%|{:.2f}%."
print(probability_line.format(had_home*100, had_away*100, had_draw*100))

# Decimal odds without margin are the reciprocals of the probabilities.
had_home_odds_true = 1.0 / had_home
had_away_odds_true = 1.0 / had_away
had_draw_odds_true = 1.0 / had_draw

odds_line = "The decimal odds of HAD HOME|AWAY|DRAW: {:.2f}|{:.2f}|{:.2f}."
print(odds_line.format(
    had_home_odds_true, had_away_odds_true, had_draw_odds_true))

# Odds with margin: each reciprocal probability divided by (1 + margin).
had_margin = 0.123
had_home_odds_margin = 1.0 / had_home / (1 + had_margin)
had_away_odds_margin = 1.0 / had_away / (1 + had_margin)
had_draw_odds_margin = 1.0 / had_draw / (1 + had_margin)