def Cauchy_Entropy(w0, l0, w1, l1):
    """Estimate the entropy-style integral of a two-arm Cauchy mixture density.

    Each arm ``i`` is summarised by a Cauchy distribution whose location is
    ``w_i / (w_i + l_i)`` (0.5 when both counts are zero) and whose scale is
    the standard deviation of a Beta(w_i + 1, l_i + 1) distribution.

    The integrand is ``-rho * log(rho)`` with
    ``rho = pdf0 * cdf1 + pdf1 * cdf0``, averaged over the points returned by
    the module-level ``seq`` generator.

    Args:
        w0, l0: Win / loss counts for the first arm.
        w1, l1: Win / loss counts for the second arm.

    Returns:
        The mean of ``-rho * log(rho)`` over ``MC_samples`` points.

    Reads the module-level names ``seq`` and ``MC_samples`` and resets ``seq``
    before drawing points.
    """

    def _location_and_scale(wins, losses):
        # Location: 0.5 with no observations, otherwise the win fraction
        # (the mode of the Beta distribution).
        if wins == 0 and losses == 0:
            location = 0.5
        else:
            location = wins/(wins + losses)
        # Scale: standard deviation of Beta(wins + 1, losses + 1).
        spread = np.sqrt(
            (wins+1)*(losses+1)/((wins+losses+2)**2 * (wins+losses+3))
        )
        return location, spread

    x0, sig0 = _location_and_scale(w0, l0)
    x1, sig1 = _location_and_scale(w1, l1)

    # Restart the sequence so every call evaluates the same sample points.
    seq.reset()
    points = np.reshape(seq.get(MC_samples), MC_samples)

    density_0 = cauchy.pdf(points, x0, sig0)
    density_1 = cauchy.pdf(points, x1, sig1)
    cumulative_0 = cauchy.cdf(points, x0, sig0)
    cumulative_1 = cauchy.cdf(points, x1, sig1)

    # Density of the maximum of two independent variables.
    rho = density_0 * cumulative_1 + density_1 * cumulative_0

    return np.mean(-rho * np.log(rho))
def cauchy_distribution(select_size, asked=rvs, x=0):
    """Dispatch to one of three ``cauchy`` operations chosen by ``asked``.

    Args:
        select_size: Number of random variates to draw; only used when
            ``asked == rvs``.
        asked: Selector compared against the module-level names ``rvs``,
            ``pdf`` and ``cdf`` (defined outside this block).
        x: Evaluation point(s); only used for the ``pdf`` and ``cdf`` cases.

    Returns:
        ``cauchy.rvs(size=select_size)``, ``cauchy.pdf(x)`` or
        ``cauchy.cdf(x)`` depending on ``asked``; ``None`` when ``asked``
        matches none of the selectors.
    """
    if asked == rvs:
        return cauchy.rvs(size=select_size)
    if asked == pdf:
        return cauchy.pdf(x)
    if asked == cdf:
        return cauchy.cdf(x)
    # Unknown selector: same implicit result as falling off the end.
    return None
def distribution_function(x, mu, sigma, distribution):
    """Evaluate the CDF of the selected distribution at ``x``.

    Args:
        x: Point(s) at which to evaluate the CDF.
        mu: First parameter forwarded to the distribution's ``cdf``.
        sigma: Second parameter forwarded to the distribution's ``cdf``.
        distribution: A ``Distribution`` member selecting the family.

    Returns:
        The value of ``<family>.cdf(x, mu, sigma)`` for the matching family,
        or ``None`` when ``distribution`` is not one of the five handled
        members.
    """
    if distribution == Distribution.NORMAL:
        return norm.cdf(x, mu, sigma)
    if distribution == Distribution.CAUCHY:
        return cauchy.cdf(x, mu, sigma)
    if distribution == Distribution.LAPLACE:
        return laplace.cdf(x, mu, sigma)
    if distribution == Distribution.POISSON:
        # NOTE(review): if `poisson` is scipy.stats.poisson, its positional
        # parameters are (k, mu, loc), so `sigma` acts as a location shift
        # here rather than a scale - confirm this is intended.
        return poisson.cdf(x, mu, sigma)
    if distribution == Distribution.UNIFORM:
        return uniform.cdf(x, mu, sigma)
    # Unrecognised family.
    return None
def cdf(self, x, df):
    """Return the Cauchy CDF at ``x`` for this object's location and scale.

    Args:
        x: Point(s) at which to evaluate the CDF.
        df: Accepted for signature compatibility; not used here.

    Returns:
        ``cauchy.cdf`` evaluated with ``self.loc`` and ``self.scl``.
    """
    location = self.loc
    spread = self.scl
    return cauchy.cdf(x, loc=location, scale=spread)
# Prior constraints checked below: the prior is smooth, has median(theta) = 0,
# and puts probability 0.25 on each of (-inf,-1), (-1,0), (0,1), (1,inf),
# i.e. P(-1 < theta < 1) = 0.5.
# NOTE(review): the previous comment listed "mode(theta)=1" and the interval
# "(-inf,1)"; both priors used here are symmetric about 0, so these were read
# as typos - confirm against the reference text.

# A Gaussian prior which satisfies the above constraints.
prior_var = 2.19
prior_mu = 0
prior_std = math.sqrt(prior_var)
p_range = norm.cdf(1, prior_mu, prior_std) - norm.cdf(-1, prior_mu, prior_std)
assert np.allclose(p_range, 0.5, 1e-2)

# Posterior mean under the Gaussian prior (conjugate closed form).
post_var = 1 / (1 / obs_var + 1 / prior_var)
post_mean = post_var * (prior_mu / prior_var + obs_x / obs_var)
assert np.allclose(post_mean, 3.43, 1e-2)

# A Cauchy prior which satisfies the above constraints.
loc = 0
scale = 1
p_range = cauchy.cdf(1, loc, scale) - cauchy.cdf(-1, loc, scale)
assert np.allclose(p_range, 0.5, 1e-2)

# Posterior mean under the Cauchy prior. There is no closed form, so the
# normalising constant and the first moment are integrated numerically over
# the whole real line.
inf = np.inf


def lik(theta):
    """Gaussian likelihood of the observation ``obs_x`` given mean ``theta``."""
    return norm.pdf(obs_x, theta, obs_std)


def prior(theta):
    """Cauchy(loc, scale) prior density evaluated at ``theta``.

    The prior is a density over theta with the location/scale validated
    above; it must not depend on the observation.
    """
    return cauchy.pdf(theta, loc, scale)


def post(theta):
    """Unnormalised posterior density: likelihood times prior."""
    return lik(theta) * prior(theta)


Z = integrate.quad(post, -inf, inf)[0]
post_mean = integrate.quad(lambda theta: theta * post(theta) / Z, -inf, inf)[0]
assert np.allclose(post_mean, 4.56, 1e-2)
def func(self, x):
    """Return the Cauchy CDF at ``x`` with location ``self.mu`` and scale ``self.sigma``."""
    centre, width = self.mu, self.sigma
    return cauchy.cdf(x, centre, width)
def __call__(self, x, loc=0, scale=1):
    """Evaluate the Cauchy CDF at ``x``.

    Args:
        x: Point(s) at which to evaluate the CDF.
        loc: Location parameter forwarded to ``cauchy.cdf`` (default 0).
        scale: Scale parameter forwarded to ``cauchy.cdf`` (default 1).

    Returns:
        The result of ``cauchy.cdf`` for the given arguments. No instance
        state is read.
    """
    cumulative = cauchy.cdf(x, loc, scale)
    return cumulative
# Display the probability density function (``pdf``):
x = np.linspace(cauchy.ppf(0.01), cauchy.ppf(0.99), 100)
ax.plot(x, cauchy.pdf(x), 'r-', lw=5, alpha=0.6, label='cauchy pdf')

# Alternatively, the distribution object can be called (as a function)
# to fix the shape, location and scale parameters. This returns a "frozen"
# RV object holding the given parameters fixed.

# Freeze the distribution and display the frozen ``pdf``:
rv = cauchy()
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

# Check accuracy of ``cdf`` and ``ppf``:
vals = cauchy.ppf([0.001, 0.5, 0.999])
np.allclose([0.001, 0.5, 0.999], cauchy.cdf(vals))  # True

# Generate random numbers:
r = cauchy.rvs(size=1000)

# And compare the histogram.
# ``density=True`` replaces the ``normed`` keyword, which Matplotlib removed.
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
ax.legend(loc='best', frameon=False)
plt.show()
def main():
    """Fit a Cauchy distribution to each SAD target and write QC output.

    Command line: one positional argument, a directory that contains
    per-chromosome ``*/sad.h5`` files.

    Options:
        -o: output directory for QC files (default ``sad_norm``).
        -s: number of SNPs to sample for the fit (default 100000).

    Steps:
        1. Remove any existing Cauchy fit/norm datasets from every HDF5 file
           and count SNPs and targets.
        2. Sample SAD scores, fit ``cauchy`` per target and store the fitted
           location/scale in every HDF5 file.
        3. Construct a ``SAD5`` object per file (presumably this is what
           computes the normalization parameters - confirm against ``SAD5``).
        4. Write a fit table, quantile plots and raw-vs-normalized plots.

    Exits with a usage error when the argument count is wrong or no HDF5
    files are found.
    """
    usage = 'usage: %prog [options] arg'
    parser = OptionParser(usage)
    parser.add_option('-o', dest='out_dir', default='sad_norm')
    parser.add_option(
        '-s',
        dest='sample',
        default=100000,
        type='int',
        help='Number of SNPs to sample for fit [Default: %default]')
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error('Must provide SAD HDF5 path')
    sad_h5_path = args[0]

    # retrieve chromosome SAD HDF5 files
    chr_sad_h5_files = sorted(glob.glob('%s/*/sad.h5' % sad_h5_path))
    if not chr_sad_h5_files:
        # Explicit check instead of an assert, which is stripped under -O.
        parser.error('No sad.h5 files found under %s' % sad_h5_path)

    # clean out any existing fits
    # count SNPs across chromosomes
    stale_datasets = (
        'target_cauchy_fit_loc',
        'target_cauchy_fit_scale',
        'target_cauchy_norm_loc',
        'target_cauchy_norm_scale',
    )
    num_snps = 0
    for chr_sad_h5_file in chr_sad_h5_files:
        # The context manager closes the file even if a step below raises.
        with h5py.File(chr_sad_h5_file, 'r+') as chr_sad_h5:
            # Delete each dataset independently so a partially written file
            # (only one of loc/scale present) is still cleaned up.
            for dataset_name in stale_datasets:
                if dataset_name in chr_sad_h5:
                    del chr_sad_h5[dataset_name]

            # count SNPs
            num_snps += chr_sad_h5['SAD'].shape[0]
            num_targets = chr_sad_h5['SAD'].shape[-1]

    # sample SNPs across chromosomes
    sad = sample_sad(chr_sad_h5_files, options.sample, num_snps, num_targets)

    # initialize fit parameters
    target_cauchy_fit_loc = np.zeros(num_targets)
    target_cauchy_fit_scale = np.zeros(num_targets)

    # fit parameters
    for ti in range(num_targets):
        print('Fitting t%d' % ti, flush=True)
        cp = cauchy.fit(sad[:, ti])
        target_cauchy_fit_loc[ti] = cp[0]
        target_cauchy_fit_scale[ti] = cp[1]
    del sad

    # write across chromosomes
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, 'r+') as chr_sad_h5:
            chr_sad_h5.create_dataset(
                'target_cauchy_fit_loc', data=target_cauchy_fit_loc)
            chr_sad_h5.create_dataset(
                'target_cauchy_fit_scale', data=target_cauchy_fit_scale)

    # compute normalization parameters
    for chr_sad_h5_file in chr_sad_h5_files:
        chr_sad5 = SAD5(chr_sad_h5_file)

    # QC fit table
    if not os.path.isdir(options.out_dir):
        os.mkdir(options.out_dir)
    with open('%s/fits.txt' % options.out_dir, 'w') as fit_out:
        for ti in range(num_targets):
            print('%-4d %7.1e %7.1e' %
                  (ti, target_cauchy_fit_loc[ti], target_cauchy_fit_scale[ti]),
                  file=fit_out)

    # QC quantiles
    quantile_dir = '%s/quantiles' % options.out_dir
    if not os.path.isdir(quantile_dir):
        os.mkdir(quantile_dir)
    sad_qc = sample_sad(chr_sad_h5_files, 2048, num_snps, num_targets)
    for ti in np.linspace(0, num_targets - 1, 64, dtype='int'):
        # compute cauchy and argsort quantiles
        cauchy_q = cauchy.cdf(
            sad_qc[:, ti],
            loc=target_cauchy_fit_loc[ti],
            scale=target_cauchy_fit_scale[ti])
        sort_i = np.argsort(sad_qc[:, ti])

        quantile_pdf = '%s/t%d.pdf' % (quantile_dir, ti)

        jointplot(
            np.linspace(0, 1, len(sort_i)),
            cauchy_q[sort_i],
            quantile_pdf,
            square=True,
            cor=None,
            x_label='Empirical',
            y_label='Cauchy')

    # QC plots
    norm_dir = '%s/norm' % options.out_dir
    if not os.path.isdir(norm_dir):
        os.mkdir(norm_dir)
    chr_sad5 = SAD5(chr_sad_h5_files[0])
    qc_sample = 2048
    if qc_sample < chr_sad5.num_snps:
        ri = sorted(
            np.random.choice(
                np.arange(chr_sad5.num_snps), size=qc_sample, replace=False))
    else:
        ri = np.arange(chr_sad5.num_snps)
    qc_sad_raw = chr_sad5.sad_matrix[ri]
    qc_sad_norm = chr_sad5[ri]
    for ti in np.linspace(0, num_targets - 1, 32, dtype='int'):
        # sns.jointplot opens its own figure, so no plt.figure() beforehand:
        # the extra empty figure was never closed and leaked once per target.
        # x/y are passed by keyword, which current seaborn requires.
        sns.jointplot(
            x=qc_sad_raw[:, ti],
            y=qc_sad_norm[:, ti],
            joint_kws={
                'alpha': 0.5,
                's': 10
            })
        plt.savefig('%s/t%d.pdf' % (norm_dir, ti))
        plt.close()
# Third Laplace panel: CDF curve evaluated on x3 against the cumulative
# step histogram of the sample r3 (one bin per sample point).
ax[2].plot(x3, laplace.cdf(x3, 0, sqrt(2)))
ax[2].hist(r3, density=True, bins=floor(len(r3)), histtype='step', cumulative=True)
ax[2].set_xlabel('x')
# Title text is Russian for "Laplace distribution, n=100".
ax[2].set_title('Распределение Лапласа, n=100')
#plt.show()#Laplace

# New figure with three side-by-side panels for the Cauchy distribution.
fig2, ay = plt.subplots(1, 3)

# Panel 1: 20 Cauchy variates. The plotting grid spans from the smaller of the
# 1% quantile and the sample minimum to the larger of the 99% quantile and
# the sample maximum, so every sample point falls inside the plotted range.
r = cauchy.rvs(size=20)
y = np.linspace(min(cauchy.ppf(0.01), min(r)), max(cauchy.ppf(0.99), max(r)), 100)
ay[0].plot(y, cauchy.cdf(y, 0, 1))
ay[0].hist(r, density=True, bins=floor(len(r)), histtype='step', cumulative=True)
ay[0].set_xlabel('x')
# Title text is Russian for "Cauchy distribution, n=20".
ay[0].set_title('Распределение Коши, n=20')

# Panel 2: same construction with 60 variates.
r2 = cauchy.rvs(size=60)
y2 = np.linspace(min(cauchy.ppf(0.01), min(r2)), max(cauchy.ppf(0.99), max(r2)), 100)
ay[1].plot(y2, cauchy.cdf(y2, 0, 1))
# NOTE(review): this call is cut off in the visible source; its remaining
# arguments are not shown here.
ay[1].hist(r2, density=True, bins=floor(len(r2)),
def main():
    """Fit a Cauchy distribution to each SAD target and write QC output.

    Command line: one positional argument, a directory that contains
    per-chromosome ``*/sad.h5`` files.

    Options:
        -o: output directory for QC files (default ``sad_norm``).
        -s: number of SNPs to sample for the fit (default 100000).

    Steps:
        1. Remove any existing Cauchy fit/norm datasets from every HDF5 file
           and count SNPs and targets.
        2. Sample SAD scores, fit ``cauchy`` per target and store the fitted
           location/scale in every HDF5 file.
        3. Construct a ``SAD5`` object per file (presumably this is what
           computes the normalization parameters - confirm against ``SAD5``).
        4. Write a fit table, quantile plots and raw-vs-normalized plots.

    Exits with a usage error when the argument count is wrong or no HDF5
    files are found.
    """
    usage = "usage: %prog [options] arg"
    parser = OptionParser(usage)
    parser.add_option("-o", dest="out_dir", default="sad_norm")
    parser.add_option(
        "-s",
        dest="sample",
        default=100000,
        type="int",
        help="Number of SNPs to sample for fit [Default: %default]",
    )
    (options, args) = parser.parse_args()

    if len(args) != 1:
        parser.error("Must provide SAD HDF5 path")
    sad_h5_path = args[0]

    # retrieve chromosome SAD HDF5 files
    chr_sad_h5_files = sorted(glob.glob("%s/*/sad.h5" % sad_h5_path))
    if not chr_sad_h5_files:
        # Explicit check instead of an assert, which is stripped under -O.
        parser.error("No sad.h5 files found under %s" % sad_h5_path)

    # clean out any existing fits
    # count SNPs across chromosomes
    stale_datasets = (
        "target_cauchy_fit_loc",
        "target_cauchy_fit_scale",
        "target_cauchy_norm_loc",
        "target_cauchy_norm_scale",
    )
    num_snps = 0
    for chr_sad_h5_file in chr_sad_h5_files:
        # The context manager closes the file even if a step below raises.
        with h5py.File(chr_sad_h5_file, "r+") as chr_sad_h5:
            # Delete each dataset independently so a partially written file
            # (only one of loc/scale present) is still cleaned up.
            for dataset_name in stale_datasets:
                if dataset_name in chr_sad_h5:
                    del chr_sad_h5[dataset_name]

            # count SNPs
            num_snps += chr_sad_h5["SAD"].shape[0]
            num_targets = chr_sad_h5["SAD"].shape[-1]

    # sample SNPs across chromosomes
    sad = sample_sad(chr_sad_h5_files, options.sample, num_snps, num_targets)

    # initialize fit parameters
    target_cauchy_fit_loc = np.zeros(num_targets)
    target_cauchy_fit_scale = np.zeros(num_targets)

    # fit parameters
    for ti in range(num_targets):
        print("Fitting t%d" % ti, flush=True)
        cp = cauchy.fit(sad[:, ti])
        target_cauchy_fit_loc[ti] = cp[0]
        target_cauchy_fit_scale[ti] = cp[1]
    del sad

    # write across chromosomes
    for chr_sad_h5_file in chr_sad_h5_files:
        with h5py.File(chr_sad_h5_file, "r+") as chr_sad_h5:
            chr_sad_h5.create_dataset(
                "target_cauchy_fit_loc", data=target_cauchy_fit_loc
            )
            chr_sad_h5.create_dataset(
                "target_cauchy_fit_scale", data=target_cauchy_fit_scale
            )

    # compute normalization parameters
    for chr_sad_h5_file in chr_sad_h5_files:
        chr_sad5 = SAD5(chr_sad_h5_file)

    # QC fit table
    if not os.path.isdir(options.out_dir):
        os.mkdir(options.out_dir)
    with open("%s/fits.txt" % options.out_dir, "w") as fit_out:
        for ti in range(num_targets):
            print(
                "%-4d %7.1e %7.1e"
                % (ti, target_cauchy_fit_loc[ti], target_cauchy_fit_scale[ti]),
                file=fit_out,
            )

    # QC quantiles
    quantile_dir = "%s/quantiles" % options.out_dir
    if not os.path.isdir(quantile_dir):
        os.mkdir(quantile_dir)
    sad_qc = sample_sad(chr_sad_h5_files, 2048, num_snps, num_targets)
    for ti in np.linspace(0, num_targets - 1, 64, dtype="int"):
        # compute cauchy and argsort quantiles
        cauchy_q = cauchy.cdf(
            sad_qc[:, ti],
            loc=target_cauchy_fit_loc[ti],
            scale=target_cauchy_fit_scale[ti],
        )
        sort_i = np.argsort(sad_qc[:, ti])

        quantile_pdf = "%s/t%d.pdf" % (quantile_dir, ti)

        jointplot(
            np.linspace(0, 1, len(sort_i)),
            cauchy_q[sort_i],
            quantile_pdf,
            square=True,
            cor=None,
            x_label="Empirical",
            y_label="Cauchy",
        )

    # QC plots
    norm_dir = "%s/norm" % options.out_dir
    if not os.path.isdir(norm_dir):
        os.mkdir(norm_dir)
    chr_sad5 = SAD5(chr_sad_h5_files[0])
    qc_sample = 2048
    if qc_sample < chr_sad5.num_snps:
        ri = sorted(
            np.random.choice(
                np.arange(chr_sad5.num_snps), size=qc_sample, replace=False
            )
        )
    else:
        ri = np.arange(chr_sad5.num_snps)
    qc_sad_raw = chr_sad5.sad_matrix[ri]
    qc_sad_norm = chr_sad5[ri]
    for ti in np.linspace(0, num_targets - 1, 32, dtype="int"):
        # sns.jointplot opens its own figure, so no plt.figure() beforehand:
        # the extra empty figure was never closed and leaked once per target.
        # x/y are passed by keyword, which current seaborn requires.
        sns.jointplot(
            x=qc_sad_raw[:, ti],
            y=qc_sad_norm[:, ti],
            joint_kws={"alpha": 0.5, "s": 10},
        )
        plt.savefig("%s/t%d.pdf" % (norm_dir, ti))
        plt.close()
# Render one frame per step, overlaying a normal and a Cauchy tail-probability
# curve (both centred at x = 250 with spread `std`) on a 512x512 canvas.
# NOTE(review): the loop body continues past the visible source, and `std`,
# `font_loc`, `zigzag2` and `draw_curve` are defined elsewhere.
font = ImageFont.truetype(font_loc, 12)
# Toggle between a one-sided curve (CDF only) and a two-sided curve
# (smaller of the two tails).
one_sided = False
for i in range(17):
    # presumably a back-and-forth position in [0, 10] - verify against zigzag2
    ix = zigzag2(i, 0, 10)
    im = Image.new("RGB", (512, 512), "black")
    draw = ImageDraw.Draw(im, 'RGBA')
    # Normal curve, mapped to pixel rows: y = 300 is the baseline and larger
    # probabilities are drawn higher up (smaller y).
    if one_sided:
        fn = lambda x: 300 - norm.cdf(x, 250, std) * 100
    else:
        # min(cdf, sf) peaks at 0.5 at x = 250, so the curve tops out at y = 200.
        fn_tmp = lambda x: min(norm.cdf(x, 250, std), norm.sf(x, 250, std))
        fn = lambda x: 300 - (fn_tmp(x)) * 200
    draw_curve(fn, draw, rgba=(255, 165, 0))
    # Cauchy curve with the same centre and spread, drawn in a second colour.
    if one_sided:
        fn1 = lambda x: 300 - cauchy.cdf(x, 250, std) * 100
    else:
        fn_tmp1 = lambda x: min(cauchy.cdf(x, 250, std), cauchy.sf(
            x, 250, std))
        fn1 = lambda x: 300 - (fn_tmp1(x)) * 200
    draw_curve(fn1, draw, rgba=(222, 49, 99))
    # Vertical marker at the centre (x = 250) and the horizontal baseline.
    draw.line((250, 200, 250, 300), "white")
    draw.line((0, 300, 512, 300), "yellow")
    y1 = 300
    # Map ix linearly so that 0 -> x = 50 and 10 -> x = 450.
    x1 = 50 + ix / 10 * (450 - 50)
    # Small yellow dot on the baseline at the current x position.
    draw.ellipse((x1 - 3, y1 - 3, x1 + 3, y1 + 3),
                 outline=(255, 255, 0),
                 fill=(255, 255, 0, 150))
    x2 = 250
    # Height of the normal curve at the current x position.
    y1 = fn(x1)
from scipy.stats import cauchy

# Print the CDF of a Cauchy distribution with location 3 and scale 3 at x = 2.
print(cauchy.cdf(2, loc=3, scale=3))
# Finish and show the previous figure before starting the Cauchy one.
ax[1][2].legend()
plt.show()  # Laplace


def _cauchy_plot_grid(sample):
    """Return 100 evenly spaced points covering the 1%-99% quantiles and the sample."""
    return np.linspace(min(cauchy.ppf(0.01), min(sample)),
                       max(cauchy.ppf(0.99), max(sample)), 100)


def _draw_cauchy_density_panel(axis, grid, sample, title):
    """Plot the standard Cauchy pdf over ``grid`` against a step histogram of ``sample``."""
    axis.plot(grid, cauchy.pdf(grid, 0, 1), label='теор.')
    axis.set_title(title)
    axis.hist(sample, density=True, bins=floor(sqrt(len(sample))),
              histtype='step', label='практ.')
    axis.legend(loc='best', frameon=False)
    axis.set_xlabel('x')
    axis.set_ylabel('Функция плотности')
    axis.legend()


# 2x3 grid: densities on the top row, distribution functions on the bottom row.
fig2, ay = plt.subplots(2, 3)

# First column: sample of size 10.
r = cauchy.rvs(size=10)
y = _cauchy_plot_grid(r)
_draw_cauchy_density_panel(ay[0][0], y, r, 'Распределение Коши, n=10')

cdf_axis = ay[1][0]
cdf_axis.plot(y, cauchy.cdf(y, 0, 1), label='теор.')
cdf_axis.hist(r, density=True, bins=floor(sqrt(len(r))), histtype='step',
              cumulative=True, label='практ.')
cdf_axis.legend(loc='best', frameon=False)
cdf_axis.set_xlabel('x')
cdf_axis.set_ylabel('Функция распределения')
cdf_axis.legend()

# Second column: sample of size 100 (density panel).
r2 = cauchy.rvs(size=100)
y2 = _cauchy_plot_grid(r2)
_draw_cauchy_density_panel(ay[0][1], y2, r2, 'Распределение Коши, n=100')