def chi2_dir(cause, effect, unknown, n, p_cause, p_effect_given_cause):
    cnt = count(zip(effect, unknown))
    #print cnt
    chi_indep = chi2_contingency(cnt)[1]
    p_unknown_given_effect = [float(cnt[0][1]) / sum(cnt[0]),
                              float(cnt[1][1]) / sum(cnt[1])]
    #print 'p(bact|cd)=%s' % p_unknown_given_effect
    exp = [[0, 0], [0, 0]]
    for c in range(2):
        for e in range(2):
            for u in range(2):
                exp[c][u] += (n * p_of_val(p_cause, c)
                              * p_of_val(p_effect_given_cause[c], e)
                              * p_of_val(p_unknown_given_effect[e], u))
    cnt = count(zip(cause, unknown))
    #print "obs=%s" % cnt
    #print 'cnt=%s' % cnt
    #print 'expected if cd->bact=%s' % exp
    chi_rev = chisquare(cnt, exp, axis=None, ddof=2)
    chi_fwd = chi2_contingency(cnt)
    #print 'expected if bact->cd=%s' % chi_fwd[3]
    bayes_factor = chi2.pdf(chi_fwd[0], 1) / chi2.pdf(chi_rev.statistic, 1)
    return struct(reject_indep=chi_indep, bayes_fwd_rev=bayes_factor,
                  reject_fwd=chi_fwd[1], reject_rev=chi_rev.pvalue)
def ej14(data_x, data_y, s):
    a2, a1 = np.polyfit(data_x, data_y, 1)
    chi2_t_list = []
    chi2_f_list = []
    for _ in range(1000):
        new_y = np.random.normal(a1 + a2 * data_x, s)
        p = np.polyfit(data_x, new_y, 1)
        chi2_t_list.append(sum(((new_y - a1 - a2 * data_x) / s)**2))
        chi2_f_list.append(sum(((new_y - p[1] - p[0] * data_x) / s)**2))
    plt.figure(1)
    my_hist(chi2_t_list, 20, label='Data')
    x = np.linspace(0, 30, 100)
    chi211 = chi2.pdf(x, 11)
    plt.plot(x, chi211, label=r'$\chi^2_{11}$', color=sns.color_palette()[2])
    plt.legend()
    plt.savefig('fig4.jpg')
    plt.figure(2)
    my_hist(chi2_f_list, 20, label='Data')
    x = np.linspace(0, 30, 100)
    plt.plot(x, chi211, label=r'$\chi^2_{11}$', color=sns.color_palette()[2])
    chi29 = chi2.pdf(x, 9)
    plt.plot(x, chi29, label=r'$\chi^2_{9}$', color=sns.color_palette()[3])
    plt.legend()
    plt.savefig('fig5.jpg')
    plt.show()
def prob1():
    """Estimate chi-square CDF values, mean, and variance with Monte Carlo
    methods (part A) and with scipy's built-in functions (part B)."""
    # Part A: Monte Carlo. Sample sizes differ, so keep a list, not an array.
    A = [np.random.randn(10**i)**2 for i in [2, 4, 6]]
    bins = np.linspace(0, 1, 50)
    plt.subplot(131)
    plt.hist(A[0], bins=bins, density=True)
    plt.subplot(132)
    plt.hist(A[1], bins=bins, density=True)
    plt.subplot(133)
    plt.hist(A[2], bins=bins, density=True)
    # compute cdf
    answers_A1 = []
    answers_A2 = []
    answers_A3 = []
    for j in range(3):
        for k in [.5, 1, 1.5]:
            length = len(A[j])
            answers_A1.append((np.sum(A[j] <= k) / length, k, (j + 1) * 2))
        mean = np.average(A[j])
        L = (A[j] - mean)**2
        answers_A2.append(mean)
        answers_A3.append(np.sum(L) / len(L))
    # Part B: built-in functions
    answers_B1 = []
    answers_B2 = []
    answers_B3 = []
    plt.subplot(131)
    plt.plot(bins, chi2.pdf(bins, 1))
    plt.subplot(132)
    plt.plot(bins, chi2.pdf(bins, 1))
    plt.subplot(133)
    plt.plot(bins, chi2.pdf(bins, 1))
    answers_B1 = [chi2.cdf(i, df=1) for i in [0.5, 1, 1.5]]
    plt.show()
    answers_B2.append(chi2.mean(1))
    answers_B3.append(chi2.var(1))
    return ("for sample sizes 10^2, 10^4, 10^6 and x = 0.5, 1.0, 1.5: the estimated "
            "CDF value is the first term, x the second, the exponent the third "
            + str(answers_A1)
            + " These are the sample means and variances for each estimate "
            + str(answers_A2) + str(answers_A3) + str(answers_B1)
            + " mean for chi_squared " + str(answers_B2) + str(answers_B3))
def calculate_bartlett_sphericity(x):
    """
    Test the hypothesis that the correlation matrix is equal to the
    identity matrix.

    H0: The matrix of population correlations is equal to I.
    H1: The matrix of population correlations is not equal to I.

    The formula for Bartlett's sphericity test is:

    .. math:: -1 * (n - 1 - ((2p + 5) / 6)) * \\ln(\\det(R))

    where det(R) is the determinant of the correlation matrix R,
    n is the sample size, and p is the number of variables.

    Parameters
    ----------
    x : array-like
        The array from which to calculate sphericity.

    Returns
    -------
    statistic : float
        The chi-square value.
    p_value : float
        The associated p-value for the test.
    """
    n, p = x.shape
    x_corr = corr(x)
    corr_det = np.linalg.det(x_corr)
    statistic = -np.log(corr_det) * (n - 1 - (2 * p + 5) / 6)
    degrees_of_freedom = p * (p - 1) / 2
    # upper-tail probability of the chi-square statistic
    p_value = chi2.sf(statistic, degrees_of_freedom)
    return statistic, p_value
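# A minimal usage sketch for calculate_bartlett_sphericity, assuming the
# undefined helper `corr` returns the column-wise correlation matrix (stubbed
# here with np.corrcoef); the data and sizes are illustrative only.
import numpy as np
from scipy.stats import chi2

corr = lambda a: np.corrcoef(a, rowvar=False)
rng = np.random.default_rng(0)
x = rng.normal(size=(200, 5))  # 200 observations of 5 uncorrelated variables
stat, p = calculate_bartlett_sphericity(x)
print(stat, p)  # independent columns -> small statistic, large p-value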
def plot_ts_vs_chi2(data, ext_list="ext1_ts", ndf_chi2=[1], subplot=[1, 2, 1], **kwargs):
    ax = plt.subplot(subplot[0], subplot[1], subplot[2])
    ext_data = column(data, "%s" % ext_list)
    clean_data = [x for x in ext_data if not math.isnan(x)]  # remove nan from data
    n, bins, patches = plt.hist(clean_data,
                                int(math.ceil(max(column(data, "%s" % ext_list)))),
                                density=True, facecolor="green")
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    chi2_vals = []
    colors = ["r", "b", "g"]
    for j in range(0, len(ndf_chi2)):
        chi2_vals.append(chi2.pdf(bincenters, ndf_chi2[j]))
        plt.plot(bincenters, chi2_vals[j], "%s--" % colors[j], linewidth=2.0,
                 label=r"$\chi^2_%i$/2" % ndf_chi2[j])
    legend = ax.legend(loc="upper right", frameon=False)
    plt.ylabel("PDF")
    plt.xlabel("TS$_{%s}$" % ext_list[0:4])
    plt.yscale("log")
    plt.ylim([0.00001, 2.0])
    ax = plt.subplot(subplot[0], subplot[1], subplot[2] + 1)
    n, bins, patches = plt.hist(clean_data,
                                int(math.ceil(max(column(data, "%s" % ext_list)))),
                                density=True, facecolor="green", cumulative=-1)
    chi2_sfvals = []
    for j in range(0, len(ndf_chi2)):
        chi2_sfvals.append(chi2.sf(bincenters, ndf_chi2[j]))
        plt.plot(bincenters, chi2_sfvals[j], "%s--" % colors[j], linewidth=2.0,
                 label=r"$\chi^2_%i$/2" % ndf_chi2[j])
    legend = ax.legend(loc="upper right", frameon=False)
    plt.ylabel("1-CDF")
    plt.xlabel("TS$_{%s}$" % ext_list[0:4])
    plt.yscale("log")
    plt.ylim([0.00001, 2.0])
def Chi2Plots(instance, Nbins):
    """
    Plot the distribution of the likelihood function obtained from the MC toy
    experiments. The distribution should follow a chi^2 distribution with
    Nbins degrees of freedom.

    params : Nbins - int specifying the number of bins, i.e. the no. of dof.
    returns : nothing; the figure is saved and shown.
    """
    binning = np.linspace(0, 30, 20)
    plt.hist(instance.lambda_true - instance.lambda_toys, density=True,
             bins=binning, label='toy MC')
    x = np.linspace(0, 30, 1000)
    plt.plot(x, chi2.pdf(x, df=8), label=r'$\chi^2$ dof$=8$', lw=3, color='r')
    plt.legend(loc='best')
    pathToChi2Toys = '/mnt/t3nfs01/data01/shome/jandrejk/higgs_model_dep/MoriondAnalysis/plots/chi2_DistFromToys/'
    study_name = 'BSM1_compareDifferentResponseScenarios'
    pathSave = pathToChi2Toys + '/' + instance.obs + '/' + instance.mode + '/'
    ensure_dir(file_path=pathSave)
    plt.savefig(pathSave + 'VHchi2.png')
    plt.show()
def likelihoodRatio(idx, p, params1, params2, n=100):
    try:
        alpha1 = params1.alpha.mean()
        beta1 = params1.beta.mean()
        gamma1 = round(params1.gamma.mean()) + 1
    except AttributeError:
        alpha1 = params1.alpha
        beta1 = params1.beta
        gamma1 = round(params1.gamma) + 1
    try:
        alpha2 = params2.alpha.mean()
        beta2 = params2.beta.mean()
        gamma2 = round(params2.gamma.mean()) + 1
    except AttributeError:
        alpha2 = params2.alpha
        beta2 = params2.beta
        gamma2 = round(params2.gamma) + 1
    sum1 = logLikelihood([int(x) for x in np.round(p)], params1, n, idx)
    sum2 = logLikelihood([int(x) for x in np.round(p)], params2, n, idx)
    ratio = sum1 - sum2
    if ratio >= 0:
        logStatus(Status(1, idx, "Could not perform likelihood ratio test."))
        return float('nan')
    # upper-tail p-value of the likelihood-ratio statistic with 3 dof
    pVal = chi2.sf(-2 * ratio, 3)
    return pVal
def calculate_daughter(self, x):
    """ Calculate daughter probability for amplitude """
    # c = x*self.__emittance/(self.__parent_emittance**2)
    c = x / self.__emittance
    return chi2.pdf(c, self.__ndf)
def plot_A0(self):
    sG = np.sqrt(np.var(self.gA0))
    mG = np.mean(self.gA0)
    amin, amax = plt.xlim()
    alist = np.arange(3 * amin, amax * 3, amax / 250.)
    theoryGdist = norm.pdf(alist, loc=mG, scale=sG)
    for i in range(len(self.fgnls)):
        if (self.theoryplot == False):
            lbl = self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i])
        else:
            lbl = None
        neg = np.min(self.fgNLA0[i] - self.gA0)
        const = 9 * 6. * self.fgnls[i] * self.phisq
        plot_hist(plt, (self.fgNLA0[i] - self.gA0 + const), clr=self.clrs[i + 1],
                  alp=ALPHA, labl=lbl, ht='stepfilled')
        #plot_hist(plt, (self.fgNLA0[i]-self.gA0), clr=self.clrs[i+1], alp=ALPHA, labl=lbl, ht='stepfilled')
        amin, amax = plt.xlim()
        alist = np.arange(3 * amin, amax * 3, 0.001)
        if (self.theoryplot):
            scl = 6.0 * self.A0const * self.Nconst * self.fgnls[i]
            theorynGdist = np.sign(self.fgnls[i]) * chi2.pdf(alist, 1, scale=scl)
            plt.plot(alist, theorynGdist, self.clrs[i + 1], linestyle=self.ls[i + 1],
                     linewidth=LW, label=self.TYPELABEL + "=" + NtoSTR(self.fgnls[i]))
    plt.xlabel(r'$A_0$')
    plt.ylabel(r'$p(A_0)$')
    plt.yscale('log')
    plt.ylim(0.01, 100)
    plt.xlim(-0.02, 0.25)
    plt.legend()
import math
import logging
import numpy
from scipy.stats import chi2

def chi2_2sample(s1, s2, bins=20, range=(0, 1)):
    """
    Chi-square two-sample test, see:
    http://www.itl.nist.gov/div898/software/dataplot/refman1/auxillar/chi2samp.htm

    @return chi-square-statistic, degrees of freedom, p-value
    """
    while True:
        R, _e = numpy.histogram(s1, bins=bins, range=range)
        S, _e = numpy.histogram(s2, bins=bins, range=range)
        if min(R) >= 5 and min(S) >= 5:
            break
        bins -= 1
        logging.debug('decreased bin count to %d for X2 test: less than 5 samples in a bin' % bins)

    def _k1(s, r):
        return math.sqrt(float(numpy.sum(s)) / numpy.sum(r))

    def _k2(s, r):
        return math.sqrt(float(numpy.sum(r)) / numpy.sum(s))

    x2 = 0
    K1 = _k1(R, S)
    K2 = _k2(R, S)
    for i, r in enumerate(R):
        # NIST statistic uses K1 on the first sample and K2 on the second
        u = (K1 * R[i] - K2 * S[i])**2
        d = R[i] + S[i]
        x2 += u / d
    df = bins - 1
    # upper-tail p-value of the chi-square statistic
    pvalue = chi2.sf(x2, df)
    return x2, df, pvalue
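# Hedged usage sketch for chi2_2sample: two samples drawn from the same
# distribution, so the test should not reject; sample sizes are illustrative.
s1 = numpy.random.uniform(size=2000)
s2 = numpy.random.uniform(size=2000)
x2, df, p = chi2_2sample(s1, s2, bins=20, range=(0, 1))
print(x2, df, p)  # p should be large most of the time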
def CombinedPvalue(self, output):
    """ Return the p-value of the combined p-values based on the method selected """
    self.n = len(self.N)
    if self.method == 'Tippett':
        self.output = output
        output = 1 - (1 - output)**self.n  # ST is Beta(1,n)
    elif self.method == 'Stouffer':
        self.output = output
        output = norm.sf(output, scale=self.n)  # SS is N(0,n); upper tail
    elif self.method == 'George':
        self.output = output
        output = t.sf(output, self.n)  # SG is Student t distribution (n)
    elif self.method == 'Ed':
        self.output = output
        output = gamma.sf(output, a=self.n)  # SE is Gamma(x,n)
    elif self.method == 'Pearson':
        self.output = output
        output = chi2.cdf(output, 2 * self.n)  # SP is Chi-square df=2n
    else:
        self.output = output
        output = chi2.sf(output, 2 * self.n)  # SF (Fisher) is Chi-square df=2n
    return output
def plot_counts(data, xlabel=None, title=None, save=False):
    fig, ax = plt.subplots(1, 1, tight_layout=True)
    for p in data:
        ax.errorbar(x=p['x'], y=p['y'], xerr=p['xerr'], yerr=p['yerr'],
                    linestyle='', color=p['color'], marker=p['marker'],
                    label=p['label'], alpha=0.8)
        #ax.errorbar(x=data['bin_centres'], y=data['counts'], fmt='+')
        ax.plot(p['x'], chi2.pdf(p['x'], df=1), label='Χ²',
                linestyle='-.', color='black', alpha=0.6)
    if title:
        ax.set_title(title)
    if xlabel:
        ax.set_xlabel(xlabel)
    ax.set_yscale('log')
    ax.set_ylabel('counts (normalized)')
    ax.legend(loc='best')
    if save:
        plt.savefig(save)
        log.info(f'ts distribution plot saved in {save}')
    else:
        plt.show()
import numpy as np
import matplotlib.pyplot as plt
from pandas import qcut
from scipy.stats import norm, chi2

def get_calibration(distribution, realizations, b=10, plot=False):
    """
    b : number of bins.
    distribution of the variable and realizations of the variable.
    (mean, std), float list of length T.
    """
    mean, std = distribution
    # The distribution will be divided into b bins delimited by the quantiles
    # of the distribution.
    pdf = norm.pdf(np.linspace(-5, 5, 100))
    rvf = norm.rvs(size=1000, loc=mean, scale=std)
    # sort the unique quantile edges: sets are unordered
    deciles = sorted(x.right for x in set(qcut(rvf, b).to_numpy()))
    if plot == True:
        plt.plot(np.linspace(-5, 5, 100), pdf)
        x_axis = deciles
        y_axis = [norm.pdf(x) for x in x_axis]
        plt.scatter(x_axis, y_axis)
        plt.show()
    deciles = sorted([-5] + deciles[:-1] + [5])
    bins = [[deciles[i], deciles[i + 1]] for i in range(len(deciles) - 1)]
    p_content = [1 / b for i in range(b)]
    counts, _ = np.histogram(realizations, deciles)
    counts = counts / np.sum(counts)
    # Now that we have the bins, compute the likelihood-ratio statistic.
    right_tail = 2 * len(realizations) * np.sum([
        counts[i] * np.log((counts[i] / p_content[i] if counts[i] != 0 else 1))
        for i in range(len(p_content))
    ])
    # calibration score: upper-tail probability of the statistic
    C = 1 - chi2.cdf(right_tail, df=b - 1)
    return C
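# Minimal sketch for get_calibration: perfectly calibrated forecasts (standard
# normal forecast, standard normal realizations) should give C close to 1.
realizations = norm.rvs(size=500)
print(get_calibration((0, 1), realizations, b=10))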
def BackTesting(self, M):
    ex = self.exceptions(M)
    p = np.array(self.alphas)
    x = ex.sum()[1:].values
    m = ex.count()[1:].values
    p_est = x / m
    level = np.array([0.05] * len(self.alphas))
    BackTesting = m * p
    num = (p**x) * (1 - p)**(m - x)
    den = (p_est**x) * (1 - p_est)**(m - x)
    test_kupiec = -2 * np.log(list(num / den))
    # upper-tail p-value of the Kupiec likelihood-ratio statistic (1 dof)
    Pvalue = chi2.sf(test_kupiec, 1)
    zone = np.where(Pvalue <= level, "Reject H0", "Do not reject H0")
    df = pd.DataFrame(
        {
            'x': x,
            'm': m,
            'estimated p': p_est,
            'significance level': level,
            'Kupiec test': test_kupiec,
            'P-value': Pvalue,
            'Rejection zone': zone,
            'BackTesting': BackTesting,
            'Efficiency value': 1 - p_est
        },
        index=["p=" + str(pp) for pp in p])
    return df.T
def visualize_pruning(w_norm, n_retained,
                      title='Initial model weights vs theoretical for pruning'):
    fig, ax1 = plt.subplots()
    ax1.set_title(title)
    ax1.hist(w_norm, density=True, bins=200, alpha=0.6, histtype='stepfilled',
             range=[0, n_retained * 5])
    ax1.axvline(x=n_retained, linewidth=1, color='r')
    ax1.set_ylabel('PDF', color='b')
    ax2 = ax1.twinx()
    ax2.set_ylabel('Survival Function', color='r')
    ax1.set_xlabel('w_norm')
    x = np.linspace(chi2.ppf(0.001, n_retained), chi2.ppf(0.999, n_retained), 100)
    ax2.plot(x, chi2.sf(x, n_retained), 'g-', lw=1, alpha=0.6, label='chi2 sf')
    ax1.plot(x, chi2.pdf(x, n_retained), 'r-', lw=1, alpha=0.6, label='chi2 pdf')
def plot_A0(self):
    sG = np.sqrt(np.var(self.gA0))
    mG = np.mean(self.gA0)
    amin, amax = plt.xlim()
    alist = np.arange(3 * amin, amax * 3, amax / 250.)
    theoryGdist = norm.pdf(alist, loc=mG, scale=sG)
    for i in range(len(self.fgnls)):
        if (self.theoryplot == False):
            lbl = self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i])
        else:
            lbl = None
        neg = np.min(self.fgNLA0[i] - self.gA0)
        const = 9 * 6. * self.fgnls[i] * self.phisq
        plot_hist(plt,
                  (self.fgNLA0[i] - self.gA0 + const) / (1 - 9. * 3. * self.fgnls[i] * self.phisq),
                  clr=self.clrs[i + 1], alp=ALPHA, labl=lbl, ht='stepfilled')
        amin, amax = plt.xlim()
        alist = np.arange(3 * amin, amax * 3, 0.001)
        if (self.theoryplot):
            scl = 6.0 * self.A0const * self.Nconst * self.fgnls[i]
            theorynGdist = np.sign(self.fgnls[i]) * chi2.pdf(alist, 1, scale=scl)
            plt.plot(alist, theorynGdist, self.clrs[i + 1], linestyle=self.ls[i + 1],
                     linewidth=LW, label=self.TYPELABEL + "=" + NtoSTR(self.fgnls[i]))
    plt.xlabel(r'$A_0$')
    plt.ylabel(r'$p(A_0)$')
    plt.yscale('log')
    plt.ylim(0.01, 100)
    plt.xlim(-0.02, 0.25)
    plt.legend()
def get_chi2_text(self, a1):
    k = 0
    chi2_value = 0
    best_value = a1.get_best_fit()['parameters']
    r_ = len(best_value)
    for spec in self.spectrumlist:
        sp = spec.spectrum
        e_add = spec.e_add
        rate = self.model(e_add, best_value)
        spec1 = self.get_A(rate, spec.e_lo, spec.e_hi, spec.e_add_num)
        model_sp = spec.transform(spec1)
        effinde = spec.effective_index
        if effinde is not None:
            model_sp = model_sp[effinde[0]:effinde[-1]]
            sp = sp[effinde[0]:effinde[-1]]
        index_list = self.chi2_check(model_sp)
        new_model_sp = []
        new_sp = []
        for inde_i in index_list:
            new_model_sp.append(np.sum(model_sp[inde_i]))
            new_sp.append(np.sum(sp[inde_i]))
        new_model_sp = np.array(new_model_sp)
        new_sp = np.array(new_sp)
        chi2_s = (new_sp - new_model_sp)**2 / new_model_sp
        k = k + len(chi2_s)
        chi2_value = chi2_value + chi2_s.sum()
    df = k - r_ - 1
    # goodness-of-fit p-value: upper tail of the chi-square distribution
    p = chi2.sf(chi2_value, df)
    return {'p': p, 'df': df, 'chi2': chi2_value}
def chisquareMC(df):
    # Rejection sampling: propose x uniformly on [0, 10] and accept when the
    # chi-square pdf exceeds a uniform height on [0, 1]; a loop avoids the
    # recursion-depth risk of the retry.
    while True:
        x = np.random.random() * 10
        fx = np.random.random() * 1
        if chi2.pdf(x, df) >= fx:
            return x
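# Quick sanity check of the rejection sampler above; note that the proposal
# window truncates the distribution at 10, so the sample mean sits slightly
# below the theoretical df.
import numpy as np
from scipy.stats import chi2
draws = [chisquareMC(3) for _ in range(5000)]
print(np.mean(draws), chi2.mean(3))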
def test_inv_chi2_pdf(self):
    """
    Test against the Octave function:

        >> format long
        >> chi2pdf(1.5^-1, 4.0)
        ans = 0.119421885095632

    The inverse-chi-square distribution calculator at
    http://keisan.casio.com/exec/system/1304908316
    gives (1.5, 4) -> 0.0530763933

    R:
        > library(geoR)
        > dinvchisq(1.5, df=4)
        ans = 0.05307639
    """
    octave_chi2pdf = 0.119421885095632
    online_calculator = 0.0530763933
    r_value = 0.05307639
    v1 = 1.5
    v2 = 4.0
    my_func = inv_chi2(v1, v2)
    # inverse-chi-square(df) is invgamma(df/2, scale=1/2)
    using_inv_gamma = invgamma.pdf(v1, v2 / 2.0, scale=0.5)
    print("... Value obtained using inv-gamma :", using_inv_gamma)
    scipy_chi2pdf = chi2.pdf(v1**-1, v2)
    print("... Value obtained using chi2 :", scipy_chi2pdf)
    self.assertAlmostEqual(octave_chi2pdf, scipy_chi2pdf, places=5)
    self.assertAlmostEqual(my_func, octave_chi2pdf, places=5)
def loglambda_plot(values, name):
    thetas = values.keys()
    nplots = len(thetas)
    rcParams['xtick.major.pad'] = 12
    rcParams['ytick.major.pad'] = 12
    # For now plot only theta = 0.7
    fig = plt.figure()
    plt.hist(np.array(values[0.7]) * 2.0, density=True, bins=75, range=(0, 6.5),
             color='#20A387')
    xvals = np.linspace(0, 6.5, 300)
    plt.plot(xvals, chi2.pdf(xvals, df=1.0), color='#440154', alpha=0.8,
             label=r'$\chi^{2}$, n.d.f = 1')
    plt.xlabel(r'$-2 \ln \frac{L(\alpha = 0.7)}{L(\hat{\alpha})}$')
    plt.ylabel('Probability density / %.2f' % (6.5 / 75))
    fig.canvas.set_window_title(name)
    plt.legend(loc='upper right')
    plt.xlim([0, 6.5])
    plt.subplots_adjust(left=0.15, right=0.97, top=0.97, bottom=0.17)
    fig.show()
    fig.savefig('loglambda_chisquare_mle_nn.pdf')
from scipy.stats import chi2

def sdist(x, sigma, n=10):
    '''
    Calculate the PDF of the sampling distribution of the standard deviation,
    given n degrees of freedom and the standard deviation sigma of the
    original distribution. For a sample of n + 1 observations, the sampling
    distribution is given by

        ((n + 1) * S**2) / sigma**2 ~ chi2(n)

    where S is the (biased, ddof=0) sample standard deviation. We therefore
    cannot evaluate the desired pdf directly and instead work with the
    transformed variable. Define

        g_inv(S)      = (n + 1) * S**2 / sigma**2
        pdf(g_inv(S)) = chi2.pdf(g_inv(S), n)

    Then, by the change-of-variables formula, the pdf of S is

        pdf_S(S) = chi2.pdf(g_inv(S), n) * dg_inv/dS

    See http://math.arizona.edu/~jwatkins/f-transform.pdf
    '''
    f = lambda x: chi2.pdf(x, n)
    nb = n + 1
    g_inv = lambda x: (nb * x**2) / (sigma**2)
    dyg_inv = lambda x: (2 * nb * x) / (sigma**2)
    y = f(g_inv(x)) * dyg_inv(x)
    return y
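# A hedged check of sdist against simulation: with n degrees of freedom the
# function models the std (ddof=0) of n + 1 normal draws; sigma and n below
# are arbitrary choices for illustration.
import numpy as np
import matplotlib.pyplot as plt
sigma, n = 2.0, 10
stds = np.random.normal(0, sigma, size=(20000, n + 1)).std(axis=1, ddof=0)
xs = np.linspace(0.01, 2 * sigma, 200)
plt.hist(stds, bins=60, density=True, alpha=0.4)
plt.plot(xs, sdist(xs, sigma, n))
plt.show()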
def art_qi2(img, airmask, min_voxels=int(1e3), max_voxels=int(3e5), save_plot=True):
    r"""
    Calculates :math:`\text{QI}_2`, based on the goodness-of-fit of a centered
    :math:`\chi^2` distribution onto the intensity distribution of
    non-artifactual background (within the "hat" mask):

    .. math ::

        \chi^2_n = \frac{2}{(\sigma \sqrt{2})^{2n} \, (n - 1)!}x^{2n - 1}\, e^{-\frac{x}{2}}

    where :math:`n` is the number of coil elements.

    :param numpy.ndarray img: input data
    :param numpy.ndarray airmask: input air mask without artifacts
    """
    from sklearn.neighbors import KernelDensity
    from scipy.stats import chi2
    from mriqc.viz.misc import plot_qi2

    # S. Ogawa was born
    np.random.seed(1191935)

    data = img[airmask > 0]
    data = data[data > 0]

    # Write out figure of the fitting
    out_file = op.abspath('error.svg')
    with open(out_file, 'w') as ofh:
        ofh.write('<p>Background noise fitting could not be plotted.</p>')

    if len(data) < min_voxels:
        return 0.0, out_file

    modelx = data if len(data) < max_voxels else np.random.choice(data, size=max_voxels)
    x_grid = np.linspace(0.0, np.percentile(data, 99), 1000)

    # Estimate data pdf with KDE on a random subsample
    kde_skl = KernelDensity(bandwidth=0.05 * np.percentile(data, 98),
                            kernel='gaussian').fit(modelx[:, np.newaxis])
    kde = np.exp(kde_skl.score_samples(x_grid[:, np.newaxis]))

    # Find cutoff
    kdethi = np.argmax(kde[::-1] > kde.max() * 0.5)

    # Fit X^2
    param = chi2.fit(modelx[modelx < np.percentile(data, 95)], 32)
    chi_pdf = chi2.pdf(x_grid, *param[:-2], loc=param[-2], scale=param[-1])

    # Compute goodness-of-fit (gof)
    gof = float(np.abs(kde[-kdethi:] - chi_pdf[-kdethi:]).mean())
    if save_plot:
        out_file = plot_qi2(x_grid, kde, chi_pdf, modelx, kdethi)

    return gof, out_file
def differential_lrt(x, y, xmin=0):
    from scipy.stats import chi2
    lrtX = bimod_likelihood(x)
    lrtY = bimod_likelihood(y)
    lrtZ = bimod_likelihood(np.concatenate((x, y)))
    lrt_diff = 2 * (lrtX + lrtY - lrtZ)
    # upper-tail p-value of the likelihood-ratio statistic with 3 dof
    return chi2.sf(x=lrt_diff, df=3)
import numpy as np
from scipy.stats import chi2 as chi2_dist  # aliased: the function below shadows the name chi2

def chi2(emission_index=0, t_horizon=100, tstep=.1):
    """
    Use k parameter = t + 2 as done in Cherubini
    (doi: 10.1111/j.1757-1707.2011.01156.x).
    """
    yrs = np.linspace(0, t_horizon, int(t_horizon / tstep + 1))
    decay = chi2_dist.pdf(yrs, emission_index + 2)
    decay = decay * tstep
    return decay
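# Usage sketch for the decay curve above: each element is a per-step mass of
# the chi-square pdf, so the values sum to roughly 1 over a long horizon.
d = chi2(emission_index=0, t_horizon=100, tstep=0.1)
print(len(d), d.sum())  # 1001 steps, sum close to 1.0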
def plot_chisq_hist(ax, chisqs, nu):
    '''Plots a histogram of chisq values.

    Also overplots a chisq distribution for nu degrees of freedom.
    '''
    n, bins, patches = plt.hist(chisqs, density=False, facecolor='g')
    plt.ylabel("Number of Occurrences")
    plt.xlabel("Chisq Value")
    x = np.linspace(0, 6, 100)
    # crude fixed scaling of the pdf up to the unnormalized histogram counts
    plt.plot(x, chi2.pdf(x, nu) * 100000, 'r-', lw=5, alpha=0.6, label='chi2 pdf')
def pdf_random_rotation(self, x, v, mu, kappa, n):
    """
    Give back the probability of observing the vector x, such that its angle
    with v comes from a von Mises distribution with k = self.kappa and its
    length comes from a chi-squared distribution with parameter n.
    """
    v = v / LA.norm(v, 2)
    x = x / LA.norm(x, 2)
    ang = sum(v * x)
    return (.5 / np.pi) * (chi2.pdf(n * LA.norm(x, 2), n) * n) * (vonmises.pdf(ang, kappa))
def chi2_curve():
    fig, ax = plt.subplots(1, 1)
    fig.set_size_inches(4, 2)
    x = np.linspace(0, 40, 1000)
    ax.plot(x, chi2.pdf(x, 2), "r-", lw=2, alpha=0.6, label="chi2")
    # dashed line marks the observed statistic, far in the tail
    ax.vlines(32.977, 0, 0.5, linestyle="--")
    ax.set_xlabel("Chi2 value")
    ax.set_ylabel("Probability")
    return fig
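# The function returns the figure without showing it; a minimal caller:
fig = chi2_curve()
fig.savefig('chi2_curve.png')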
def main():
    fig, ax = plt.subplots(1, 1)
    df = 5
    loc = 20
    scale = 8
    mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
    #x = np.linspace(chi2.ppf(0.01, df, loc, scale), chi2.ppf(0.99, df, loc, scale), 20)
    valmax = int(chi2.ppf(0.99, df, loc, scale)) + 1
    if (valmax % 2 != 0):
        valmax = valmax + 1
    #valmax = 46
    x = np.linspace(0, valmax, valmax + 1)
    #print (x)
    proba = chi2.pdf(x, df, loc, scale)
    vs = map(repr, proba.tolist())
    repartition = list(zip(x.tolist(), proba.tolist()))
    nbTranche = 5
    valmin = loc
    space = (valmax - valmin) / nbTranche
    remain = 100
    tranche0 = int(100 * sum([pro for val, pro in repartition if (val < valmin)]))
    remain -= tranche0
    print("P(v < %d) = %d" % (valmin, tranche0))
    for traidx in range(nbTranche):
        deb = valmin + traidx * space
        fin = valmin + (traidx + 1) * space
        tranche = int(100 * sum(
            [pro for val, pro in repartition if (val >= deb) and (val < fin)]))
        remain -= tranche
        print("P(%d <= v < %d) = %d" % (deb, fin, tranche))
    trancheF = 100 * sum([
        pro for val, pro in repartition if (val >= valmin + nbTranche * space)
    ])
    print("P(v >= %d) = %d" % (valmin + nbTranche * space, remain))
    for certitude in [0.6, 0.7, 0.75, 0.8, 0.85, 0.90, 0.95]:
        # %.2f: the certainty levels are fractional, so %d would print 0
        print("certi = %.2f , val = %d" %
              (certitude, int(chi2.ppf(certitude, df, loc, scale))))
    #print (" ".join(list(vs)).replace(".", ","))
    ax.plot(x, proba, 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    #rv = chi2(df, loc, scale)
    #ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')
    #vals = chi2.ppf([0.001, 0.5, 0.999], df, loc, scale)
    #np.allclose([0.001, 0.5, 0.999], chi2.cdf(vals, df, loc, scale))
    #r = chi2.rvs(df, loc=loc, scale=scale, size=1000)
    #ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
    #ax.legend(loc='best', frameon=False)
    plt.show()
def dchisq(x, df, ncp=0):
    """ Calculates the density/point estimate of the chi-square distribution """
    from scipy.stats import chi2, ncx2
    if ncp == 0:
        result = chi2.pdf(x=x, df=df, loc=0, scale=1)
    else:
        result = ncx2.pdf(x=x, df=df, nc=ncp, loc=0, scale=1)
    return result
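# Usage sketch mirroring R's dchisq, exercising both the central and the
# noncentral branch:
print(dchisq(1.5, df=4))         # central chi-square density
print(dchisq(1.5, df=4, ncp=1))  # noncentral, via scipy's ncx2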
def chi_squared(df):
    fig, ax = plt.subplots(1, 1)
    # Calculate a few first moments:
    mean, var, skew, kurt = chi2.stats(df, moments='mvsk')
    # Display the probability density function (pdf):
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    plt.show()
def posterior_sigma(x, mu0, nu0, sigma0, y, kappa0):
    n = len(y)
    bar_y = np.mean(y)
    nu_n = nu0 + n
    s = np.std(y, ddof=1)  # sample standard deviation, matching the (n-1)s**2 term
    # standard normal-inverse-chi-square update: the prior-data disagreement
    # term uses the squared distance (bar_y - mu0)**2
    sigma_n_squared = (nu0 * sigma0**2 + (n - 1.0) * s**2 +
                       (kappa0 * n) * (bar_y - mu0)**2 / (kappa0 + n)) / nu_n
    p_squared = chi2.pdf(nu_n**(-1.0), sigma_n_squared)
    p = np.sqrt(p_squared)
    return p
def report_lr(model_one_likelihood, model_two_likelihood):
    x = -2 * model_two_likelihood + 2 * model_one_likelihood
    #p_value = -1
    # upper-tail p-value of the likelihood-ratio statistic with 2 dof
    p_value = chi2.sf(x, 2)
    r = "-------------------------\n"
    r += "--Likelihood Ratio Test--\n"
    r += "-------------------------\n"
    r += "| The likelihood ratio of MODEL TWO/MODEL ONE (NULL/ALT) is %4.2f\n| This has a p_value of %s\n" % (x, p_value)
    r += "-------------------------\n"
    return [r, [x, p_value]]
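# Hedged example with made-up log-likelihoods (model one is the alternative,
# model two the null with two fewer parameters):
from scipy.stats import chi2
text, (D, p) = report_lr(model_one_likelihood=-100.0, model_two_likelihood=-105.0)
print(text)  # D = 10.0, p = chi2.sf(10, 2) ~ 0.0067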
import numpy as np
from scipy.stats import norm, uniform, chi2

def get_pdf(dist, x, df=1, loc=0, scale=1):
    if dist == 'normal':
        y = norm.pdf(x, loc=loc, scale=scale)
    elif dist == 'uniform':
        y = uniform.pdf(x, loc=loc, scale=scale)
    elif dist == 'chi2':
        y = chi2.pdf(x, df)
    else:
        print("No distribution found with name {:s}".format(dist))
        y = np.zeros_like(x)
    return y
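# Dispatch sketch for get_pdf: the same grid evaluated under each supported family.
x = np.linspace(0.1, 5, 5)
for name in ('normal', 'uniform', 'chi2'):
    print(name, get_pdf(name, x, df=2))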
def stat(samples, sets):
    S = [sum(map(pow,
                 [random.gauss(1, random.random()) for x in range(0, samples)],
                 [2 for x in range(0, samples)]))
         for y in range(0, sets)]
    hist, bins = np.histogram(S, bins=100)
    # plt.bar(np.mean(S), hist, align='center')
    rv = chi2(samples)  # frozen chi-square with `samples` degrees of freedom
    x = np.linspace(0, np.minimum(rv.dist.b, 3))
    h = plt.plot(x, rv.pdf(x))
def confidence_update(self, update, betas):
    """
    To estimate theta, we need to retrieve the appropriate P(E | beta), then
    optimize the gradient step with Newton-Raphson.
    ---
    Params:
        update -- the full update step that will get weighted by confidence
        betas  -- a vector of beta values for all features
    Returns:
        weights -- a vector of the new weights
    """
    confidence = [1.0] * self.environment.num_features
    for i in range(self.environment.num_features):
        ### Compute update using P(r|beta) for the beta estimate we just computed ###
        # Compute P(r|beta)
        mus1 = self.P_beta[self.environment.feature_list[i] + "1"]
        mus0 = self.P_beta[self.environment.feature_list[i] + "0"]
        p_r0 = chi2.pdf(betas[i], mus0[0], mus0[1], mus0[2]) / (
            chi2.pdf(betas[i], mus0[0], mus0[1], mus0[2]) +
            chi2.pdf(betas[i], mus1[0], mus1[1], mus1[2]))
        p_r1 = chi2.pdf(betas[i], mus1[0], mus1[1], mus1[2]) / (
            chi2.pdf(betas[i], mus0[0], mus0[1], mus0[2]) +
            chi2.pdf(betas[i], mus1[0], mus1[1], mus1[2]))
        l = math.pi

        # Newton-Raphson setup; define function, derivative, and call optimization method.
        def f_theta(weights_p):
            num = p_r1 * np.exp(weights_p * update[i])
            denom = p_r0 * (l / math.pi)**(self.environment.num_features / 2.0) * np.exp(-l * update[i]**2) + num
            return weights_p + self.step_size * num * update[i] / denom - self.environment.weights[i]

        def df_theta(weights_p):
            num = p_r0 * (l / math.pi)**(self.environment.num_features / 2.0) * np.exp(-l * update[i]**2)
            denom = p_r1 * np.exp(weights_p * update[i])
            return 1 + self.step_size * num / denom

        weight_p = newton(f_theta, self.environment.weights[i], df_theta, tol=1e-04, maxiter=1000)
        num = p_r1 * np.exp(weight_p * update[i])
        denom = p_r0 * (l / math.pi)**(self.environment.num_features / 2.0) * np.exp(-l * update[i]**2) + num
        confidence[i] = num / denom

    print("Here is weighted beta:", confidence)
    weights = self.environment.weights - np.array(confidence) * self.step_size * update
    return weights
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    # simulate the chi2 distribution
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j]**2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(-4, 4, 100)
    ax.plot(x, t.pdf(x, df))
    # simulate the t-distribution
    y = []
    for i in range(1000):
        rx = norm.rvs()
        ry = chi2.rvs(df)
        rt = rx / np.sqrt(ry / df)
        y.append(rt)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()

    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    dfn, dfm = 10, 5
    x = np.linspace(f.ppf(0.01, dfn, dfm), f.ppf(0.99, dfn, dfm), 100)
    ax.plot(x, f.pdf(x, dfn, dfm))
    # simulate the F-distribution: ratio of scaled chi-squares, no square roots
    y = []
    for i in range(1000):
        rx = chi2.rvs(dfn)
        ry = chi2.rvs(dfm)
        rf = (rx / dfn) / (ry / dfm)
        y.append(rf)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()
def score_prior(self, ss, hps):
    score = 0
    for mu, var in zip(ss['mu'], ss['var']):
        if mu <= self.EPSILON or mu >= (1 - self.EPSILON):
            return -np.inf
        if (var / hps['var_scale']) < self.EPSILON:
            return -np.inf
        score += np.log(chi2.pdf(var / hps['var_scale'], self.CHI_VAL))
    score += log_dirichlet_dens(ss['pi'], np.ones(hps['comp_k']) * hps['dir_alpha'])
    return score
def plot_chi2(df):
    x = list()
    y = list()
    for i in np.arange(0, 14, 0.1):
        x.append(i)
        # y.append(stats.chi2(i))
        y.append(chi2.pdf(i, df))
    plt.plot(x, y)
    plt.axis([0, 14, 0, 0.5])
    plt.grid()
    plt.show()
def sampling_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    # simulate the sampling distribution
    y = []
    for i in range(1000):
        r = norm.rvs(loc=5, scale=2, size=df + 1)
        rchi2 = (df) * np.var(r) / 4  # ~ (n-1) s^2 / sigma^2 with sigma = 2
        y.append(rchi2)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('sampling_distribution.png')
def rollQStat(n=25):
    now = dt.datetime.now()
    precalculatedDict = {}
    iDate = dt.datetime(1996, 1, 31)
    while iDate < now:
        try:
            nameArray, Qarray = Qstatistic(iDate)
            # upper-tail p-values of the Q statistics (chi-square, 6 dof)
            p_values = [chi2.sf(x, 6) for x in Qarray]
            precalculatedDict[iDate] = [p_values, Qarray]
            iDate = addMonths(iDate, 1)
            print(iDate.strftime('%Y/%m/%d %H:%M:%S'))
        except:
            print("Unexpected error:", sys.exc_info()[0])
            raise
    Nameoutput = open('Qstatistic.pkl', 'wb')
    pickle.dump(precalculatedDict, Nameoutput)
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=10)
        for j in range(10):
            chi2r = chi2r + r[j]**2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)
    plt.show()
def make_ts_hist(tab, masks, cols, labels, cumulative=True):
    if not isinstance(cols, list):
        cols = [cols] * len(masks)
    plt.figure()
    for m, l, c in zip(masks, labels, cols):
        h = Histogram(Axis.create(0.0, 20, 100))
        h.fill(tab[m][c])
        h = h.normalize()
        if cumulative:
            h = h.cumulative(lhs=False)
        h.plot(hist_style='step', alpha=0.3, linewidth=2, label=l)

    dof = 2
    label = r"$\chi^2_{1} / 2$"
    kwargs = dict(label=label, lw=1.5, c='k', dashes=(5, 2))
    if cumulative:
        plt.gca().plot(h.axis(0).center,
                       0.5 * (1 - chi2.cdf(h.axis(0).edges[:-1], 1)), **kwargs)
    else:
        plt.gca().plot(h.axis(0).center,
                       h.axis(0).width * chi2.pdf(h.axis(0).center, 1), **kwargs)

    label = r"$\chi^2_{2} / 2$"
    kwargs = dict(label=label, lw=1.5, c='r', dashes=(5, 2))
    # plt.gca().plot(h.axis(0).center,
    #                0.5*(1-chi2.cdf(h.axis(0).center,2)),**kwargs)
    # if cumulative:
    #     plt.gca().plot(h.axis(0).center,
    #                    0.5*(1-chi2.cdf(h.axis(0).edges[:-1],2)),**kwargs)
    # else:
    #     plt.gca().plot(h.axis(0).center,
    #                    h.axis(0).width*chi2.pdf(h.axis(0).center,2),**kwargs)

    plt.gca().set_yscale('log')
    plt.gca().set_ylim(1E-4, 1)
    plt.gca().legend(frameon=False)
    plt.gca().set_xlabel(r'TS$_\mathrm{ext}$')
    plt.gca().set_ylabel('Cumulative Fraction')
def chi2_distribution():
    fig, ax = plt.subplots(1, 1)
    # display the probability density function
    df = 10
    x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
    ax.plot(x, chi2.pdf(x, df))
    # simulate the chi2 distribution
    y = []
    n = 10
    for i in range(1000):
        chi2r = 0.0
        r = norm.rvs(size=n)
        for j in range(n):
            chi2r = chi2r + r[j] ** 2
        y.append(chi2r)
    ax.hist(y, density=True, alpha=0.2)
    plt.savefig('chi2_distribution.png')
def stats(agg):
    lags_ms = (agg.lags / agg.sample_rate) * 1e3
    regions_to_use = ['L1', 'L2', 'L3', 'CM', 'NCM']
    for reg in regions_to_use:
        print('---------------- %s ----------------' % reg)
        i = (agg.df.cc > 0.20) & (agg.df.region == reg)
        xi = agg.df[i].xindex.values
        filts = agg.filters[xi, :]
        cfreqs = compute_best_freq(filts, agg.sample_rate, lags_ms)
        cfreqs = cfreqs[cfreqs > 0]
        cfreqs = cfreqs.reshape([len(cfreqs), 1])

        gmm1 = GMM(n_components=1)
        gmm1.fit(cfreqs)
        lk_null = gmm1.score(cfreqs).sum()
        aic_null = gmm1.aic(cfreqs)

        gmm2 = GMM(n_components=2)
        gmm2.fit(cfreqs)
        print('Center frequencies of 2-component GMM:', gmm2.means_.squeeze())
        print('Covariances: ', np.sqrt(gmm2.covars_.squeeze()))
        lk_full = gmm2.score(cfreqs).sum()
        aic_full = gmm2.aic(cfreqs)

        lk_rat = -2 * (lk_null - lk_full)
        chi_df = 2
        # upper-tail p-value of the likelihood-ratio statistic
        pval = chi2.sf(lk_rat, chi_df)

        print('Null likelihood: %0.6f' % lk_null)
        print('Full likelihood: %0.6f' % lk_full)
        print('Likelihood Ratio: %0.6f' % lk_rat)
        print('p-value: %0.6f' % pval)
        print('Null AIC: %0.6f' % aic_null)
        print('Full AIC: %0.6f' % aic_full)
        print('Relative Likelihood (N=%d): %0.6f' %
              (i.sum(), np.exp((aic_full - aic_null) / 2.)))
def PlotChi2(self):
    img = gfh.ReadData(self.chi2)
    hmin = 0     # chi2.ppf(0.0001, df)
    hmax = 1000  # chi2.ppf(0.9999, df)
    bins = 30000
    x = np.linspace(hmin, hmax, bins)
    chng = gfh.ReadData(self.chng)
    nochng = (1 - chng).astype(np.uint8)
    histr = cv2.calcHist([img], [0], nochng, [bins], [hmin, hmax])
    histr /= sp.integrate.trapz(np.squeeze(histr), x=x)
    plt.figure(4)
    plt.plot(x, chi2.pdf(x, self.nvar), 'r-', lw=5, alpha=0.6, label='chi2 pdf')
    plt.plot(x, histr, label='chi2 histogram')
    plt.legend()  # legend must be drawn before show
    plt.show()
def randomOfRandomness():
    """ Create the agents with their initial data. """
    wealthOfAllDelta = []
    number = 0
    for i in np.linspace(0.00001, 0.061, 500):
        randomNumbers = []   # list for storing the random numbers
        wealthDivided = []   # wealth split randomly into a random number of parts
        wealthOfOneDeltaASRS = []
        wealthOfOneDelta = 1000 * chi2.pdf(600 * i, 20) + 0
        numberOfAgents = random.randint(1, 1)  # how many agents share one delta
        for ii in range(numberOfAgents):
            # append random numbers to the list
            randomNumbers.append(random.randint(1, 100))
        sumOfrandomAgents = sum(randomNumbers)
        for x in range(numberOfAgents):
            # wealth is divided between the agents within one delta
            wealthDivided.append(randomNumbers[x] * wealthOfOneDelta / sumOfrandomAgents)
        for wealth in wealthDivided:
            silver = random.uniform(0, wealth)
            realEstateMoney = wealth - silver
            number += 1
            wealthOfOneDeltaASRS.append([realEstateMoney, silver, i, number])
        wealthOfAllDelta.append(wealthOfOneDeltaASRS)
    #wealthOfAllDelta = np.array(wealthOfAllDelta)
    with open('simple_population', 'wb') as f:
        pickle.dump(wealthOfAllDelta, f)
    with open('numberOfagents', 'wb') as ff:
        pickle.dump(number, ff)
simple_LL = -1 * loss_function(simplified_results, func=simulate_simplified_dynamics)

full_AIC = 2 * 3 - 2 * full_LL
simple_AIC = 2 * 2 - 2 * simple_LL
delta_AIC = simple_AIC - full_AIC
print(delta_AIC)

# The probability that the simplified model is better than the full model is
# given by its Akaike weight.

# In[39]:

1 / (1 + np.exp(.5 * delta_AIC))

# To put this in perspective, this probability is roughly the magnitude of
# flipping a fair coin eight hundred times and it only ever coming up heads.

# In[40]:

.5**800

# Since the full and simplified model are nested, we can also perform a
# Likelihood Ratio test, where the test statistic $D$ is $\chi^2$ distributed
# with one degree of freedom. We reject the null hypothesis that the
# simplified model is correct.

# In[41]:

from scipy.stats import chi2
D = 2 * (full_LL - simple_LL)
# upper-tail p-value of the likelihood-ratio statistic
chi2_result = chi2.sf(D, 1)
print(chi2_result)
'''
@author: Flavio Lichtenstein
@local: Unifesp DIS - Bioinformatica
'''
import numpy as np
from scipy.stats import chi2
import matplotlib.pyplot as plt

fig, ax = plt.subplots(1, 1)

# Calculate a few first moments:
df = 2
mean, var, skew, kurt = chi2.stats(df, moments='mvsk')

# Display the probability density function (pdf):
x = np.linspace(chi2.ppf(0.01, df), chi2.ppf(0.99, df), 100)
ax.plot(x, chi2.pdf(x, df), 'r-', lw=5, alpha=0.6, label='chi2 pdf')

# Alternatively, the distribution object can be called (as a function) to fix
# the shape, location and scale parameters. This returns a "frozen" RV object
# holding the given parameters fixed.
# Freeze the distribution and display the frozen pdf:
rv = chi2(df)
ax.plot(x, rv.pdf(x), 'k-', lw=2, label='frozen pdf')

vals = chi2.ppf([0.001, 0.5, 0.999], df)
print(np.allclose([0.001, 0.5, 0.999], chi2.cdf(vals, df)))

# Generate random numbers:
r = chi2.rvs(df, size=10000)
ax.hist(r, density=True, histtype='stepfilled', alpha=0.2)
def run(self, dataSlice, slicePoint=None):
    # Get the visit information
    m5 = dataSlice[self.m5col]
    # Number of visits
    N = len(m5)
    # magnitudes to be sampled
    mag = np.arange(16, np.mean(m5), 0.5)
    # hold the distance between the completeness and contamination goals
    res = np.zeros(mag.shape)
    # make them nans for now
    res[:] = np.nan
    # hold the measured noise-only variances
    noiseonlyvar = np.zeros(self.numruns)
    # Calculate the variance at a reference magnitude and scale from that
    m0 = 20.
    sigmaref = 0.2 * (10.**(-0.2 * m5)) * (10.**(0.2 * m0))
    # run the simulations
    # Simulate the measured noise-only variances at a reference magnitude
    for i in np.arange(self.numruns):
        # random realization of the Gaussian error distributions
        scatter = np.random.randn(N) * sigmaref
        noiseonlyvar[i] = np.var(scatter)  # store the noise-only variance

    # Since we are treating the underlying signal being representable by a
    # fixed-width gaussian, its variance pdf is a Chi-squared distribution
    # with the degrees of freedom = visits. Since variances add, the variance
    # pdfs convolve. The cumulative distribution function of the sum of two
    # random deviates is the convolution of one pdf with a cdf.

    # We'll consider the cdf of the noise-only variances because it's easier
    # to interpolate
    noisesorted = np.sort(noiseonlyvar)
    # linear interpolation
    interpnoisecdf = UnivariateSpline(noisesorted,
                                      np.arange(self.numruns) / float(self.numruns),
                                      k=1, s=0)

    # We need a binned, signal-only variance probability distribution function
    # for numerical convolution
    numsignalsamples = 100
    xsig = np.linspace(chi2.ppf(0.001, N), chi2.ppf(0.999, N), numsignalsamples)
    signalpdf = chi2.pdf(xsig, N)
    # correct x to the proper variance scale
    xsig = (self.signal**2.) * xsig / N
    pdfstepsize = xsig[1] - xsig[0]
    # Since everything is going to use this stepsize down the line,
    # normalize so the pdf integrates to 1 when summed (no factor of stepsize needed)
    signalpdf /= np.sum(signalpdf)

    # run through the sample magnitudes, calculate distance between cont
    # and comp thresholds.
    # run until solution found.
    solutionfound = False
    for i, mref in enumerate(mag):
        # i counts and mref is the currently sampled magnitude
        # Scale factor from m0
        scalefact = 10.**(0.4 * (mref - m0))
        # Calculate the desired contamination threshold
        contthresh = np.percentile(noiseonlyvar, 100. - 100. * self.contamination) * scalefact
        # Realize the noise CDF at the required stepsize
        xnoise = np.arange(noisesorted[0] * scalefact, noisesorted[-1] * scalefact, pdfstepsize)
        # Only do calculation if near the solution:
        if (len(xnoise) > numsignalsamples / 10) and (not solutionfound):
            noisecdf = interpnoisecdf(xnoise / scalefact)
            noisepdf = (noisecdf[1:] - noisecdf[:-1])  # turn into a noise pdf
            noisepdf /= np.sum(noisepdf)
            xnoise = (xnoise[1:] + xnoise[:-1]) / 2.  # from cdf to pdf conversion
            # calculate and plot the convolution = signal+noise variance dist.
            convolution = 0
            if len(noisepdf) > len(signalpdf):
                convolution = np.convolve(noisepdf, signalpdf)
            else:
                convolution = np.convolve(signalpdf, noisepdf)
            xconvolved = xsig[0] + xnoise[0] + np.arange(len(convolution)) * pdfstepsize
            # calculate the completeness threshold
            combinedcdf = np.cumsum(convolution)
            findcompthresh = UnivariateSpline(combinedcdf, xconvolved, k=1, s=0)
            compthresh = findcompthresh(1. - self.completeness)
            res[i] = compthresh - contthresh
            if res[i] < 0:
                solutionfound = True

    # interpolate for where the thresholds coincide
    #print res
    if np.sum(np.isfinite(res)) > 1:
        f1 = UnivariateSpline(mag[np.isfinite(res)], res[np.isfinite(res)], k=1, s=0)
        # sample the magnitude range at given resolution
        magsamples = np.arange(16, np.mean(m5), self.magres)
        vardepth = magsamples[np.argmin(np.abs(f1(magsamples)))]
        return vardepth
    else:
        return min(mag) - 1
from numpy import log
from scipy.stats import chi2

def errfunc(p, x, y):
    """Produce goodness measures for fitfunc applied to independent variable x,
    compared with data y, for chi2(2) statistics."""
    chis = 2 * y / fitfunc(p, x)   # values relative to current model
    probs = chi2.pdf(chis, 2)      # probability density for each value
    return log(probs).sum()        # function goodness (log-likelihood)
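# errfunc relies on an external model fitfunc(p, x); a hypothetical linear stub
# for illustration. If 2*y/model is chi2(2) distributed (i.e. y = model * Exp(1)),
# the returned log-probability sum is finite and peaks near the true parameters.
import numpy as np
fitfunc = lambda p, x: p[0] + p[1] * x
rng = np.random.default_rng(1)
x = np.linspace(1, 10, 50)
y = fitfunc([2.0, 0.5], x) * rng.exponential(1.0, size=x.size)
print(errfunc([2.0, 0.5], x, y))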
ax.text(8.9, 60., r'$\hat{\mu} = \frac{1}{15}\sum x_i = %.1f$' % np.mean(data),
        fontsize=18, bbox={'facecolor': 'none', 'pad': 14, 'ec': 'r'})
fig.subplots_adjust(bottom=0.15)
pl.savefig('../chisquared_data.pdf')
pl.show()
fig.clf()
pl.close(fig)

# new figure
fig = pl.figure(figsize=(10, 5), dpi=100)

# get the chi-squared value from the data
chisq = np.sum((data - np.mean(data))**2 / (sigmad**2))

x = np.linspace(0., 50., 1000)
c2pdf = chi2.pdf(x, nu)

# plot chi-squared pdf
pl.plot(x, c2pdf)
pl.plot([chisq, chisq], [0, np.max(c2pdf)], 'k--')
pl.fill_between(x, np.zeros(len(x)), c2pdf, where=x >= chisq, alpha=0.6,
                facecolor='green', interpolate=True)
ax = pl.gca()
ax.set_xlabel(r'$\chi^2$', fontsize=14)
ax.set_ylabel(r'$p(\chi^2)$', fontsize=14)
ax.text(chisq + 2, 0.07, r'$p_{14}(\chi^2) = %.2f$' % chisq, fontsize=16)

# cumulative function
c2cdf = chi2.cdf(x, nu)
pvalue = 1. - c2cdf[x <= chisq][-1]
print(pvalue)
def plot_A0(self):
    if self.TYPE != "gNL":
        plot_hist(plt, self.gA0, clr="skyblue", alp=ALPHA, ht="stepfilled")
    sG = np.sqrt(np.var(self.gA0))
    mG = np.mean(self.gA0)
    amin, amax = plt.xlim()
    alist = np.arange(3 * amin, amax * 3, amax / 250.0)
    theoryGdist = norm.pdf(alist, loc=mG, scale=sG)
    if self.TYPE != "gNL":
        plt.plot(alist, theoryGdist, self.clrs[0], linestyle=self.ls[0],
                 linewidth=LW, label=self.TYPELABEL + "$=0$")
    for i in range(len(self.fgnls)):
        if self.theoryplot == False:
            lbl = self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i])
        else:
            lbl = None
        if self.TYPE == "fNL":
            plot_hist(plt, self.fgNLA0[i], clr=self.clrs[i + 1], alp=ALPHA,
                      labl=lbl, ht="stepfilled")
        if self.TYPE == "gNL":
            # plot_hist(plt, self.fgNLA0[i]-self.gA0+6*self.fgnls[i]*self.efolds*self.A0const-6*self.fgnls[i]*self.phisq, clr=self.clrs[i+1], alp=ALPHA, labl=lbl, ht='stepfilled')
            neg = np.min(self.fgNLA0[i] - self.gA0)
            const = 9 * 6.0 * self.fgnls[i] * 1.5e-8
            # scl=6.0*self.A0const*self.Nconst*self.fgnls[i]
            plot_hist(plt,
                      (self.fgNLA0[i] - self.gA0 + const) / (1 - 9.0 * 3.0 * self.fgnls[i] * 1.5e-8),
                      clr=self.clrs[i + 1], alp=ALPHA, labl=lbl, ht="stepfilled")
        amin, amax = plt.xlim()
        alist = np.arange(3 * amin, amax * 3, 0.001)
        if self.theoryplot:
            if self.TYPE == "fNL":
                theorynGdist = norm.pdf(
                    alist, loc=mG,
                    scale=np.sqrt(16.0 * self.A0const * self.Nconst * self.fgnls[i]**2.0 + sG**2.0))
            if self.TYPE == "gNL":
                scl = 6.0 * self.A0const * self.Nconst * self.fgnls[i]
                # scl=2.0*self.A0const*self.efolds*self.fgnls[i]
                theorynGdist = np.sign(self.fgnls[i]) * chi2.pdf(alist, 1, scale=scl)
                # new
                """
                sigmaphi00=np.sqrt(self.A0const*self.efolds)
                a0=24.*np.pi*self.fgnls[i]
                theorynGdist=1./a0/np.sqrt(2.*np.pi)/sigmaphi00 * np.sqrt(a0/(alist+a0*sigmaphi00**2))*np.exp(-(alist+a0*sigmaphi00**2.0)/(2.*a0*sigmaphi00**2.0))
                self.theorynGdist=theorynGdist
                """
            LW1 = self.get_LW1(i)
            plt.plot(alist, theorynGdist, self.clrs[i + 1], linestyle=self.ls[i + 1],
                     linewidth=LW1,
                     label=self.TYPELABEL + "$=$" + NtoSTR(self.fgnls[i]))
    plt.xlabel(r"$A_0$")
    plt.ylabel(r"$p(A_0)$")
    plt.yscale("log")
    if self.TYPE == "fNL":
        plt.xlim(-1.0, 1.0)
        plt.ylim(0.1, 100)
    if self.TYPE == "gNL":
        # plt.xscale('log')
        plt.ylim(0.05, 1000)
        plt.xlim(-0.02, 0.25)
    plt.legend()
Q = inv(sqrtm(Sigma))

# == Generate observations of the normalized sample mean == #
error_obs = np.empty((2, replications))
for i in range(replications):
    # == Generate one sequence of bivariate shocks == #
    X = np.empty((2, n))
    W = dw.rvs(n)
    U = du.rvs(n)
    # == Construct the n observations of the random vector == #
    X[0, :] = W
    X[1, :] = W + U
    # == Construct the i-th observation of Y_n == #
    error_obs[:, i] = np.sqrt(n) * X.mean(axis=1)

# == Premultiply by Q and then take the squared norm == #
temp = np.dot(Q, error_obs)
chisq_obs = np.sum(temp**2, axis=0)

# == Plot == #
fig, ax = plt.subplots()
xmax = 8
ax.set_xlim(0, xmax)
xgrid = np.linspace(0, xmax, 200)
lb = "Chi-squared with 2 degrees of freedom"
ax.plot(xgrid, chi2.pdf(xgrid, 2), 'k-', lw=2, label=lb)
ax.legend()
ax.hist(chisq_obs, bins=50, density=True)
plt.show()
import random
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
from numpy import linspace
from scipy.stats import chisquare, chi2, gaussian_kde

m = 1000
n = 100
k = 10
y = []
for i in range(m):
    x = [random.randint(1, k) for i in range(n)]
    y.append(chisquare(x)[0])
mean_y, std_y = np.mean(y), np.std(y)
y = [(i - mean_y) / sqrt(std_y) for i in y]
t = min(y)
y = [i - t for i in y]
pdf = gaussian_kde(y)
a = linspace(min(y), max(y), len(y) // 10)
fig, ax = plt.subplots(2, 1)
fig.subplots_adjust(wspace=0)
ax[0].plot(a, pdf(a))
ax[1].plot(a, chi2.pdf(a, k - 1))
plt.show()
xu = np.linspace(chi2.ppf(0.01, dof), chi2.ppf(0.99, dof), 100)
ax.axvline(x=dof / df, color='k', linestyle='dashed', lw=4,
           label=r'UGC11680NED01 $\chi^2$')
ax.hist(chi_sfh / df, bins=30, density=False, weights=weights, histtype='step',
        lw=3, label=r'All AGNs $\chi^2$ reduced distribution')
ax.legend(loc='best', frameon=False)
ax.set_ylabel(r'Probability density $\chi^2$')
ax.set_xlabel('$x$')
plt.show()

# Notebook scratch work kept for reference:
#   1 + 1/2 + 1/3 + ... + 1/24 = 3.7759581777535067
#   2 * 1368 = 2736;  n = 1368.9986218003824 * chi2.pdf(x, df)
#   2736 + 3.7759581777535067;  variance = 52.34286921995921, mean = 1368 (mass 8, color 1)
#   2736 + 2.45;                variance = 52.33020160480943, mean = 1368 (mass 9, color 1)
def test():
    x = linspace(0.1, 25, 200)
    for dof in [1, 2, 3, 5, 10, 50]:
        plot(x, chi2.pdf(x, dof))
def draw_figures():
    pfile = '/auto/tdrive/mschachter/data/aggregate/decoders_pairwise_coherence_multi_freq.h5'
    agg = AggregatePairwiseDecoder.load(pfile)

    nbands = agg.df['band'].max()
    sample_rate = 381.4697265625
    freqs = get_freqs(sample_rate)

    g = agg.df.groupby(['bird', 'block', 'segment', 'hemi'])

    """
    # TODO: compute the average likelihood ratio between intercept-only and full model for all sites!
    i = (agg.df['bird'] == 'GreBlu9508M') & (agg.df['block'] == 'Site4') & (agg.df['segment'] == 'Call1') & (agg.df['hemi'] == 'L') & (agg.df['band'] == 0)
    assert i.sum() == 1
    full_likelihood_for_null = agg.df['likelihood'][i].values[0]
    null_likelihood = 1.63  # for GreBlu9508_Site4_Call1_L
    null_likelihood_ratio = 2*(null_likelihood - full_likelihood_for_null)
    print('full_likelihood_for_null=', full_likelihood_for_null)
    print('null_likelihood=', null_likelihood)
    print('null_likelihood_ratio=', null_likelihood_ratio)
    """

    full_likelihoods = list()
    likelihood_ratios = list()
    pccs = list()
    pcc_thresh = 0.25
    single_band_likelihoods = list()
    single_band_pccs = list()

    for (bird, block, seg, hemi), gdf in g:
        # get the likelihood of the full model
        i = gdf['band'] == 0
        assert i.sum() == 1
        num_samps = gdf[i]['num_samps'].values[0]
        print('num_samps=%d' % num_samps)
        full_likelihood = -gdf[i]['likelihood'].values[0] * num_samps
        pcc = gdf[i]['pcc'].values[0]
        if pcc < pcc_thresh:
            continue
        full_likelihoods.append(full_likelihood)
        pccs.append(pcc)

        # get the likelihood per frequency band
        ratio_by_band = np.zeros(nbands)
        single_likelihood_by_band = np.zeros(nbands)
        single_pcc_band = np.zeros(nbands)
        for k in range(nbands):
            i = (gdf['band'] == k + 1) & (gdf['exfreq'] == True)
            assert i.sum() == 1
            num_samps2 = gdf[i]['num_samps'].values[0]
            assert num_samps2 == num_samps
            leftout_likelihood = -gdf[i]['likelihood'].values[0] * num_samps

            i = (gdf['band'] == k + 1) & (gdf['exfreq'] == False)
            assert i.sum() == 1
            num_samps3 = gdf[i]['num_samps'].values[0]
            assert num_samps3 == num_samps2
            single_leftout_likelihood = -gdf[i]['likelihood'].values[0] * num_samps
            pcc = gdf[i]['pcc'].values[0]

            print('(%s,%s,%s,%s,%d) leftout=%0.6f, full=%0.6f, single=%0.6f, single_pcc=%0.6f, num_samps=%d' %
                  (bird, block, seg, hemi, k, leftout_likelihood, full_likelihood,
                   single_leftout_likelihood, pcc, num_samps))

            # compute the likelihood ratio
            lratio = -2 * (leftout_likelihood - full_likelihood)
            ratio_by_band[k] = lratio
            single_likelihood_by_band[k] = single_leftout_likelihood
            single_pcc_band[k] = pcc

        likelihood_ratios.append(ratio_by_band)
        single_band_likelihoods.append(single_likelihood_by_band)
        single_band_pccs.append(single_pcc_band)

    pccs = np.array(pccs)
    likelihood_ratios = np.array(likelihood_ratios)
    full_likelihoods = np.array(full_likelihoods)
    single_band_likelihoods = np.array(single_band_likelihoods)
    single_band_pccs = np.array(single_band_pccs)

    # exclude segments whose likelihood ratio goes below zero
    # i = np.array([np.any(lrat < 0) for lrat in likelihood_ratios])
    i = np.ones(len(likelihood_ratios), dtype='bool')
    print('i.sum()=%d' % i.sum())

    # compute significance threshold
    x = np.linspace(1, 150, 1000)
    df = 12
    p = chi2.pdf(x, df)
    sig_thresh = max(x[p > 0.01])

    # compute mean and std
    lrat_mean = likelihood_ratios[i, :].mean(axis=0)
    lrat_std = likelihood_ratios[i, :].std(axis=0, ddof=1)
    single_l_mean = single_band_likelihoods[i, :].mean(axis=0)
    single_l_std = single_band_likelihoods[i, :].std(axis=0, ddof=1)
    single_pcc_mean = single_band_pccs[i, :].mean(axis=0)
    single_pcc_std = single_band_pccs[i, :].std(axis=0, ddof=1)

    fig = plt.figure(figsize=(24, 16))
    plt.subplots_adjust(top=0.95, bottom=0.05, left=0.05, right=0.99,
                        hspace=0.40, wspace=0.20)

    ax = plt.subplot(2, 3, 1)
    plt.plot(full_likelihoods, pccs, 'go', linewidth=2.0)
    plt.xlabel('log Likelihood')
    plt.ylabel('PCC')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 2)
    for k, lrat in enumerate(likelihood_ratios[i, :]):
        plt.plot(freqs, lrat, '-', linewidth=2.0, alpha=0.7)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Likelihood Ratio')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 4)
    nsamps = len(likelihood_ratios)
    plt.errorbar(freqs, single_pcc_mean, yerr=single_pcc_std / np.sqrt(nsamps),
                 ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('PCC')
    plt.title('Mean Single Band Decoder PCC')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 5)
    plt.errorbar(freqs, single_l_mean, yerr=single_l_std / np.sqrt(nsamps),
                 ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('log Likelihood')
    plt.title('Mean Single Band Decoder Likelihood')
    plt.axis('tight')

    ax = plt.subplot(2, 3, 6)
    plt.errorbar(freqs, lrat_mean, yerr=lrat_std / np.sqrt(nsamps),
                 ecolor='r', elinewidth=3.0, fmt='k-', linewidth=7.0, alpha=0.75)
    plt.plot(freqs, np.ones_like(freqs) * sig_thresh, 'k--', linewidth=7.0, alpha=0.75)
    plt.xlabel('Frequency (Hz)')
    plt.ylabel('Likelihood Ratio')
    plt.title('Mean Likelihood Ratio')
    plt.axis('tight')
    plt.ylim(0, lrat_mean.max())

    fname = os.path.join(get_this_dir(), 'figs.svg')
    plt.savefig(fname, facecolor='w', edgecolor='none')

    plt.show()