def _detect_hotspots(self):
    """Flag every node whose recent CPU load is significantly too high or too low.

    For each node the last ``K_VALUE`` readings are tested against
    ``THRESHOLD_OVERLOAD`` and, if that does not flag the node, against
    ``THRESHOLD_UNDERLOAD`` with a two-sided one-sample t-test.  A node is
    flagged only when the p-value is below ``ALPHA`` *and* the window mean
    (computed with ``dtype=int``) lies on the matching side of the threshold.
    Both ``overloaded`` and ``underloaded`` are reset on every node first.
    """
    # A subclass may expose ``self.node_list`` (per the original note: the
    # hosts without the reserve node); otherwise use every NODE host.
    if hasattr(self, "node_list"):
        candidates = self.node_list
    else:
        candidates = self.model.get_hosts(types.NODE)

    for host in candidates:
        window = host.get_readings()[-K_VALUE:]

        host.underloaded = False
        host.overloaded = False

        _, p_over = stats.ttest_1samp(window, THRESHOLD_OVERLOAD)
        window_mean = np.mean(window, dtype=int)
        if p_over < ALPHA and window_mean >= THRESHOLD_OVERLOAD:
            host.overloaded = True
            continue

        # Not overloaded: check the lower threshold instead.
        _, p_under = stats.ttest_1samp(window, THRESHOLD_UNDERLOAD)
        if p_under < ALPHA and window_mean <= THRESHOLD_UNDERLOAD:
            host.underloaded = True
def output_crosstrain(specieslist, numEls = 1000, numtries = 1000):
    '''Cross-train every species in the list against every species in the list.

    For each (training, testing) pair the mean 'f1' and 'area' losses are
    recorded together with the p-value of a one-sample t-test against the
    null hypothesis loss=1 (no performance difference between training and
    cross-training).  The four resulting tables are written as tab-separated
    text to the module-level output paths.
    '''
    n_species = len(specieslist)
    ctf1s = np.zeros((n_species, n_species))
    ctf1s_pv = np.zeros((n_species, n_species))
    ctars = np.zeros((n_species, n_species))
    ctars_pv = np.zeros((n_species, n_species))

    # (loss key, table of means, table of p-values)
    metrics = (('f1', ctf1s, ctf1s_pv), ('area', ctars, ctars_pv))

    for row, train_species in enumerate(specieslist):
        for col, test_species in enumerate(specieslist):
            print('Training with ' + train_species + ', testing with ' + test_species)
            losses = cross_train(train_species, test_species, numEls, numtries)
            for key, means, pvalues in metrics:
                means[row, col] = np.mean(losses[key])
                pvalues[row, col] = ttest_1samp(losses[key], 1)[1]

    outputs = (
        (crosstrain_f1_file, ctf1s),
        (crosstrain_f1_pv_file, ctf1s_pv),
        (crosstrain_roc_file, ctars),
        (crosstrain_roc_pv_file, ctars_pv),
    )
    for target, table in outputs:
        np.savetxt(target, table, delimiter='\t')
def plot(self, signif=0.1):
    """Draw one panel per composite and return the figure.

    ``self.compos`` may be a DataFrame (one panel per column) or a dict
    (one panel per key, each value exposing ``.values``).  If the attribute
    is missing, ``self.composite()`` is called first (presumably it creates
    ``self.compos``).  Each panel's data is tested against a mean of 0 with a
    one-sample t-test and the first p-value is handed to ``self._plot_df``.

    Parameters
    ----------
    signif : float
        Significance level forwarded to ``self._plot_df``.

    Returns
    -------
    The matplotlib figure holding all panels.

    Raises
    ------
    TypeError
        If ``self.compos`` is neither a DataFrame nor a dict.
    """
    if not hasattr(self, 'compos'):
        self.composite()

    if isinstance(self.compos, pd.core.frame.DataFrame):
        # Double brackets keep each column as a one-column DataFrame.
        panels = [self.compos[[k]] for k in self.compos.columns]
    elif isinstance(self.compos, dict):
        panels = [self.compos[k] for k in self.compos.keys()]
    else:
        raise TypeError(
            "compos must be a pandas DataFrame or a dict, got %s"
            % type(self.compos).__name__)

    l = len(panels)
    # squeeze=False always yields a 2-D array of axes; without it a single
    # panel returns a bare Axes object that has no ``flatten`` method.
    f, axes = plt.subplots(nrows=1, ncols=l, figsize=(l, 6), sharey=True,
                           squeeze=False)
    f.subplots_adjust(wspace=0.0, left=0.15, bottom=0.05, top=0.87)
    axes = axes.flatten('F')

    for i, df in enumerate(panels):
        t, pval = ttest_1samp(df.values, 0)
        self._plot_df(df, axes[i], ax_n=i, pval=pval[0], signif=signif)

    return f
def test_weightstats_2(self):
    """Check weighted statistics against the same data expanded by ``asrepeats``."""
    x1, x2 = self.x1, self.x2
    w1, w2 = self.w1, self.w2

    plain1 = DescrStatsW(x1)
    weighted1 = DescrStatsW(x1, weights=w1)
    weighted2 = DescrStatsW(x2, weights=w2)
    repeated1 = weighted1.asrepeats()
    repeated2 = weighted2.asrepeats()

    # two-sample test: weighted version vs. scipy on the repeated data
    weighted_ttest = ttest_ind(x1, x2, weights=(w1, w2))[:2]
    assert_almost_equal(weighted_ttest, stats.ttest_ind(repeated1, repeated2), 14)

    # NOTE: the row count of the repeated data is not compared with
    # sum_weights (not the same as new version with random
    # weights/replication).
    assert_almost_equal(repeated2.mean(0), weighted2.mean, 14)
    assert_almost_equal(repeated2.var(), weighted2.var, 14)
    assert_almost_equal(repeated2.std(), weighted2.std, 14)

    # note: the following is for 1d
    assert_almost_equal(np.cov(repeated2, bias=1), weighted2.cov, 14)
    # TODO: corrcoef is not checked here, exception in corrcoef (scalar case)

    # one-sample tests
    assert_almost_equal(plain1.ttest_mean(3)[:2], stats.ttest_1samp(x1, 3), 11)
    assert_almost_equal(weighted1.ttest_mean(3)[:2],
                        stats.ttest_1samp(repeated1, 3), 11)
def test_weightstats_2(self):
    """Check weighted statistics against the same data expanded by ``asrepeats``."""
    x1, x2 = self.x1, self.x2
    w1, w2 = self.w1, self.w2

    plain1 = DescrStatsW(x1)
    weighted1 = DescrStatsW(x1, weights=w1)
    weighted2 = DescrStatsW(x2, weights=w2)
    repeated1 = weighted1.asrepeats()
    repeated2 = weighted2.asrepeats()

    # two-sample test: weighted version vs. scipy on the repeated data
    weighted_ttest = ttest_ind(x1, x2, weights=(w1, w2))[:2]
    assert_almost_equal(weighted_ttest, stats.ttest_ind(repeated1, repeated2), 14)

    # NOTE: the row count of the repeated data is not compared with
    # sum_weights (not the same as new version with random
    # weights/replication).
    assert_almost_equal(repeated2.var(), weighted2.var, 14)
    assert_almost_equal(repeated2.std(), weighted2.std, 14)

    # one-sample tests
    assert_almost_equal(plain1.ttest_mean(3)[:2], stats.ttest_1samp(x1, 3), 11)
    assert_almost_equal(weighted1.ttest_mean(3)[:2],
                        stats.ttest_1samp(repeated1, 3), 11)
def getDevMod(w,nulls,rep,use_c):
    """
    Get the deviation from random expectation of modularity.

    Optimized so that the null webs are gone through only one time.

    Returns a list of two lists, the first for Qr and the second for Qb.
    Each starts with the observed value and the p-value of a one-sample
    t-test of the null values against it, followed by the items produced
    by ``gMIC`` for the null values.  ``rep`` is accepted but not used here.
    """
    observed_modules = [w.modules.Q, w.modules.N,
                        w.modules.up_modules, w.modules.low_modules]
    observed_qr = Qr(w.web, observed_modules)
    observed_qb = w.modules.Q

    null_qb = []
    null_qr = []
    for null_web in nulls:
        null_modules = findModules(null_web, use_c = use_c)
        null_qr.append(Qr(null_web, null_modules))
        null_qb.append(null_modules[0])

    pvalue_qb = spp.ttest_1samp(null_qb, observed_qb)[1]
    pvalue_qr = spp.ttest_1samp(null_qr, observed_qr)[1]

    out_r = [observed_qr, pvalue_qr]
    out_b = [observed_qb, pvalue_qb]
    out_r.extend(gMIC(null_qr))
    out_b.extend(gMIC(null_qb))
    return [out_r, out_b]
def getDevNest(w,list,use_c):
    """Compare the NODF values of ``w`` with those of the webs in ``list``.

    ``nodf`` is evaluated on every web in ``list``; element 0 of its result
    is compared with ``w.nodf``, element 2 with ``w.nodf_up`` and element 1
    with ``w.nodf_low``.

    Returns ``[OUT, OUT_lo, OUT_up]``.  Each entry starts with the value
    taken from ``w`` and the p-value of a one-sample t-test of the collected
    values against it, followed by the items produced by ``gMIC``.
    """
    null_total = []
    null_up = []
    null_low = []
    for null_web in list:
        scores = nodf(null_web, strict=w.nodf_strict, use_c=use_c)
        null_total.append(scores[0])
        null_up.append(scores[2])
        null_low.append(scores[1])

    out_total = [w.nodf, spp.ttest_1samp(null_total, w.nodf)[1]]
    out_up = [w.nodf_up, spp.ttest_1samp(null_up, w.nodf_up)[1]]
    out_low = [w.nodf_low, spp.ttest_1samp(null_low, w.nodf_low)[1]]

    out_total.extend(gMIC(null_total))
    out_low.extend(gMIC(null_low))
    out_up.extend(gMIC(null_up))
    return [out_total, out_low, out_up]
def tTest(list1,list2):
    """Return the p-value of the t-test that fits the two sample sizes.

    * both samples have several values: independent two-sample t-test;
    * one sample has a single value: one-sample t-test of the other sample
      against that value;
    * otherwise (both have at most one value, or either is empty): no test
      is possible and ``None`` is returned.
    """
    n1, n2 = len(list1), len(list2)

    # An empty sample gives nothing to compare against; previously an empty
    # list paired with a multi-value list raised IndexError on ``[0]``.
    if n1 == 0 or n2 == 0:
        return None

    if n1 > 1 and n2 > 1:
        return s.ttest_ind(list1, list2)[1]
    if n2 > 1:
        # list1 holds the single reference value
        return s.ttest_1samp(list2, list1[0])[1]
    if n1 > 1:
        # list2 holds the single reference value
        return s.ttest_1samp(list1, list2[0])[1]
    return None
def generate_sequence_gene_expression_statistics(self, show_species_charts=True, show_chart=True):
    """Compute per-network gene expression statistics, save them and plot.

    Every network table is read as comma-separated text; columns 2 and 3
    are taken as the two expression series (labelled '9mM CA' and 'Spike'
    in the CSV header written below).  Zero entries are excluded before
    computing mean/variance/SD, the one-sample t-tests against 0 and the
    Welch two-sample t-test.  One row per network is appended to
    ``self.network_gene_expressions``, which is then converted to an array
    and written to 'gene_expression_statistics.csv'.

    Parameters
    ----------
    show_species_charts : bool
        Also call ``generate_species_wise_gene_expression_statistics``.
    show_chart : bool
        Plot the all-network or single-network chart afterwards.
    """

    def _load(path):
        """Read one table; return a 2-D string array, or None if it is empty."""
        table = np.genfromtxt(path, delimiter=',', dtype=str)
        if table.size == 0:
            return None
        if table.ndim == 1:
            # A file with a single row comes back 1-D; restore the row axis
            # so that the column slicing below keeps working.
            table = table.reshape(1, -1)
        return table

    def _statistics(nw_number, table):
        """Build the statistics row for one network table."""
        x = np.array(table[:, 2], dtype=float)
        y = np.array(table[:, 3], dtype=float)
        x_expr = x[x != 0]
        y_expr = y[y != 0]
        ca_stat = ca_pvalue = spike_stat = spike_pvalue = ind_stat = ind_pvalue = 0
        if not np.all(x == 0):
            ca_stat, ca_pvalue = stats.ttest_1samp(x_expr, 0)
            spike_stat, spike_pvalue = stats.ttest_1samp(y_expr, 0)
            ind_stat, ind_pvalue = stats.ttest_ind(x_expr, y_expr, equal_var=False)
        return [nw_number,
                x_expr.mean(), x_expr.var(), x_expr.std(),
                y_expr.mean(), y_expr.var(), y_expr.std(),
                ca_stat, ca_pvalue, spike_stat, spike_pvalue,
                ind_stat, ind_pvalue]

    if self.multiple_networks:
        pattern = self.output_silix_nw_exp_data_folder_path + '/*.txt'
        for i, nw_ge_file in enumerate(glob.glob(pattern)):
            mapping_data = _load(nw_ge_file)
            if mapping_data is None:
                continue
            # Single-argument print: same text on Python 2 and Python 3.
            print('Network:  %s %s' % (i, mapping_data.shape))
            # The network number is the first run of digits in the path.
            nw_number = int(re.findall(r'\d+', nw_ge_file)[0])
            self.network_gene_expressions.append(
                _statistics(nw_number, mapping_data))
    else:
        mapping_data = _load(self.output_silix_nw_exp_data_folder_path
                             + self.silix_nw_exp_data_filename)
        if mapping_data is not None:
            print('Network:  %s' % (mapping_data.shape,))
            self.network_gene_expressions.append(_statistics(0, mapping_data))

    # convert list into array
    self.network_gene_expressions = np.asarray(self.network_gene_expressions)

    # Save network gene expression statistics to csv file
    gene_expression_statistics_file = self.output_silix_nw_exp_data_folder_path + 'gene_expression_statistics.csv'
    with open(gene_expression_statistics_file, 'w') as f_handle:
        f_handle.write(
            'Network, 9mM CA Mean, 9mM CA Var, 9mM CA SD, Spike Mean, Spike Var, Spike SD, 9mM CA ttest-stat, 9mM CA ttest-pvalue, Spike ttest-stat, Spike ttest-pvalue, Ind ttest-stat, Ind ttest-pvalue \n')
        np.savetxt(f_handle, self.network_gene_expressions, delimiter=',')

    if show_species_charts:
        self.generate_species_wise_gene_expression_statistics()

    if show_chart:
        if self.multiple_networks:
            self.plot_all_nw_gene_expr_stats_chart()
        else:
            self.plot_single_network_gene_expr_stats_chart()
def createRegressionPlots(predictions,performance,coefs,fb_coefs,nfb_coefs,GroupDF,goodsubj,savefig=True):
    """Draw the four-panel regression summary figure and optionally save it.

    Panels: (1) average predicted time series per feedback condition with the
    ideal DMN trace, (2) violin plot of the 'R' values per condition, titled
    with the p-value of a t-test on FEEDBACK minus NOFEEDBACK, (3) violin
    plot of the coefficients per 'pe' with stars where the FDR-corrected
    one-sample t-test is significant (top: feedback, bottom: no feedback),
    (4) bar plot of the t-values of feedback minus no-feedback coefficients
    with stars for FDR-significant entries.

    The coefficient columns are reshaped to (number of unique good subject
    IDs, number of unique 'pe' values) before testing.

    Parameters
    ----------
    savefig : bool
        Save the figure as 'RSN_LinearRegPrediction.pdf' under the
        module-level ``saveFigureLocation``.
    """
    f=plt.figure(figsize=(22,12))
    ax1=plt.subplot2grid((2,4),(0,0), colspan=3)
    ax2=plt.subplot2grid((2,4),(0,3))
    ax3=plt.subplot2grid((2,4),(1,0), colspan=2)
    ax4=plt.subplot2grid((2,4),(1,2), colspan=2)

    # Shape shared by all three coefficient tests below.
    n_pe = len(unique(coefs['pe']))
    n_subj = len(unique(GroupDF[GroupDF.Subject_ID.isin(goodsubj)]['Subject_ID']))

    dmnIdeal=pd.read_csv('/home/jmuraskin/Projects/NFB/analysis/DMN_ideal_2.csv')

    sns.tsplot(data=predictions,time='TR',value='predicted',unit='subj',condition='fb',ax=ax1)
    ax1.plot((dmnIdeal['Wander']-dmnIdeal['Focus'])/3,'k--')
    ax1.set_title('Average Predicted Time Series')

    sns.violinplot(data=performance,x='fb',y='R',split=True,bw=.3,inner='quartile',ax=ax2)

    g=sns.violinplot(data=coefs,x='pe',y='Coef',hue='fb',split=True,bw=.3,inner='quartile',ax=ax3)
    g.plot([-1,n_pe],[0,0],'k--')
    g.set_xlim([-.5,n_pe])
    ylim=g.get_ylim()

    fb_r = np.array(performance[performance.fb=='FEEDBACK']['R'])
    nfb_r = np.array(performance[performance.fb=='NOFEEDBACK']['R'])
    t,p = ttest_1samp(fb_r-nfb_r,0)
    ax2.set_title('Mean Subject Time Series Correlations-p=%0.2f' % p)

    # Convert to an ndarray before reshaping: Series.reshape no longer
    # exists in pandas, and the difference test below already did it this way.
    t,p = ttest_1samp(np.array(fb_coefs['Coef']).reshape(n_subj,n_pe),0)
    p05_FB,padj=fdr_correction(p,0.05)
    t,p = ttest_1samp(np.array(nfb_coefs['Coef']).reshape(n_subj,n_pe),0)
    p05_NFB,padj=fdr_correction(p,0.05)

    for idx,(pFDR_FB,pFDR_NFB) in enumerate(zip(p05_FB,p05_NFB)):
        if pFDR_FB:
            ax3.scatter(idx,ylim[1]-.05,marker='*',s=75)
        if pFDR_NFB:
            ax3.scatter(idx,ylim[0]+.05,marker='*',s=75)

    t,p=ttest_1samp(np.array(fb_coefs['Coef']-nfb_coefs['Coef']).reshape(n_subj,n_pe),0)
    p05,padj=fdr_correction(p,0.05)

    sns.barplot(x=range(len(t)),y=t,ax=ax4,color='Red')
    for idx,pFDR in enumerate(p05):
        if pFDR:
            # place the star just beyond the tip of the bar
            ax4.scatter(idx,t[idx]+ np.sign(t[idx])*0.2,marker='*',s=75)
    ax4.set_xlim([-0.5,n_pe])
    ax4.set_xlabel('pe')
    ax4.set_ylabel('t-value')
    ax4.set_title('FB vs. nFB PE')

    for ax in [ax1,ax2,ax3,ax4]:
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label]):
            item.set_fontsize(18)
        for item in (ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(12)

    f.tight_layout()
    if savefig:
        f.savefig('%s/RSN_LinearRegPrediction.pdf' % saveFigureLocation,dpi=300)
def print_summary_wo_outliers(event_list, attribute, lag, length, outlier_abs_threshold):
    """Print size, mean and t-test p-value of the summed series, with and without outliers.

    For every event the series returned by
    ``concat_data.series_after(attribute, lag=lag, length=length)`` is summed.
    The summary is printed once for all sums and once for the sums whose
    absolute value is below ``outlier_abs_threshold``.  The p-value comes
    from a one-sample t-test against 0.
    """
    returns = []
    for event in event_list:
        window = event.concat_data.series_after(attribute, lag=lag, length=length)
        returns.append(window.sum())

    print('Full sample')
    print('Size:', len(returns))
    print('Mean:', np.mean(returns))
    full_pvalue = stats.ttest_1samp(returns, 0)[1]
    print(full_pvalue)
    print()

    trimmed = [value for value in returns if abs(value) < outlier_abs_threshold]
    print('Without outliers')
    print('Size: ', len(trimmed))
    print('Mean:', np.mean(trimmed))
    trimmed_pvalue = stats.ttest_1samp(trimmed, 0)[1]
    print(trimmed_pvalue)
def pairedt(pairs, numSamples):
    """Run a t-test on the means of random sub-samples of paired differences.

    ``pairs`` must hold exactly two entries (one of them under the key
    'tor'); the element-wise differences between the first and the second
    entry's sequences are plotted with ``plotDiffs``, shuffled and split
    into ``numSamples`` groups of equal size.  The group means are checked
    for normality and tested against 0.

    NOTE(review): which entry is "first" follows the dict's iteration
    order, so the sign of the differences depends on it - confirm callers
    build ``pairs`` in a fixed order.

    Returns a dict with the keys 'normal_p', 'ttest_t', 'ttest_p',
    'avg_diff' (all formatted as strings after rounding to 4 digits),
    'numSamples', 'sampleSize' and 'num_pairs'.
    """
    results = dict()
    t, v = pairs.items()
    diffs = [value - v[1][x] for x, value in enumerate(t[1])]
    plotDiffs(diffs)

    sampleSize = int(len(diffs) / numSamples)

    # random.shuffle needs a mutable sequence; on Python 3 ``range`` is not
    # one, so materialise the indices first.
    indices = list(range(len(diffs)))
    random.shuffle(indices)

    mean_diffs = []
    for sample in range(numSamples):
        chunk = indices[sample * sampleSize:(sample + 1) * sampleSize]
        total_diff = sum(diffs[index] for index in chunk)
        mean_diffs.append(total_diff / float(sampleSize))

    # normality check
    nt = stats.normaltest(mean_diffs)
    results['normal_p'] = format(round(nt[1], 4))

    # ttest
    t_prob = stats.ttest_1samp(mean_diffs, 0)
    results['ttest_t'] = format(round(t_prob[0], 4))
    results['ttest_p'] = format(round(t_prob[1], 4))

    # other stats
    results['avg_diff'] = format(round(np.mean(diffs), 4))
    results['numSamples'] = numSamples
    results['sampleSize'] = sampleSize
    results['num_pairs'] = len(pairs['tor'])
    return results
def linear_harvey_collier(res):
    '''Harvey Collier test for linearity

    The Null hypothesis is that the regression is correctly modeled as linear.

    Parameters
    ----------
    res : Result instance

    Returns
    -------
    tvalue : float
        test statistic, based on ttest_1sample
    pvalue : float
        pvalue of the test

    Notes
    -----
    TODO: add sort_by option

    This test is a t-test that the mean of the recursive ols residuals is zero.
    Calculating the recursive residuals might take some time for large samples.

    '''
    from scipy import stats

    # I think this has different ddof than
    # B.H. Baltagi, Econometrics, 2011, chapter 8
    # but it matches Gretl and R:lmtest, pvalue at decimal=13
    recursive_outputs = recursive_olsresiduals(res, skip=3, alpha=0.95)

    # Element 3 of the result, without its first three entries, is tested.
    tested_residuals = recursive_outputs[3][3:]
    return stats.ttest_1samp(tested_residuals, 0)
def ttest(self):
    """Run a one-sample t-test of ``self.arc`` against ``self.bac`` and save it.

    The t statistics and p-values are each wrapped in a leading axis of
    length one, averaged over axis 1 and written to '<name>_t.txt' and
    '<name>_p.txt'.  The working directory is changed to ``self.plot_dir``
    first, so the files land there (and the process stays in that directory).
    """
    archaeology = self.arc
    background = self.bac

    # Single-argument print calls give the same output on Python 2 and 3.
    print('ttestloop')
    print(archaeology.shape)
    print(background.shape)

    t_test = stats.ttest_1samp(archaeology, background)
    #t_test = stats.ttest_ind(archaeology, background)

    t_list = np.array([t_test[0]])
    print('TSHAPE %s' % (t_list.shape,))
    p_list = np.array([t_test[1]])
    print('PSHAPE %s' % (p_list.shape,))

    os.chdir(self.plot_dir)
    np.savetxt(self.name+'_t.txt', np.mean(t_list, axis=1), delimiter=',')
    np.savetxt(self.name+'_p.txt', np.mean(p_list, axis=1), delimiter=',')
def is_directionally_biased(ase, gene, bias_direction=None, style='ttest', ase_level=0.33, min_slices=10, too_few_slices_val=99, frac_for_biased=0.65, two_tailed=False, alpha=.05):
    """Decide, per genotype, whether ``gene`` shows a directional ASE bias.

    The genotype of a column is the part of its name before the first
    underscore.  For every genotype the row of ``gene`` is multiplied by
    ``bias_direction`` (all ones when None) and restricted to that
    genotype's columns.

    style='ttest'
        One-sample t-test against 0 ignoring NaNs.  A masked p-value yields
        ``too_few_slices_val``.  The p-value is multiplied by ``len(ase)``
        before comparing with ``alpha``.  With ``two_tailed`` the result is
        the sign of t times the significance flag; otherwise it is the
        significance flag of the one-sided (positive) test.
    style='cutoff'
        Fewer than ``min_slices`` measured values yield
        ``too_few_slices_val``.  Otherwise the result is -1 or 1 for the
        first direction in which more than
        ``max(frac_for_biased * measured, min_slices)`` values exceed
        ``ase_level``, else 0.

    Returns a dict mapping genotype to result.  Raises NotImplementedError
    for any other ``style``.
    """
    if bias_direction is None:
        bias_direction = [1] * len(ase.columns)

    genotypes = {column.split('_')[0] for column in ase.columns}

    biases = {}
    for genotype in genotypes:
        gene_ase = (ase.ix[gene] * bias_direction).select(startswith(genotype))

        if style == 'ttest':
            tstat, pval = ttest_1samp(gene_ase, 0, nan_policy='omit')
            if isinstance(pval, np.ma.core.MaskedConstant):
                biases[genotype] = too_few_slices_val
            elif two_tailed:
                biases[genotype] = np.sign(tstat) * (pval * len(ase) < alpha)
            else:
                one_sided = pval/2 if tstat > 0 else 1-pval/2
                biases[genotype] = one_sided * len(ase) < alpha

        elif style == 'cutoff':
            n_measured = gene_ase.count()
            if n_measured < min_slices:
                biases[genotype] = too_few_slices_val
                continue

            required = max(frac_for_biased * n_measured, min_slices)
            verdict = 0
            for direction in (-1, 1):
                if (direction * gene_ase > ase_level).sum() > required:
                    verdict = direction
                    break
            biases[genotype] = verdict

        else:
            raise NotImplementedError("Don't know how to use test style '{}'".format(style))

    return biases
def arg_lys_ratio_comp(gene_list, hit_list):
    """
    returns a letter for the one sample t-test of RK ratio:
    Z - the hits have at most two distinct RK ratios, can't run the test
    S - no significant difference
    A - psychrophilic query is significantly different and higher than targets
    B - psychrophilic query is significantly different and lower than targets
    N - RK ratio for query couldn't be calculated due to K = 0 for query
    """
    query_ratio = gene_list[43]
    if query_ratio == 'N/A':
        # RK ratio was not calculated due to K = 0
        return 'N'
    query_ratio = float(query_ratio)

    # drop the hits where there were no lysines (i.e. N/A)
    target_ratios = [float(hit[43]) for hit in hit_list if hit[43] != 'N/A']

    # too few distinct values (this includes the empty list left when all
    # values are N/A) to run a meaningful test
    if len(set(target_ratios)) <= 2:
        return 'Z'

    with np.errstate(divide='ignore'):
        p_value = stats.ttest_1samp(target_ratios, query_ratio)[1]

    if not p_value <= 0.05:
        return 'S'
    return 'A' if query_ratio > np.mean(target_ratios) else 'B'
def ttest(self, threshold_dict=None):
    """ Calculate one sample t-test across each voxel (two-sided)

    Args:
        self: Brain_Data instance
        threshold_dict: a dictionary of threshold parameters {'unc':.001} or {'fdr':.05}.
            Any dict subclass (e.g. OrderedDict) is accepted.  With 'unc',
            t values whose p-value exceeds the threshold are set to NaN.
            'fdr' is accepted but currently applies no thresholding.

    Returns:
        out: dictionary of regression statistics in Brain_Data instances {'t','p'}

    Raises:
        ValueError: if threshold_dict is given but is not a dictionary.

    """
    # Notes: Need to add FDR Option
    t = deepcopy(self)
    p = deepcopy(self)
    # test against a population mean of 0 along axis 0
    t.data, p.data = ttest_1samp(self.data, 0, 0)

    if threshold_dict is not None:
        # isinstance rather than an exact type check, so dict subclasses
        # are not rejected.
        if not isinstance(threshold_dict, dict):
            raise ValueError("threshold_dict is not a dictionary.  Make sure it is in the form of {'unc':.001} or {'fdr':.05}")
        if 'unc' in threshold_dict:
            # Uncorrected Thresholding
            t.data[p.data > threshold_dict['unc']] = np.nan
        elif 'fdr' in threshold_dict:
            pass

    out = {'t': t, 'p': p}
    return out
def paired_data():
    '''Analysis of paired data

    Compare mean daily intake over 10 pre-menstrual and 10 post-menstrual
    days (in kJ).  Prints the p-values of the paired t-test and of the
    Wilcoxon signed rank test, and returns the latter.
    '''
    # Get the data: daily intake of energy in kJ for 11 women
    data = getData('altman_93.txt', subDir=r'..\Data\data_altman')

    # column-wise mean and sample SD (results are not stored)
    mean(data, axis=0)
    std(data, axis=0, ddof=1)

    # column 0: pre, column 1: post
    difference = data[:, 1] - data[:, 0]

    # --- >>> START stats <<< ---
    # paired t-test: doing two measurements on the same experimental unit
    # e.g., before and after a treatment
    t_statistic, p_value = stats.ttest_1samp(difference, 0)

    # p < 0.05 => alternative hypothesis:
    # the difference in mean is not equal to 0
    print(("paired t-test", p_value))

    # alternative to paired t-test when data has an ordinary scale or when not
    # normally distributed
    rankSum, p_value = stats.wilcoxon(difference)
    # --- >>> STOP stats <<< ---
    print(("Wilcoxon-Signed-Rank-Sum test", p_value))

    return p_value  # should be 0.0033300139117459797
def testPermutation(testResult, permutationResult): ''' Method that essentially runs a t-test to determine if the test result has significantly better error than the permutation set ''' # test data trueTest = testResult[:, 0] predTest = testResult[:, 1] # permutation data truePermut = permutationResult[:, 0, :] predPermut = permutationResult[:, 1, :] # Get the MSE for the empirical values empMSE = np.mean(np.square(trueTest - predTest)) # Now make a distribution of MSE from the permutations distMSE = np.mean(np.square(truePermut - predPermut), axis=0) # Run a one sample t-test on this thing - if significant, then our # empirical MSE is significantly different from the permuted one tValue, pValue = st.ttest_1samp(distMSE, empMSE) # Do zarrars p value smallerEmp = distMSE < empMSE nSmaller = np.sum(smallerEmp) pZarrar = float(nSmaller) / len(distMSE) # return empMSE, distMSE, tValue, pValue return empMSE, distMSE, pZarrar
def xcorr_stability(x1,x2,window_len=200,overlap=190):
    """Go through the arrays x1,x2 in a moving-window-fashion.
    For each window, calculate the crosscorrelation function, determine
    tau (time delay, taken as the offset of the maximum from the centre of
    the "same"-mode correlation).
    Then, for the tau of all windows, calculate t-stats to test H0 that
    the taus are zero. If not, we have sync (and directionality?)

    Parameters
    ----------
    x1, x2 : 1d arrays of equal length
    window_len : int
        Number of datapoints per window.
    overlap : int
        Number of datapoints shared by consecutive windows; must be
        smaller than window_len.

    Returns
    -------
    ((t, p), taus, tanh(t)) where t, p come from the one-sample t-test of
    the taus against 0.

    Raises
    ------
    ValueError
        If the inputs are not 1d, differ in length, or the window
        parameters do not describe a forward-moving window that fits.
    """
    if not len(x1.shape)==len(x2.shape)==1:
        raise ValueError("Input arrays must be 1d")
    if not x1.shape[0]==x2.shape[0]:
        raise ValueError("Input arrays must have same length.")

    ndp = x1.shape[0]  # number of datapoints
    step = window_len-overlap
    if window_len <= 0 or step <= 0:
        raise ValueError("window_len must be positive and larger than overlap.")
    if window_len > ndp:
        raise ValueError("window_len must not exceed the length of the input arrays.")

    # Floor division: identical to ``/`` on Python 2 ints, and keeps the
    # window count an integer on Python 3 (np.zeros rejects a float shape).
    n_windows = (ndp-window_len)//step
    taus = np.zeros(n_windows)

    for i in range(n_windows):
        start = i*step
        ar1 = x1[start:start+window_len]
        ar2 = x2[start:start+window_len]
        xcorr = np.correlate(ar1,ar2,mode="same")
        taus[i] = xcorr.argmax()-xcorr.shape[0]//2

    t,p = ttest_1samp(taus,0)
    return (t,p), taus, np.tanh(t)
def summarize(self, path):
    """
    Save the union of the subjects' maps (to easily inspect them), the mean
    map across subjects, and the uncorrected t and p maps of a one-sample
    t-test across subjects against ``self._p_value``.

    All images are written as NIfTI files into ``path``, which is also
    stored on ``self.path``.
    """
    self.path = path

    threshold_value = self._p_value
    affine = self._affine
    radius = self._radius

    def _write(volume, fname):
        """Save ``volume`` under ``fname`` inside the output directory."""
        ni.save(ni.Nifti1Image(volume, affine=affine),
                os.path.join(self.path, fname))

    # move the leading (subject) axis to the end: subjects become volume 3
    total_map = np.rollaxis(np.array(self._total_map), 0, 4)

    _write(total_map,
           "accuracy_map_radius_%s_searchlight_all_subj.nii.gz" % radius)
    _write(total_map.mean(3),
           "accuracy_map_radius_%s_searchlight_mean_subj.nii.gz" % radius)
    logging.info('Summarizer writed in '+self.path)

    t, p = ttest_1samp(total_map, threshold_value, axis=3)
    _write(t, "t_map_vs_threshold_%s_uncorrected.nii.gz" % threshold_value)
    _write(p, "p_map_vs_threshold_%s_uncorrected.nii.gz" % threshold_value)
def compareWithNormal():
    '''Give an idea how big/small the difference between the t- and the
    normal distribution is for realistic calculations.

    100 values are drawn from a normal distribution (mean 7, SD 3, fixed
    seed) and compared with the value 6.5, once with a one-sample t-test
    and once using a normal distribution with the sample mean and the
    standard error of the mean.  Both probabilities are printed; the one
    from the normal distribution is returned.
    '''
    # generate the data
    np.random.seed(12345)
    data = stats.norm(loc=7, scale=3).rvs(100)
    checkVal = 6.5

    # T-test
    # --- >>> START stats <<< ---
    t, tProb = stats.ttest_1samp(data, checkVal)
    # --- >>> STOP stats <<< ---

    # Comparison with corresponding normal distribution
    sample_mean = np.mean(data)
    standard_error = np.std(data, ddof=1)/np.sqrt(len(data))
    normProb = stats.norm.cdf(checkVal, loc=sample_mean, scale=standard_error)*2

    # compare
    message = ('The probability from the t-test is ' +
               '{0:5.4f}, and from the normal distribution {1:5.4f}'.format(tProb, normProb))
    print(message)

    return normProb  # should be 0.054201154690070759
def match_to_key_fgrams_paired(raw_fgram, key_fgrams, floworder):
    """Return the index of the key flowgram that best matches ``raw_fgram``.

    For every key, each nucleotide of ``floworder`` is paired with the last
    flow index at which the key expects 0 and the last one at which it
    expects 1; nucleotides lacking either are dropped.  The differences
    ``raw[expected-1 flow] - raw[expected-0 flow]`` are tested against 0
    with a one-sided t-test (alternative: mean difference > 0), and the key
    with the smallest p-value wins; ties go to the lower index.

    Parameters
    ----------
    raw_fgram : indexable of numbers, the measured flow values
    key_fgrams : sequence of keys, each a sequence of 0/1 expectations
    floworder : sequence of nucleotides, cycled over the flows

    Raises
    ------
    IndexError
        If ``key_fgrams`` is empty.
    """
    from scipy import stats

    n_nucs = len(floworder)
    pvals = []
    for keyindex, kf in enumerate(key_fgrams):
        pairs = dict((nuc, [None, None]) for nuc in floworder)
        for ndx, expected in enumerate(kf):
            pairs[floworder[ndx % n_nucs]][int(expected)] = ndx

        # ``items`` works on Python 2 and 3 (``iteritems`` is Python 2 only).
        diffs = [raw_fgram[v[1]] - raw_fgram[v[0]]
                 for v in pairs.values() if None not in v]

        t, pval2side = stats.ttest_1samp(diffs, 0.0)
        # One-sided p-value in the direction "1-flows are higher".  Halving
        # alone would rank a key whose flows are significantly *reversed*
        # as an excellent match.
        pval = pval2side / 2 if t > 0 else 1 - pval2side / 2
        if pval != pval:
            # NaN (too few pairs or zero variance): rank last, since NaN
            # would make the sort order undefined.
            pval = float('inf')
        pvals.append((pval, keyindex))

    pvals.sort()
    return pvals[0][-1]
def test_weightstats_3(self):
    """Check 2-d weighted statistics against the data expanded by ``asrepeats``."""
    x1_2d, x2_2d = self.x1_2d, self.x2_2d
    w1, w2 = self.w1, self.w2

    weighted1 = DescrStatsW(x1_2d, weights=w1)
    weighted2 = DescrStatsW(x2_2d, weights=w2)
    repeated1 = weighted1.asrepeats()
    repeated2 = weighted2.asrepeats()

    # column-wise moments
    assert_almost_equal(repeated2.mean(0), weighted2.mean, 14)
    assert_almost_equal(repeated2.var(0), weighted2.var, 14)
    assert_almost_equal(repeated2.std(0), weighted2.std, 14)
    assert_almost_equal(np.cov(repeated2.T, bias=1), weighted2.cov, 14)
    assert_almost_equal(np.corrcoef(repeated2.T), weighted2.corrcoef, 14)

    # one-sample test; scipy.stats.ttest is also vectorized
    t, p, d = weighted1.ttest_mean(3)
    assert_almost_equal([t, p], stats.ttest_1samp(repeated1, 3), 11)

    # two-sample test
    cm = CompareMeans(weighted1, weighted2)
    from_weights = cm.ttest_ind()
    from_repeats = stats.ttest_ind(repeated1, repeated2)
    assert_almost_equal(from_weights[:2], from_repeats, 14)
def gen_bonferroni_corrected_graph(indata, alpha = .01):
    """
    given indata (path to a saved array that, after removing singleton
    dimensions, is subjects X nnodes X nnodes) and alpha (rejection level
    for bonferroni correction), calculate significant connections across
    the cohort.

    A one-sample t-test against 0 is run across subjects for every node
    pair; the upper and the lower triangle (both without the diagonal) are
    Bonferroni-corrected separately.  Connections with a negative t value
    are removed.

    Returns
    -------
    mask : boolean array, True for the surviving connections
    tvals : t-value map with all non-surviving entries set to 0
    sparsity : ``calc_sparsity`` of the upper-triangle rejections
    """
    data = np.load(indata).squeeze()  # remove singular dims (eg we only have 1 block)
    n_nodes = data.shape[-1]
    ind = util.triu_indices(n_nodes, 1)      # ind of upper tri minus diag
    lowind = util.tril_indices(n_nodes, -1)  # ind of lower triag minus diag

    # rfx ttest across subjects for each region pair
    tvals, pvals = ss.ttest_1samp(data, 0, axis = 0)

    # multipletests returns (reject, corrected p-values, alphacSidak,
    # alphacBonf); only the rejection flags are needed.
    reject = sms.multipletests(pvals[ind], alpha = alpha,
                               method='bonferroni')[0]
    lreject = sms.multipletests(pvals[lowind], alpha = alpha,
                                method='bonferroni')[0]

    sparsity = calc_sparsity(reject)
    # Single-argument print: same text on Python 2 and Python 3.
    print('sparsity %s' % (sparsity,))

    # The builtin ``bool`` replaces the ``np.bool`` alias, which newer
    # NumPy releases no longer provide.
    mask = np.zeros(tvals.shape, dtype = bool)
    mask[ind] = reject
    mask[lowind] = lreject
    mask[tvals < 0] = False  # remove negative correlations
    tvals[~mask] = 0
    return mask, tvals, sparsity
def new_returning(self):
    """Compare the sentiment values of NEW and RET(urning) WAs.

    For every ID that appears with both statuses, the difference between
    its NEW ``s_value`` and the mean of its RET ``s_value`` is collected and
    tested against 0 with a one-sample t-test.  Count, mean and variance
    are reported through ``self.print_descriptively`` for the NEW rows, the
    RET rows and the paired differences.
    """
    print("\n\n"
          "##############################"
          "Sentiments of (New - Returning) WAs"
          "##################################")

    # NOTE(review): ``& self.data.s_value`` combines the status mask with
    # the raw s_value column - presumably meant as "has a value"; confirm.
    new_wa = self.data[(self.data.status == 'NEW') & self.data.s_value]
    ret_wa = self.data[(self.data.status == 'RET') & self.data.s_value]

    n_ids = new_wa['ID'].values
    r_ids = ret_wa['ID'].values
    returning_ids = set(r_ids)  # constant-time membership inside the loop

    pair_deltas = []
    for wa_id in n_ids:
        if wa_id in returning_ids:
            t = new_wa[new_wa.ID == wa_id].s_value.values
            s = ret_wa[ret_wa.ID == wa_id].mean(axis=0).s_value
            pair_deltas.append(t - s)

    self.print_descriptively(label="Numbers for New WAs",
                             data=(len(n_ids), new_wa.mean().s_value, new_wa.var().s_value))
    self.print_descriptively(label="Numbers for Returning WAs",
                             data=(len(r_ids), ret_wa.mean().s_value, ret_wa.var().s_value))

    # Element-wise membership needs ``isin``; ``Series in array`` is not an
    # element-wise test, and the second line had an unbalanced bracket.
    # NOTE(review): ``new_female_was`` selects by the *returning* ids, as in
    # the original - confirm whether ``n_ids`` was intended.  Neither
    # selection is used further down.
    new_male_was = self.data[self.data.ID.isin(n_ids) & (self.data.gender == 'm')].s_value
    new_female_was = self.data[self.data.ID.isin(r_ids) & (self.data.gender == 'f')].s_value

    t_pair_deltas, p_value_pair_deltas = stats.ttest_1samp(pair_deltas, 0)
    self.print_descriptively(label="New vs Returning WAs",
                             data=(len(pair_deltas), np.mean(pair_deltas), np.var(pair_deltas)))
def test_permutation_t_test():
    """Test T-test based on permutations."""
    # 1 sample t-test: only the first two of five tests carry an offset
    np.random.seed(10)
    n_samples, n_tests = 30, 5
    X = np.random.randn(n_samples, n_tests)
    X[:, :2] += 1

    # (tail, expected significance pattern at p < 0.05)
    expected_by_tail = (
        (0, [True, True, False, False, False]),
        (1, [True, True, False, False, False]),
        (-1, [False, False, False, False, False]),
    )
    for tail, expected in expected_by_tail:
        T_obs, p_values, H0 = permutation_t_test(X, n_permutations=999,
                                                 tail=tail)
        assert_array_equal(p_values < 0.05, expected)

    # exhaustive permutations must agree with scipy's parametric test
    X = np.random.randn(18, 1)
    T_obs, p_values, H0 = permutation_t_test(X[:, [0]], n_permutations='all')
    T_obs_scipy, p_values_scipy = stats.ttest_1samp(X[:, 0], 0)
    assert_almost_equal(T_obs[0], T_obs_scipy, 8)
    assert_almost_equal(p_values[0], p_values_scipy, 2)
def check_mean():
    '''Data from Altman, check for significance of mean value.
    Compare average daily energy intake (kJ) over 10 days of 11 healthy women, and
    compare it to the recommended level of 7725 kJ.

    Prints mean and SD, the 95% confidence interval of the mean, the result
    of a one-sample t-test (only when significant) and the verdict of a
    Wilcoxon signed rank test.
    '''
    # Get data from Altman
    data = getData('altman_91.txt')

    # Watch out: by default the SD is calculated with 1/N!
    myMean = np.mean(data)
    mySD = np.std(data, ddof=1)
    # Single-argument print calls give the same output on Python 2 and 3.
    print('Mean and SD: {0:4.2f} and {1:4.2f}'.format(myMean, mySD))

    # Confidence intervals
    tf = stats.t(len(data)-1)
    ci = myMean + stats.sem(data)*np.array([-1,1])*tf.isf(0.025)
    print('The confidence intervals are {0:4.2f} to {1:4.2f}.'.format(ci[0], ci[1]))

    # Check for significance
    checkValue = 7725
    t, prob = stats.ttest_1samp(data, checkValue)
    if prob < 0.05:
        print('{0:4.2f} is significantly different from the mean (p={1:5.3f}).'.format(checkValue, prob))

    # For not normally distributed data, use the Wilcoxon signed rank test
    (rank, pVal) = stats.wilcoxon(data-checkValue)
    issignificant = 'unlikely' if pVal < 0.05 else 'likely'
    print('It is ' + issignificant + ' that the value is {0:d}'.format(checkValue))
def quantify(self, samples=None):
    """
    Get average ratio and p-value from ratio t-test

    Ratio t-test: log-transform ratios, then t-test against 0

    samples: a subset of samples to quantify across
            e.g if both heavy and light samples included,
            it only makes sense to average ratios across one set or the other.
            When omitted (or empty) every quantified sample is used.

    Zero and NaN ratios are ignored.  The arithmetic mean of the remaining
    ratios is stored in ``self.avg_ratio`` and the p-value in ``self.p_value``.
    """
    from scipy import stats

    if samples:
        selected = [q for s, q in self.quantification.items() if s in samples]
    else:
        selected = self.quantification.values()

    # Filter out 0 and NaN
    nonzero = np.array([q['ratio'] for q in selected if q['ratio'] != 0])
    usable = nonzero[~np.isnan(nonzero)]

    t, p = stats.ttest_1samp(np.log(usable), 0)

    self.avg_ratio = np.mean(usable)
    self.p_value = p
def one_sample_ttest(category,base):
    """Print the t and p value of a one-sample t-test of a category's ratings.

    The ratings loaded for ``category`` are tested against the population
    mean ``base``.  The population data returned by the loader is converted
    to an array but not used in the test.
    """
    rating, population = load_rating_data(category)
    rating = np.array(rating)
    population = np.array(population)

    t4, prob4 = stats.ttest_1samp(rating, base)

    for label, value in (("t value of ", t4), ("p value of ", prob4)):
        print(label+category+str(value))
out_file = 'BMI_data_China.svg' show_data(out_file, out_dir='.') # Plot for the 1-sample T-test plt.plot(samples['China'], 'o', label='China') plt.hlines(25, 0, num_samples, linestyles='--') plt.xlim(0, 50) plt.xlabel('Subject-Nr') plt.ylabel('BMI') plt.legend() plt.tight_layout() out_file = 'BMI_China.jpg' show_data(out_file, out_dir='.') # ... and do the corresponding T-test t, p = stats.ttest_1samp(samples['China'], popmean=25) print(f'Is China just at the limit of over-weight (BMI=25)? p={p}') # Comparison between two independent groups plt.plot(samples['Germany'], 'b*', ms=3, label='Germans') plt.hlines(np.mean(samples['Germany']), 0, num_samples, linestyles='--', color='b', label='mean-Germany') plt.plot(np.arange(num_samples, 2 * num_samples), samples['Austria'], 'ro', ms=3,
def compute_transitions(self):
    """Score directed transitions between clusters from single-cell edges.

    Reads the adjacency in ``self._adata.uns['velocyto_transitions']`` and
    the cluster codes of ``self._adata.obs[self._groups_key]``. For every
    pair of connected clusters it runs a one-sample t-test on the signed
    edge weights and computes an edge-count based confidence.

    Writes ``self.transitions_ttest`` and ``self.transitions_confidence``,
    both transposed so that entry ij means transition from j to i.
    Returns nothing.
    """
    # analogous code using networkx
    # membership = adata.obs['clusters'].cat.codes.tolist()
    # partition = defaultdict(list)
    # for n, p in zip(list(range(len(G))), membership):
    #     partition[p].append(n)
    # partition = partition.values()
    # g_abstracted = nx.quotient_graph(g, partition, relabel=True)
    # for some reason, though, edges aren't oriented in the quotient
    # graph...
    import igraph
    # Weighted graph: keeps every single-cell edge between clusters
    # (combine_edges=False), so the weights can be t-tested below.
    g = utils.get_igraph_from_adjacency(
        self._adata.uns['velocyto_transitions'], directed=True)
    vc = igraph.VertexClustering(
        g, membership=self._adata.obs[self._groups_key].cat.codes.values)
    cg_full = vc.cluster_graph(combine_edges=False)

    # Boolean graph: edges are summed per cluster pair, i.e. counted.
    g_bool = utils.get_igraph_from_adjacency(
        self._adata.uns['velocyto_transitions'].astype('bool'), directed=True)
    vc_bool = igraph.VertexClustering(
        g_bool, membership=self._adata.obs[self._groups_key].cat.codes.values)
    cg_bool = vc_bool.cluster_graph(combine_edges='sum')  # collapsed version
    transitions = utils.get_sparse_from_igraph(cg_bool, weight_attr='weight')
    # translate this into a confidence measure
    # the number of outgoing edges
    # total_n = np.zeros(len(vc.sizes()))
    # # (this is not the convention of standard stochastic matrices)
    # total_outgoing = transitions.sum(axis=1)
    # for i in range(len(total_n)):
    #     total_n[i] = vc.subgraph(i).ecount()
    #     total_n[i] += total_outgoing[i, 0]
    # use the topology based reference, the velocity one might have very small numbers
    total_n = self._neighbors.n_neighbors * np.array(vc_bool.sizes())
    transitions_ttest = transitions.copy()
    transitions_confidence = transitions.copy()
    from scipy.stats import ttest_1samp
    for i in range(transitions.shape[0]):
        # no symmetry in transitions, hence we should not restrict to
        # upper triangle
        neighbors = transitions[i].nonzero()[1]
        for j in neighbors:
            forward = cg_full.es.select(_source=i, _target=j)['weight']
            backward = cg_full.es.select(_source=j, _target=i)['weight']
            # backward direction: add minus sign
            values = np.array(list(forward) + list(-np.array(backward)))
            # require some minimal number of observations
            if len(values) < 5:
                transitions_ttest[i, j] = 0
                transitions_ttest[j, i] = 0
                transitions_confidence[i, j] = 0
                transitions_confidence[j, i] = 0
                continue
            t, prob = ttest_1samp(values, 0.0)
            if t > 0:
                # number of outgoing edges greater than number of ingoing edges
                # i.e., transition from i to j
                # p-values are floored at 1e-10, so the score is capped at 10
                transitions_ttest[i, j] = -np.log10(max(prob, 1e-10))
                transitions_ttest[j, i] = 0
            else:
                transitions_ttest[j, i] = -np.log10(max(prob, 1e-10))
                transitions_ttest[i, j] = 0
            # geom_mean
            geom_mean = np.sqrt(total_n[i] * total_n[j])
            # net edge count, normalised by the geometric mean of the
            # two clusters' reference edge totals
            diff = (len(forward) - len(backward)) / geom_mean
            if diff > 0:
                transitions_confidence[i, j] = diff
                transitions_confidence[j, i] = 0
            else:
                transitions_confidence[j, i] = -diff
                transitions_confidence[i, j] = 0
    transitions_ttest.eliminate_zeros()
    transitions_confidence.eliminate_zeros()
    # transpose in order to match convention of stochastic matrices
    # entry ij means transition from j to i
    self.transitions_ttest = transitions_ttest.T
    self.transitions_confidence = transitions_confidence.T
print(df.tail())
print(df.columns)  # extracting column names
print(df.index)  # extracting row names or the index
print(df.T)  # transpose data
# NOTE(review): `df.sort` has no call parentheses, so this prints the
# attribute itself rather than sorted data -- confirm intent.
print(df.sort)
# print(df.ix[:,0].head()) # extracting a specific column
print(df.ix[10:20,0:3])
# axis tells the function to drop with respect to columns; if axis=0,
# then the index
print(df.drop(df.columns[[1,2]], axis = 1).head())

# Descriptive Statistics
print(df.describe())

# Hypothesis testing
# perform one sample t-test using 1500 as the true mean
print(ss.ttest_1samp(a=df.ix[:,'Abra'],popmean=1500))

# Visualization
matplotlib.rcdefaults()
plt.show(df.plot(kind = 'box'))
# Original note: "Sets the plotting display theme to ggplot2".
# NOTE(review): the value assigned here is 'default' -- confirm.
pd.options.display.mpl_style = 'default'
df.plot(kind = 'box')
sns.boxplot(data=df,width=0.5)
sns.violinplot(df,width=3.5)
plt.show(sns.distplot(df.ix[:,2], rug = True, bins = 15))
# NOTE(review): the body of this `with` block lies beyond the visible part
# of the file.
with sns.axes_style("white"):
from numpy import mean, sqrt, std
from scipy import stats
from scipy.stats import t

__author__ = 'zzt'


def ttest(a, mu):
    """Hand-rolled two-sided one-sample t-test.

    Parameters: `a` is the sample, `mu` the hypothesised population mean.
    Returns the list [t statistic, p-value].
    """
    ave = mean(a)
    s = std(a, ddof=1)  # sample standard deviation (n - 1 denominator)
    tv = (ave - mu) / s * sqrt(len(a))
    # two-sided p-value from the t distribution with n - 1 degrees of freedom
    p = t.sf(abs(tv), len(a) - 1) * 2
    return [tv, p]


class Solution():
    """Wraps `ttest` and rounds its results."""

    def ttest_1samp(self, a, popmean):
        """Return [t, p] rounded to 6 decimals, or [None, None] for an empty sample."""
        if len(a) == 0:
            return [None, None]
        return self.res(ttest(a, popmean))

    def res(self, a):
        """Round every element of `a` to 6 decimal places."""
        return [round(x, 6) for x in a]


if __name__ == '__main__':
    rvs = [2, 2, 3]
    # Reference result from scipy, followed by the hand-rolled version.
    print(stats.ttest_1samp(rvs, 2))
    # print(ttest(rvs, 2))
    s = Solution()
    print(s.ttest_1samp(rvs, 2))
import csv

import numpy as np
import pandas as pd
from scipy import stats

# I. Load data
iq_data = np.loadtxt("iqdata.csv")
iq1 = iq_data[0:10000]

# Mean and standard deviation of the reference data, reused for every case.
iq_mean = np.mean(iq1)
iq_std = np.std(iq1)

# The test-case file starts with the number of cases; each case then
# supplies a lower bound, an upper bound and the base name of a sample CSV.
with open("testcaseiq.txt") as f:
    nooftestcases = f.readline().strip()
    for i in range(1, int(nooftestcases) + 1):
        with open("output{}.csv".format(i), "w") as out:
            writer = csv.writer(out)
            writer.writerow([str(round(iq_mean, 2))])
            writer.writerow([str(round(iq_std, 2))])

            # Probability (in percent) that a normal variable with the
            # reference mean/std falls between the two bounds.
            lower_value = f.readline().strip()
            upper_value = f.readline().strip()
            reference = stats.norm(iq_mean, iq_std)
            probability = np.subtract(
                reference.cdf(int(upper_value)),
                reference.cdf(int(lower_value))) * 100
            writer.writerow([str(np.round(probability, 3))])

            # One-sample t-test of the case's sample against the reference mean.
            file = f.readline().strip()
            sample = pd.read_csv("{}.csv".format(file))
            p_value = stats.ttest_1samp(a=sample, popmean=iq_mean)
            # p_value[1] holds the p-values; [0] takes the first column's.
            if p_value[1][0] < 0.05:
                writer.writerow(["Reject"])
            else:
                writer.writerow(["Accept"])
def check_sample_mean(sample, popmean):
    """Assert that the sample mean is not unlikely given `popmean`.

    Runs a one-sample t-test and raises AssertionError when the p-value
    is 0.01 or smaller.
    """
    outcome = stats.ttest_1samp(sample, popmean)
    assert outcome.pvalue > 0.01
def Ses_test(Yt, columns):
    """Run four heuristic seasonality checks on a series.

    Parameters
    ----------
    Yt : the observations; must support ``reset_index(drop=True)``.
    columns : date labels, used as the index of the frame handed to the
        trend decomposition.

    Returns
    -------
    list of four bool
        [0] t-test of the seasonal index against 1,
        [1] periodogram frequency check,
        [2] autocorrelation check on the detrended series,
        [3] BIC comparison of a seasonal+trend model with a trend-only model.
    """
    seasonal = [False, False, False, False]
    Yt = Yt.reset_index(drop=True)
    Yt, Ses_3 = Out_function_1(Yt)
    # Exact zeros are replaced by 1 before any decomposition.
    Yt[np.where((Yt == 0) == True)[0]] = 1

    # Test 1: Statistical test of seasonal Index
    if len(Yt) > 5:
        try:
            result = seasonal_decompose(Yt, model='multiplicative', freq=5)
        except Exception:
            # Fall back to the additive model when the multiplicative one fails.
            result = seasonal_decompose(Yt, model='additive', freq=5)
        seasonal_index = result.seasonal
        f, p = stats.ttest_1samp(seasonal_index, 1)
        if p < 0.05:
            seasonal[0] = True

    # Test 2: Frequency and acf
    # p becomes the first lag at which the ACF drops below the upper
    # confidence bound (capped at 12); 12 is used when no such lag exists.
    res = acf(Yt)
    ub = 1.96 / np.sqrt(len(Yt))
    for i in range(1, len(res) - 1):
        if (res[i] > ub and res[i + 1] < ub):
            p = i
            if (p > 12):
                p = 12
            break
    else:
        p = 12

    d = {'date': columns, 'data': Yt}
    ts_data = pd.DataFrame(d)
    ts_data.set_index('date', inplace=True)

    # Test 1: periodogram
    # estimate spectral density
    freq = [0] * len(Yt)
    freq[0] = 12 / len(Yt)
    for i in range(1, len(Yt)):
        freq[i] = freq[i - 1] + freq[0]
    freq = np.array(freq)
    f, spec = signal.periodogram(Yt)
    freq = freq[:len(spec)]
    freq_max = max(freq)
    if freq_max < p + 1.5 and freq_max > p - 1.5:
        seasonal[1] = True

    # Test 2: auto-correlation function
    try:
        Tt = stldecompose.decompose(ts_data).trend  # extract the trend element
    except Exception:
        # The old fallback was a plain list, which has no reset_index and
        # crashed on the next line. An all-missing frame keeps the same
        # shape and makes the check below skip itself.
        Tt = pd.DataFrame({'data': [None] * len(Yt)})
    Tt = Tt.reset_index(drop=True)['data']
    if sum(Tt.isnull()) == 0:
        At = Yt - Tt  # detrend time series
        acf_val = acf(At)
        lag_val = [0] * 75
        lag_val[0] = 0
        for i in range(1, len(lag_val)):
            lag_val[i] = lag_val[i - 1] + (1 / 12)
        ind = np.where(acf_val == np.min(acf_val))[0]
        lag = lag_val[ind[0]]
        # Same +/- 1.5 window as the periodogram check above. The previous
        # second comparison used `<`, which made the first one redundant.
        if lag < p + 1.5 and lag > p - 1.5:
            seasonal[2] = True
        else:
            seasonal[2] = False

    # Test 3: seasonal model
    # seasonal ---> cycle(): 1..12 repeating
    seas = [0] * len(Yt)
    j = 1
    for i in range(len(Yt)):
        seas[i] = j
        j += 1
        if j > 12:
            j = 1
    # trend --> time(): starts at 1 and grows by 1/12 per observation
    trend = [0] * len(Yt)
    trend[0] = 1
    for i in range(1, len(Yt)):
        trend[i] = trend[i - 1] + (1 / 12)
    d = {'Yt': Yt, 'seas': seas, 'trend': trend}
    df = pd.DataFrame(d)
    X = df[["seas", "trend"]]
    y = df["Yt"]
    m1 = sm.OLS(y, X).fit()
    X1 = df[["trend"]]
    m2 = sm.OLS(y, X1).fit()
    # Seasonal when the model that includes the seasonal term has the
    # lowest BIC.
    bic = [m1.bic, m2.bic]
    arrind = np.where(np.array(bic) == np.min(bic))[0][0]
    if arrind == 0:
        seasonal[3] = True
    return seasonal
plt.show() #scipy #justpaste.it/6ov7y #justpaste.it/5woh8 #test hypothesis #Hypothesis is a claim which can be true or falls #According to KQ the mean flight transaction is 56000. #null hypothesis(Ho) - the mean(flight trans) is equal to 56000 #alternative hypothesis(H1)- our sample mean is not equal to 56000 import scipy #install from scipy.stats import ttest_1samp statistics, pvalue = ttest_1samp(df['FlightTrans'], 56000) print('p value is :', pvalue) alpha = 0.05 if pvalue < alpha: print('Reject Null Hypothesis') print('Accept the alternative') print('Alternative Hypothesis(H1)-our sample mean is not equal to 56000') else: print('Accept Null Hypothesis') print( 'Null Hypothesis(H0) - Our sample mean(FlightTrans) is equal to 56000') #sample, ANOVA, Chi square #work on something ,,, get any data set , do A few plots #on eithher classification, regresssion or clustering #1 page document explaining your work and a link to your code,
import random

import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as ss

# Two samples of 100 distinct integers drawn from 0..500.
x = random.sample(range(0, 501), 100)
print(x)
y = random.sample(range(0, 501), 100)
print(y)

# The following code tests our random sample x against the mean of 250.
print('H0 = There is no statistical significance between the random sample x and a mean of the value 250.')
print('HA = There is a statistical significance between the random sample x and a mean of the value 250.')
ttest_sampx = ss.ttest_1samp(x, 250)
print(ttest_sampx)

# The following code uses the output of the t-test to print the correct
# analysis; ttest_sampx[1] is the p-value.
if ttest_sampx[1] > 0.05:
    print("The H0 is not rejected. There is no statistical evidence against the H0.")
elif ttest_sampx[1] < 0.01:
    # Rejecting H0 means the evidence is against it (the message used to
    # say "for").
    print("H0 is rejected. There is statistical evidence against the H0.")
else:
    print("There is a weak correlation between the samples x and a mean of 250, therefore the H0 is rejected.")
print("__________________________________")

# The following code tests our random sample y against the mean of 250.
print("___________________________________")
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 4 23:09:05 2018

@author: nandavari
"""
import numpy as np
from scipy import stats
from scipy.stats import norm
from scipy.stats import uniform

# Normal CDF evaluated at a handful of points.
cdf_points = np.array([1, -1, 0, 1, 3, 4, -2, 6])
print(norm.cdf(cdf_points))

# The median of a distribution comes from the Percent Point Function (PPF),
# which is the inverse of the CDF.
median = norm.ppf(0.5)
print(median)

# A sequence of random variates is requested with the size keyword.
variates = norm.rvs(size=5)
print(variates)

# Uniform distribution
# The uniform function takes its location and scale as keywords.
uniform_cdf = uniform.cdf([0, 1, 2, 3, 4, 5], loc=1, scale=4)
print(uniform_cdf)

# ttest_1samp: 50 draws in each of 2 columns, tested against 5.0.
rvs = stats.norm.rvs(loc=5, scale=10, size=(50, 2))
print(stats.ttest_1samp(rvs, 5.0))
def compute_transitions_old(self):
    """Score directed cluster-to-cluster transitions (older variant).

    Builds two cluster graphs from
    ``self._adata.uns['velocyto_transitions']``: one that keeps every
    single-cell edge and a boolean one that counts edges. For each pair of
    connected clusters it stores a t-test score and an edge-count
    confidence.

    Writes ``self.transitions_ttest`` and ``self.transitions_confidence``,
    both transposed so that entry ij means transition from j to i.
    """
    import igraph
    from scipy.stats import ttest_1samp

    adjacency = self._adata.uns['velocyto_transitions']
    membership = self._adata.obs[self._groups_key].cat.codes.values

    # all single-cell edges are kept in this cluster graph
    weighted_graph = utils.get_igraph_from_adjacency(adjacency, directed=True)
    weighted_clusters = igraph.VertexClustering(
        weighted_graph, membership=membership)
    cg_full = weighted_clusters.cluster_graph(combine_edges=False)

    # boolean version: simply counts edges in the clustered graph
    bool_graph = utils.get_igraph_from_adjacency(
        adjacency.astype('bool'), directed=True)
    bool_clusters = igraph.VertexClustering(bool_graph, membership=membership)
    collapsed = bool_clusters.cluster_graph(combine_edges='sum')
    transitions = utils.get_sparse_from_igraph(collapsed, weight_attr='weight')

    total_n = self._neighbors.n_neighbors * np.array(bool_clusters.sizes())
    ttest_scores = transitions.copy()
    confidence = transitions.copy()

    for i in range(transitions.shape[0]):
        for j in transitions[i].nonzero()[1]:
            forward = cg_full.es.select(_source=i, _target=j)['weight']
            backward = cg_full.es.select(_source=j, _target=i)['weight']
            # edges pointing backwards enter with a minus sign
            values = np.array(list(forward) + list(-np.array(backward)))

            # too few observations: clear both directions and move on
            if len(values) < 5:
                ttest_scores[i, j] = 0
                ttest_scores[j, i] = 0
                confidence[i, j] = 0
                confidence[j, i] = 0
                continue

            t, prob = ttest_1samp(values, 0.0)
            # t > 0: more outgoing than ingoing edges, i.e. i -> j.
            # The winning direction is written first, then its reverse is
            # zeroed (the order matters when i == j).
            src, dst = (i, j) if t > 0 else (j, i)
            ttest_scores[src, dst] = -np.log10(max(prob, 1e-10))
            ttest_scores[dst, src] = 0

            # net edge count relative to the geometric mean of both totals
            geom_mean = np.sqrt(total_n[i] * total_n[j])
            diff = (len(forward) - len(backward)) / geom_mean
            if diff > 0:
                src, dst, magnitude = i, j, diff
            else:
                src, dst, magnitude = j, i, -diff
            confidence[src, dst] = magnitude
            confidence[dst, src] = 0

    ttest_scores.eliminate_zeros()
    confidence.eliminate_zeros()
    # transposed to match the convention of stochastic matrices:
    # entry ij means transition from j to i
    self.transitions_ttest = ttest_scores.T
    self.transitions_confidence = confidence.T
###Smote #Only numeric/boolean and non_null values as input to TSNE model :: BETTER TRY THIS AFTER MISSING VALUE IMPUTATION AND ENCODING from imblearn.over_sampling import SMOTE sm = SMOTE(random_state=42) X_train_new, y_train_new = sm.fit_sample(train.dropna().iloc[:, 1:44], train.dropna()['Dependent_Variable']) #####Check if sample is representing the population: Central Limit Theorem, https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test #Hyothesis testing, Degree of Freedom, t-statistics student t-test etc. #References: http://www.scipy-lectures.org/packages/statistics/index.html#pairplot-scatter-matrices #scipy.stats.ttest_1samp() tests if the population mean of data is likely to be equal to a given value (technically if observations are drawn from a Gaussian distributions of given population mean). It returns the T statistic, and the p-value ##1-sample ttest stats.ttest_1samp(data['VIQ'], 0) stats.ttest_1samp(train['N32'].dropna(), 0) #2-sample ttest female_viq = data[data['Gender'] == 'Female']['VIQ'] male_viq = data[data['Gender'] == 'Male']['VIQ'] stats.ttest_ind(female_viq, male_viq) #paired ttest stats.ttest_ind(data['FSIQ'], data['PIQ']) stats.ttest_1samp(data['FSIQ'] - data['PIQ'], 0) #Skewness and kurtosis #Skewness is a measure of asymmetry. Kurtosis is a more subtle measure of peakedness compared to a Gaussian distribution. from scipy.stats import kurtosis, skew kurtosis(train['N35'].notnull())
def tek_orneklem_t_testi(self, beklenen_deger):
    """One-sample t-test of self.choice_array against `beklenen_deger`.

    Returns the p-value as a plain float.
    """
    test_sonucu = stats.ttest_1samp(self.choice_array, popmean=beklenen_deger)
    p_degeri = test_sonucu.pvalue
    return float(p_degeri)
# For every gain column (names starting with 'k_') save a quantile plot and
# record a t-test against zero.
for col in group.columns:
    if col.startswith('k_'):
        # plot the quantiles plot to see if the data is normally distributed
        fig = qqplot(group[col], line='45')
        plot_dir = os.path.join(PATHS['figures_dir'], 'quantile-plots', event, structure, '{:1.1f}'.format(speed))
        plot_dir = utils.mkdir(plot_dir)
        fig.savefig(os.path.join(plot_dir, '{}.png'.format(col)))
        plt.close(fig)
        # compute the t statistic to see if the value is significantly
        # different than zero
        t_stat, p_val = ttest_1samp(group[col], 0.0)
        index.append(col)
        t_vals.append(t_stat)
        p_vals.append(p_val)

#mark = np.zeros((num_schedules, num_sensors, num_actuators), dtype=bool)
mark = np.zeros((20, 6, 12), dtype=bool)
# A gain name is 'k_<i>_<j>_<k>'; the three integers index into `mark`.
for gain, p_val in zip(index, p_vals):
    if p_val < 0.05:
        i, j, k = [int(n) for n in gain[2:].split('_')]
        mark[i, j, k] = True
sig_marks[(speed, event)] = mark
# seaborn : Matplotlib을 기반으로 다양한 색상 테마와 통계용 차트 등의 기능을 추가한 시각화 패키지이다. # one samples t-test # 어느 한 집단의 평균은 0인지 검정하기(난수 사용) # 귀무 : 자료들의 평균은 0이다. # 대립 : 자료들의 평균은 0이 아니다. np.random.seed(123) mu = 0 n = 10 # 데이터가 많아질 수록 0에 가까워진다. ex) 1000, 10000 등 x = stats.norm(mu).rvs(n) # norm : 정규분포, rvs : 랜덤 표본 생성 print(x, np.mean(x)) # mean : -0.26951611032632805 # sns.distplot(x, kde=False, rug=True, fit=stats.norm) # 시각화 # kde=False을 넘겨주면 밀도 그래프를 그리지 않는다. # plt.show() result = stats.ttest_1samp(x, popmean=0) # (데이터, 예상평균값) print('result : ', result) # Ttest_1sampResult(statistic=-0.6540040368674593, pvalue=0.5294637946339893) statistic : 검정평균 # 해석 : p-value(0.529463) > 0.05(유의수준) 이므로 귀무 채택. 자료들의 평균은 0이다. # * 단일 모집단의 평균에 대한 가설검정(one samples t-test) # 실습 예제 1) # A중학교 1학년 1반 학생들의 시험결과가 담긴 파일을 읽어 처리 (국어 점수 평균검정) student.csv' # 귀무 : 국어 점수의 평균은 80이다. # 대립 : 국어 점수의 평균은 80이 아니다 data = pd.read_csv('../testdata/student.csv') print(data.head()) print(data.describe()) result2 = stats.ttest_1samp(data.국어, popmean=80) print('result2 : ', result2)
def descstats(data, cols=None, axis=0):
    '''
    Build a text table of descriptive statistics for one or multiple variables.

    Parameters
    ------------
    data: numpy array
        The data; it is converted with ``np.array``. A 1-d, non-record
        input is reshaped to a single column when `cols` is None.
    cols: optional
        Only used as the variable name printed in the univariate report.
    axis: 1 or 0
        Not read anywhere in this function body.

    Returns
    -------
    desc : str
        The univariate report when the data has one column, otherwise one
        summary row per column.

    Raises
    ------
    ValueError
        If the number of columns is neither 1 nor greater than 1.

    Examples
    --------
    >>> descstats(data.exog)
    '''
    # NOTE(review): the layout whitespace inside the report templates below
    # appears collapsed in this copy of the file -- verify against the
    # upstream source before relying on column alignment.
    x = np.array(data) # or rather, the data we're interested in
    if cols is None:
        # if isinstance(x, np.recarray):
        #     cols = np.array(len(x.dtype.names))
        if not isinstance(x, np.recarray) and x.ndim == 1:
            x = x[:,None]

    if x.shape[1] == 1:
        desc = ''' --------------------------------------------- Univariate Descriptive Statistics --------------------------------------------- Var. Name %(name)12s ---------- Obs. %(nobs)22i Range %(range)22s Sum of Wts. %(sum)22s Coeff. of Variation %(coeffvar)22.4g Mode %(mode)22.4g Skewness %(skewness)22.4g Repeats %(nmode)22i Kurtosis %(kurtosis)22.4g Mean %(mean)22.4g Uncorrected SS %(uss)22.4g Median %(median)22.4g Corrected SS %(ss)22.4g Variance %(variance)22.4g Sum Observations %(sobs)22.4g Std. Dev. %(stddev)22.4g ''' % {'name': cols, 'sum': 'N/A', 'nobs': len(x), 'mode': \
            stats.mode(x)[0][0], 'nmode': stats.mode(x)[1][0], \
            'mean': x.mean(), 'median': np.median(x), 'range': \
            '('+str(x.min())+', '+str(x.max())+')', 'variance': \
            x.var(), 'stddev': x.std(), 'coeffvar': \
            stats.variation(x), 'skewness': stats.skew(x), \
            'kurtosis': stats.kurtosis(x), 'uss': stats.ss(x),\
            'ss': stats.ss(x-x.mean()), 'sobs': np.sum(x)}
        # ''' % {'name': cols[0], 'sum': 'N/A', 'nobs': len(x[cols[0]]), 'mode': \
        # stats.mode(x[cols[0]])[0][0], 'nmode': stats.mode(x[cols[0]])[1][0], \
        # 'mean': x[cols[0]].mean(), 'median': np.median(x[cols[0]]), 'range': \
        # '('+str(x[cols[0]].min())+', '+str(x[cols[0]].max())+')', 'variance': \
        # x[cols[0]].var(), 'stddev': x[cols[0]].std(), 'coeffvar': \
        # stats.variation(x[cols[0]]), 'skewness': stats.skew(x[cols[0]]), \
        # 'kurtosis': stats.kurtosis(x[cols[0]]), 'uss': stats.ss(x[cols[0]]),\
        # 'ss': stats.ss(x[cols[0]]-x[cols[0]].mean()), 'sobs': np.sum(x[cols[0]])}

        # Percentile table: one value per listed percentile.
        desc+= ''' Percentiles ------------- 1 %% %12.4g 5 %% %12.4g 10 %% %12.4g 25 %% %12.4g 50 %% %12.4g 75 %% %12.4g 90 %% %12.4g 95 %% %12.4g 99 %% %12.4g ''' % tuple([stats.scoreatpercentile(x,per) for per in (1,5,10,25,
                50,75,90,95,99)])
        # Three location tests against zero: t-test, sign test, signed rank.
        t,p_t=stats.ttest_1samp(x,0)
        M,p_M=sign_test(x)
        S,p_S=stats.wilcoxon(np.squeeze(x))

        desc+= ''' Tests of Location (H0: Mu0=0) ----------------------------- Test Statistic Two-tailed probability -----------------+----------------------------------------- Student's t | t %7.5f Pr > |t| <%.4f Sign | M %8.2f Pr >= |M| <%.4f Signed Rank | S %8.2f Pr >= |S| <%.4f ''' % (t,p_t,M,p_M,S,p_S)
        # Should this be part of a 'descstats'
        # in any event these should be split up, so that they can be called
        # individually and only returned together if someone calls summary
        # or something of the sort

    elif x.shape[1] > 1:
        desc =''' Var. Name | Obs. Mean Std. Dev. Range ------------+--------------------------------------------------------'''+\
            os.linesep

        # for recarrays with columns passed as names
        # if isinstance(cols[0],str):
        #     for var in cols:
        #         desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
        #%(range)20s" % {'name': var, 'obs': len(x[var]), 'mean': x[var].mean(),
        #         'stddev': x[var].std(), 'range': '('+str(x[var].min())+', '\
        #         +str(x[var].max())+')'+os.linesep}
        # else:
        # One row per column; the column number serves as the name.
        for var in range(x.shape[1]):
            desc += "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g \
%(range)20s" % {'name': var, 'obs': len(x[:,var]), 'mean': x[:,var].mean(),
                'stddev': x[:,var].std(), 'range': '('+str(x[:,var].min())+', '+\
                str(x[:,var].max())+')'+os.linesep}
    else:
        raise ValueError("data not understood")

    return desc
'long_short'] = ewret_transposed.winners - ewret_transposed.losers
# NOTE(review): the statement above starts before the visible part of the
# file; only its tail is shown here.

# Compute Long-Short Portfolio Cumulative Returns
ewret_transposed['cumret_winners'] = (1 + ewret_transposed.winners).cumprod() - 1
ewret_transposed['cumret_losers'] = (1 + ewret_transposed.losers).cumprod() - 1
ewret_transposed['cumret_long_short'] = (
    1 + ewret_transposed.long_short).cumprod() - 1

# Portfolio Summary #
# Mean #
mom_mean = ewret_transposed[['winners', 'losers', 'long_short']].mean().to_frame()
mom_mean = mom_mean.rename(columns={0: 'mean'}).reset_index()

# T-Value and P-Value #
# Each t-test result is turned into a one-row frame whose columns 0 and 1
# hold the statistic and the p-value.
t_losers = pd.Series(stats.ttest_1samp(ewret_transposed['losers'], 0.0)).to_frame().T
t_winners = pd.Series(stats.ttest_1samp(ewret_transposed['winners'], 0.0)).to_frame().T
t_long_short = pd.Series(stats.ttest_1samp(ewret_transposed['long_short'], 0.0)).to_frame().T
t_losers['momr'] = 'losers'
t_winners['momr'] = 'winners'
t_long_short['momr'] = 'long_short'
t_output = pd.concat([t_winners, t_losers, t_long_short]).rename(columns={
    0: 't-stat',
    1: 'p-value'
})

# Combine mean, t and p and format output
def t_statistic(df, confidence=0.90):
    """One-sample t-test of OldTown living area against the overall mean.

    Parameters
    ----------
    df : DataFrame with 'Neighborhood' and 'GrLivArea' columns.
    confidence : confidence level of the test; the significance level is
        ``1 - confidence`` (0.10 by default).

    Returns
    -------
    (p_value, test_result)
        `test_result` is True when ``p_value`` exceeds the significance
        level, i.e. when the null hypothesis of equal means is not rejected.
    """
    stat, p_value = stats.ttest_1samp(
        a=df[df['Neighborhood'] == 'OldTown']['GrLivArea'],
        popmean=df['GrLivArea'].mean())
    # The previous code compared the p-value with norm.ppf(.90) (about 1.28),
    # a z-score quantile that no p-value can exceed, so the result was
    # always False. A p-value has to be compared with a significance level.
    alpha = 1 - confidence
    return p_value, p_value > alpha
# Pick the per-feature test from the test name: a correlation when the name
# contains 'i', a two-group t-test when it contains 'VS', otherwise a
# one-sample t-test against 0.
if my_test.find('i') >= 0:
    pvals = []
    nfeats = brain.shape[1]
    nsubjs = brain.shape[0]
    for i in range(nfeats):
        vec = brain[:, i]
        keep = ~np.isnan(vec)
        # 2-tailed p-value by default
        pvals.append(stats.pearsonr(sx[keep], vec[keep])[1])
    pvals = np.array(pvals)
elif my_test.find('VS') >= 0:
    nsubjs = brain1.shape[0] + brain2.shape[0]
    pvals = stats.ttest_ind(brain1, brain2, axis=0, equal_var=True)[1]
else:
    nsubjs = brain.shape[0]
    pvals = stats.ttest_1samp(brain, popmean=0, axis=0)[1]

# FDR correction only sees the p-values that are not NaN.
idx = ~np.isnan(pvals)
for a in alphas:
    reject_fdr, pval_fdr = mne.stats.fdr_correction(pvals[idx], alpha=a, method='indep')
    num_good = np.sum(reject_fdr)
    if num_good > 0:
        print('\n\nGood voxels at %.1e: %d\n\n' % (a, num_good))
        # if we have any good voxels left, put them in their original positions
        tvals = -stats.distributions.t.ppf(pvals / 2, nsubjs - 1)
        # reject_fdr only covers pvals[idx]. Expand it to the full length
        # first, otherwise the mask shapes differ whenever a p-value is NaN.
        reject_all = np.zeros(pvals.shape, dtype=bool)
        reject_all[idx] = reject_fdr
        tvals[~reject_all] = 0
        tvals[~idx] = 0
        # make .nii with p-values
        fname = data_dir + my_test + '_FDR_a%.2ft%.2f' % (a, thresh)
def ttest_1samp(X):
    """Return only the T-values of a one-sample t-test of X against 0."""
    t_values, _ = stats.ttest_1samp(X, 0)
    return t_values
import math

import numpy
from scipy import stats

# 1. Generate 100 normally distributed random variables with mean=0.5
# and variance=9.
n = 100
sample = numpy.random.normal(loc=0.5, scale=3, size=n)

# 2. Compute the sample mean and sample variance.
# ddof=1 gives the unbiased sample variance, which the t statistic needs in
# order to match stats.ttest_1samp.
x_bar = numpy.mean(sample)
s_squared = numpy.var(sample, ddof=1)

# 3. Compute the test statistic (H0: mean = 0).
t_val = (x_bar - 0) / math.sqrt(s_squared / n)

# 4. Compute the two-sided p-value.
# sf(|t|) is the upper-tail probability; doubling it covers both tails.
# R equivalent: p_value = pt(t, n-1, lower.tail = FALSE)*2
p_val = 2 * stats.t.sf(abs(t_val), n - 1)
print(p_val)

# 5. Use the existing function or procedure to test the hypothesis.
# Compare the two results.
results = stats.ttest_1samp(sample, popmean=0)
print(results)
# Plot the regressor time course, then add error bars at every 40th point.
pl.plot(x, y, color=plotColors[colorCounter], label=regressorName)
downSampledY = [y[index] for index in range(0, len(y), 40)]
downSampledX = [x[index] for index in range(0, len(y), 40)]
downSampledErr = [stErrs[index] for index in range(0, len(y), 40)]
pl.errorbar(downSampledX, downSampledY, yerr=downSampledErr, color=plotColors[colorCounter], ls='none')

# p-value of a one-sample t-test against 0, computed across observers at
# each of the down-sampled time points.
tTestpValsVs0 = [
    ttest_1samp([
        allPerObsData[glmIndex][regressorIndex][obsIndex]
        [timePointIndex] for obsIndex in range(
            len(allPerObsData[glmIndex][regressorIndex]))
    ], 0)[1] for timePointIndex in range(0, len(y), 40)
]

# Collect the down-sampled points whose p-value is below .01.
xForSignificantOnes = []
yForSignificantOnes = []
for candidateIndex in range(len(downSampledY)):
    if tTestpValsVs0[candidateIndex] < .01:
        xForSignificantOnes = xForSignificantOnes + [
            downSampledX[candidateIndex]
        ]
        yForSignificantOnes = yForSignificantOnes + [
            downSampledY[candidateIndex]
        ]
def bootstrap(x1, x2, paired=True, statfunction=None, smoothboot=False,
              alpha_level=0.05, reps=5000):
    '''
    Computes summary statistics and bootstrapped confidence intervals for
    a single sample or for paired data.

    Keywords:
        x1, x2: 1D arrays
            Pass ``x2=None`` to analyse `x1` on its own.
        paired: boolean, default True
            Whether x1 and x2 are paired samples. Ignored when x2 is None.
        statfunction: function
            Summary statistic to call on data. Default is np.mean
        smoothboot: boolean, default False
            Passed to the seaborn bootstrap as ``smooth``.
        alpha_level: float, default 0.05
            alpha = 0.05 gives 95 percent confidence interval
        reps: int, default = 5000
            number of bootstrap replicates

    Returns:
        dictionary of statistics; tests that do not apply hold 'NIL'.

    Raises:
        ValueError: alpha_level outside [0, 1], or paired samples of
            different length.
        NotImplementedError: x2 given with paired=False. That path
            previously failed with an unbound local variable.
    '''
    # Imports
    import warnings

    import numpy as np
    import pandas as pd
    import seaborn as sns
    from scipy.stats import ttest_1samp, ttest_rel, wilcoxon

    # Turn to pandas series.
    x1 = pd.Series(x1).dropna()

    # Initialise statfunction
    if statfunction is None:
        statfunction = np.mean

    # Compute two-sided alphas.
    if alpha_level > 1. or alpha_level < 0.:
        raise ValueError("alpha_level must be between 0 and 1.")
    alphas = np.array([alpha_level/2., 1-alpha_level/2.])

    sns_bootstrap_kwargs = {'func': statfunction,
                            'n_boot': reps,
                            'smooth': smoothboot}

    # Every test starts out as "not applicable".
    ttest_single = 'NIL'
    ttest_2_ind = 'NIL'
    ttest_2_paired = 'NIL'
    wilcoxonresult = 'NIL'
    mannwhitneyresult = 'NIL'

    if x2 is None:
        # Single sample: bootstrap x1 itself.
        tx = x1
        ttest_single = ttest_1samp(x1, 0)[1]
    elif paired:
        x2 = pd.Series(x2).dropna()
        if len(x1) != len(x2):
            raise ValueError('x1 and x2 are not the same length.')
        # Paired samples: bootstrap the pairwise differences.
        tx = x2 - x1
        ttest_2_paired = ttest_rel(x1, x2)[1]
        wilcoxonresult = wilcoxon(x1, x2)[1]
    else:
        raise NotImplementedError(
            'Unpaired two-sample bootstrap is not implemented; '
            'pass paired=True or x2=None.')

    # Turns data into array, then tuple.
    tdata = (tx,)

    # The value of the statistic function applied
    # just to the actual data.
    summ_stat = statfunction(*tdata)
    statarray = sns.algorithms.bootstrap(tx, **sns_bootstrap_kwargs)
    statarray.sort()

    # Get Percentile indices
    pct_low_high = np.round((reps-1) * alphas)
    pct_low_high = np.nan_to_num(pct_low_high).astype('int')

    # Get Bias-Corrected Accelerated indices convenience function invoked.
    bca_low_high = bca(tdata, alphas, statarray,
                       statfunction, summ_stat, reps)

    # Warnings for unstable or extreme indices.
    for ind in [pct_low_high, bca_low_high]:
        if np.any(ind == 0) or np.any(ind == reps-1):
            warnings.warn("Some values used extremal samples;"
                          " results are probably unstable.")
        elif np.any(ind<10) or np.any(ind>=reps-10):
            warnings.warn("Some values used top 10 low/high samples;"
                          " results may be unstable.")

    # Mean of the analysed values: x2 - x1 when paired, x1 when single.
    effsize = np.mean(tx)

    stat_dict = {'ci' : (1-alpha_level)*100,
                 'pct_ci_low' : statarray[pct_low_high[0]],
                 'pct_ci_high' : statarray[pct_low_high[1]],
                 'pct_low_high_indices' : pct_low_high,
                 'bca_ci_low' : statarray[bca_low_high[0]],
                 'bca_ci_high' : statarray[bca_low_high[1]],
                 'bca_low_high_indices' : bca_low_high,
                 'pvalue_1samp_ttest' : ttest_single,
                 'pvalue_2samp_ind_ttest' : ttest_2_ind,
                 'pvalue_2samp_paired_ttest' : ttest_2_paired,
                 'pvalue_wilcoxon' : wilcoxonresult,
                 'pvalue_mann_whitney' : mannwhitneyresult,
                 'effect_size' : effsize}
    return stat_dict
# Mathieu Blondel, February 2012 # License: BSD 3 clause # Port to Python of examples in chapter 5 of # "Introductory Statistics with R" by Peter Dalgaard import numpy as np from scipy.stats import ttest_1samp, wilcoxon, ttest_ind, mannwhitneyu # daily intake of energy in kJ for 11 women daily_intake = np.array( [5260, 5470, 5640, 6180, 6390, 6515, 6805, 7515, 7515, 8230, 8770]) # one sample t-test # null hypothesis: expected value = 7725 t_statistic, p_value = ttest_1samp(daily_intake, 7725) # p_value < 0.05 => alternative hypothesis: # data deviate significantly from the hypothesis that the mean # is 7725 at the 5% level of significance print "one-sample t-test", p_value # one sample wilcoxon-test z_statistic, p_value = wilcoxon(daily_intake - 7725) print "one-sample wilcoxon-test", p_value energ = np.array([ # energy expenditure in mJ and stature (0=obese, 1=lean) [9.21, 0], [7.53, 1], [7.48, 1],
# Bare expression; it only has a visible effect in an interactive session.
game.history_quantity
from matplotlib import pyplot as plt
temp = np.array(game.history_quantity.tolist())

# Per-player quantity and profit histories.
plt.plot(game.history_quantity)
plt.title("Quantity")
plt.show()
plt.plot(game.history_profits)
plt.title("profits")
plt.show()

# NOTE(review): the extent of this `if` block is inferred from the two
# aggregate plots; the original indentation is not visible here -- confirm.
if len(players) > 1:
    # total quantity produced
    plt.plot(np.sum(game.history_quantity, 1))
    plt.title("total quantity produced")
    plt.show()
    # aggregated profits
    plt.plot(np.sum(game.history_profits, 1))
    plt.title("aggregated profits")
    plt.show()

np.mean(np.sum(game.history_quantity, 1))

from scipy.stats import ttest_1samp
# Total quantity over the last 500 recorded steps, tested against the
# competitive-equilibrium production.
qty = np.sum(game.history_quantity, 1)
qty = qty[-500:]
print(np.mean(qty))
print(ttest_1samp(qty, popmean=game.get_competitive_equilibrium_production()))
# One-sample t-test: does the mean of `one_sample_data` differ from 175.3?
from scipy import stats

one_sample_data = [
    177.3, 182.7, 169.6, 176.3, 180.3,
    179.4, 178.5, 177.2, 181.8, 176.5,
]

# ttest_1samp returns a result whose first two items are the t-statistic and
# the two-sided p-value.
one_sample = stats.ttest_1samp(one_sample_data, 175.3)

# Use the print() call form (the bare `print "..."` statement is a SyntaxError
# on Python 3) and index the two fields explicitly, so the formatting does not
# depend on the result object being an exact 2-tuple.
print("The t-statistic is %.3f and the p-value is %.3f."
      % (one_sample[0], one_sample[1]))
plt.errorbar( np.arange(3) + 0.05 * (-1 + exp_j), n_pows.mean(1), n_pows.std(1)) plt.xticks([0, 1, 2], ['background', 'random border', 'sin border']) plt.xlim(-0.5, 2.5) plt.ylabel('power / background power') plt.title( 'Relative mean power in {} with rejections: ICA_artifacts and CSP_alpha' .format(channel)) # with rejections: ICA_artifacts and CSP_alpha print(n_pows.mean(1)) print(n_pows.std(1)) all_pows.append(n_pows) all_pows = np.array(all_pows) print(all_pows[:, 0]) print(ttest_1samp(all_pows[:, 0].flatten(), 1)) print(ttest_1samp(all_pows[:, 1].flatten(), 1)) print(ttest_1samp(all_pows[:, 2].flatten(), 1)) plt.legend(experiments) plt.savefig(channel + '.png', dpi=200) plt.show() plt.figure() plt.hist(all_pows[:, 0].flatten(), bins=30) plt.show() plt.figure() plt.hist(all_pows[:, 1].flatten(), bins=30) plt.show()
103, 111, 104, 111, 89, 78, 100, 89, 85, 88 ], [ 137, 105, 133, 108, 115, 170, 103, 145, 78, 107, 84, 148, 147, 87, 166, 146, 123, 135, 112, 93, 76, 116, 78, 101, 123 ]]) dataDiff = data[1, :] - data[0, :] dataDiff.mean(), dataDiff.std() plt.rcParams['figure.figsize'] = (15.0, 5.0) plt.hist(dataDiff) plt.show() t_stat, p_value = ttest_1samp(dataDiff, 0.0) print(p_value / 2.0) mean, std = norm.fit(dataDiff) print(mean, std) print('#', 50 * "-") # ----------------------- from scipy.stats import gaussian_kde plt.hist(dataDiff, density=1) x = numpy.linspace(dataDiff.min(), dataDiff.max(), 1000) pdf = norm.pdf(x, mean, std) plt.plot(x, pdf) pdf = gaussian_kde(dataDiff) pdf = pdf.evaluate(x)
# Load a comma-separated matrix from data.csv and t-test every column's mean
# against 0.5. Relies on `loadtxt`, `np` and `stats` being imported earlier.
# NOTE(review): presumably rows are samples and columns are hash bits, as the
# comments below suggest -- verify against whatever generates data.csv.
dataset = loadtxt('data.csv', delimiter=',')
# Per-column summary statistics; not read again in the visible part of the script.
means = np.mean(dataset, axis=0)
stds = np.std(dataset, axis=0)
N = dataset.shape[0]

# If the hash bits are truly random, we should expect that
# 50% of the time each bit is 0, and 50% of the time each bit is 1.
# The variance of each bit should be 1/4 (i.e. standard deviation = 1/2).
#
# We perform a t-test for each bit to determine if the bit's sample
# distribution is equal to the expected distribution (mean=0.5)...
print('********** t-test **********')
num_improbable = 0
for variable in range(dataset.shape[1]):
    # null hypothesis = "mean is 0.5"
    # alternative hypothesis = "mean is not 0.5"
    # if p < 0.05 we reject the null hypothesis --> "mean is not 0.5"
    stat, p = stats.ttest_1samp(dataset[:, variable], 0.5)
    # A column is flagged only when both the statistic and the p-value agree.
    if abs(stat) > 2 and p < 0.05:
        print('Mean of bit %d is probably not 0.5' % variable)
        num_improbable += 1
print('num_improbable = %d' % num_improbable)
# NOTE(review): no multiple-comparison correction is applied, so when many
# columns are tested some rejections at the 5% level are expected by chance.
# The script aborts here if no column was flagged.
assert num_improbable > 0, 'I am crazy, cannot derive a relationship between bits in a sha256 hash'

# But if you re-generate the dataset, the bits whose mean is not 0.5
# are not consistent across randomly-generated data sets... :(
print("Wiederholbarkeit ausreichend") else: print("Wiederholbarkeit ist nicht ausreichend") c_gk = (0.1 * Y_TOLERANCE - np.abs(y_deviation)) / 3 / np.std(y_repeat_test, ddof=1) print("") print("C_gk = ", round(c_gk, 3)) if c_gk >= 1.33: print("Wiederholbarkeit und sytematische Abweichung ausreichend") elif c_g >= 1.33: print("Systematische Abweichung zu groß") else: print("Auflösung und systematische Abweichung nicht ausreichend") # Hypothesistest with H0: y_repeat_test = Y_REPEAT_REFERENCE hypo_test = stats.ttest_1samp(y_repeat_test, Y_REPEAT_REFERENCE) print("") print("Hypothesentest auf Abweichung mit p-value = ", round(float(hypo_test[1]), 4)) if hypo_test[1] <= 0.05: print("Abweichung signifikant") else: print("Abweichung nicht signifikant") # Confidence bounds für y_repeat_test GAMMA = 0.95 c1 = stats.t.ppf((1 - GAMMA) / 2, y_repeat_len - 1) c2 = stats.t.ppf((1 + GAMMA) / 2, y_repeat_len - 1) y_repeat_min = np.mean(y_repeat_test) + c1*np.std(y_repeat_test, ddof=1)\ / np.sqrt(y_repeat_len) y_repeat_max = np.mean(y_repeat_test) + c2*np.std(y_repeat_test, ddof=1)\