def test_small(self):
    x = [1,2,3,3,4]
    y = [3,2,6,1,6,1,4,1]
    with warnings.catch_warnings(record=True):  # Ties preclude use ...
        W, pval = stats.ansari(x,y)
    assert_almost_equal(W,23.5,11)
    assert_almost_equal(pval,0.13499256881897437,11)
def test_result_attributes(self):
    x = [1, 2, 3, 3, 4]
    y = [3, 2, 6, 1, 6, 1, 4, 1]
    with warnings.catch_warnings(record=True):  # Ties preclude use ...
        res = stats.ansari(x, y)
    attributes = ('statistic', 'pvalue')
    check_named_results(res, attributes)
def test_small(self):
    x = [1, 2, 3, 3, 4]
    y = [3, 2, 6, 1, 6, 1, 4, 1]
    with warnings.catch_warnings(record=True):  # Ties preclude use ...
        W, pval = stats.ansari(x, y)
    assert_almost_equal(W, 23.5, 11)
    assert_almost_equal(pval, 0.13499256881897437, 11)
def nonparametric_check_for_d_similarity(df1, df2, alpha=0.01):
    common_features = set(df1.columns) & set(df2.columns)
    features_stats = []
    for col in common_features:
        # H0 = same central parameter
        delta_test, delta_pvalue = stats.mannwhitneyu(df1[col], df2[col])
        if delta_pvalue > alpha:
            delta = 'Same central parameter'
        else:
            delta = 'Different central parameter'
        # H0 = equality of the scale parameters
        scale1_test, scale1_pval = stats.ansari(df1[col], df2[col])
        if scale1_pval > alpha:
            scale1 = 'Same scale AnsariTest'
        else:
            scale1 = 'Different scale AnsariTest'
        # H0 = equality of the scale parameters
        scale2_test, scale2_pval = stats.mood(df1[col], df2[col])
        if scale2_pval > alpha:
            scale2 = 'Same scale MoodTest'
        else:
            scale2 = 'Different scale MoodTest'
        features_stats.append([col, delta_pvalue, delta, scale1_pval, scale1,
                               scale2_pval, scale2])
    features_stats = pd.DataFrame(features_stats)
    features_stats.columns = ['col_name', 'delta_pval', 'delta_status',
                              'scale1_pval', 'scale1_status',
                              'scale2_pval', 'scale2_status']
    return features_stats
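A minimal usage sketch for the helper above, assuming `pandas as pd` and `scipy.stats as stats` are imported as the function expects; the frames, column names, and values here are hypothetical and only illustrate the shape of the inputs and output.

import numpy as np
import pandas as pd
from scipy import stats

rng = np.random.default_rng(0)
# Two hypothetical frames sharing the columns 'a' and 'b'; 'b' differs in scale.
df1 = pd.DataFrame({'a': rng.normal(0, 1, 200), 'b': rng.normal(0, 1, 200)})
df2 = pd.DataFrame({'a': rng.normal(0, 1, 200), 'b': rng.normal(0, 3, 200)})

report = nonparametric_check_for_d_similarity(df1, df2, alpha=0.01)
# One row per common column, with p-values and verdicts for each test.
print(report[['col_name', 'delta_status', 'scale1_status', 'scale2_status']])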
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    W, pval = stats.ansari(ramsay, parekh)
    assert_almost_equal(W, 185.5, 11)
    assert_almost_equal(pval, 0.18145819972867083, 11)
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    W, pval = stats.ansari(ramsay, parekh)
    assert_almost_equal(W,185.5,11)
    assert_almost_equal(pval,0.18145819972867083,11)
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore',
                                message="Ties preclude use of exact statistic.")
        W, pval = stats.ansari(ramsay, parekh)
    assert_almost_equal(W,185.5,11)
    assert_almost_equal(pval,0.18145819972867083,11)
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore', message="Ties preclude use of exact statistic.")
        W, pval = stats.ansari(ramsay, parekh)
    assert_almost_equal(W, 185.5, 11)
    assert_almost_equal(pval, 0.18145819972867083, 11)
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    warn_ctx = WarningManager()
    warn_ctx.__enter__()
    try:
        warnings.filterwarnings(
            'ignore', message="Ties preclude use of exact statistic.")
        W, pval = stats.ansari(ramsay, parekh)
    finally:
        warn_ctx.__exit__()
    assert_almost_equal(W, 185.5, 11)
    assert_almost_equal(pval, 0.18145819972867083, 11)
def test_approx(self):
    ramsay = np.array((111, 107, 100, 99, 102, 106, 109, 108, 104, 99,
                       101, 96, 97, 102, 107, 113, 116, 113, 110, 98))
    parekh = np.array((107, 108, 106, 98, 105, 103, 110, 105, 104, 100,
                       96, 108, 103, 104, 114, 114, 113, 108, 106, 99))
    warn_ctx = WarningManager()
    warn_ctx.__enter__()
    try:
        warnings.filterwarnings('ignore',
                                message="Ties preclude use of exact statistic.")
        W, pval = stats.ansari(ramsay, parekh)
    finally:
        warn_ctx.__exit__()
    assert_almost_equal(W,185.5,11)
    assert_almost_equal(pval,0.18145819972867083,11)
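The test variants above all work around the same behaviour: when the pooled samples contain ties, `scipy.stats.ansari` cannot use the exact null distribution, falls back to a normal approximation, and emits the UserWarning quoted in the filters. A minimal, self-contained sketch of silencing that warning with the standard-library context manager (the sample values are taken from the small-sample tests above):

import warnings
from scipy import stats

x = [1, 2, 3, 3, 4]            # the repeated value 3 creates a tie
y = [3, 2, 6, 1, 6, 1, 4, 1]

with warnings.catch_warnings():
    warnings.filterwarnings(
        'ignore', message="Ties preclude use of exact statistic.")
    W, pval = stats.ansari(x, y)

print(W, pval)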
def vector_hypotheses(a, b):
    dict_stat = {}
    dict_pval = {}
    pea = pearsonr(a, b)
    dict_stat["pearsonr"], dict_pval["pearsonr"] = pea[0], pea[1]
    ran = ranksums(a, b)
    dict_stat["ranksums"], dict_pval["ranksums"] = ran[0], ran[1]
    moo = mood(a, b)
    dict_stat["mood"], dict_pval["mood"] = moo[0], moo[1]
    fli = fligner(a, b)
    dict_stat["fligner"], dict_pval["fligner"] = fli[0], fli[1]
    ans = ansari(a, b)
    dict_stat["ansari"], dict_pval["ansari"] = ans[0], ans[1]
    bar = bartlett(a, b)
    dict_stat["bartlett"], dict_pval["bartlett"] = bar[0], bar[1]
    lev = levene(a, b)
    dict_stat["levene"], dict_pval["levene"] = lev[0], lev[1]
    man = mannwhitneyu(a, b)
    dict_stat["mannwhitneyu"], dict_pval["mannwhitneyu"] = man[0], man[1]
    return dict_stat, dict_pval
def plotExpBox_Main(inputFiles,headers,valcols,outputFile,sep,startRow,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,plotPvalueCluster,outputClusterPrefix,methodCluster,xlegendrotation,xlabe,ylabe,figsz,titl,showSampleSizes,trimToMinSize,relabels,logb,plotHistogramToFile,plotMedianForGroups,botta,showViolin,showBox,firstColAnnot,plotTrend,showLegend,makePzfxFile,makeBinMatrix,writeDataSummaryStat,summaryStatRange,minuslog10pvalue,minNDataToKeep,vfacecolor,valpha,outXYZPvalues,dividePlots): #if plotPvalueCluster: #if pvalue cluster is needed: # from Bio.Cluster.cluster import * # from Bio.Cluster import * #endif #the real deal! plotData=[] xtickLabels=[] trendData={} annot={} minSize=-1 for inputFile,header,cols in zip(inputFiles,headers,valcols): fin=generic_istream(inputFile) startIdx=len(plotData) if firstColAnnot: colAnnot=cols[0] cols=cols[1:] annotThisFile=[] annot[startIdx]=annotThisFile else: colAnnot=-1 annotThisFile=None for col in cols: plotData.append([]) xtickLabels.append(header[col]) colIndices=range(startIdx,startIdx+len(cols)) if plotTrend: #print >> stderr,"plotTrend" trendDataThisFile=[] trendData[startIdx]=trendDataThisFile else: trendDataThisFile=None lino=0 for lin in fin: lino+=1 if lino<startRow: continue fields=lin.rstrip("\r\n").split(sep) if plotTrend: #print >> stderr,"a" trendDataThisLine=[] else: trendDataThisLine=None allDataOKThisLine=True if colAnnot>=0: annotThisFile.append(fields[colAnnot]) for idx,col in zip(colIndices,cols): try: value=float(fields[col]) if logb!=0: if value==0.0: raise ValueError value=log(value)/logb plotData[idx].append(value) if plotTrend: trendDataThisLine.append(value) #print >> stderr,"value:",value except: allDataOKThisLine=False if plotTrend: if allDataOKThisLine: trendDataThisFile.append(trendDataThisLine) else: trendDataThisFile.append(None) fin.close() if minSize==-1: minSize=len(plotData[idx]) #or startIDX? 
else: minSize=min([minSize,len(plotData[idx])]) if trimToMinSize: print >> stderr,"trimming to min size =",minSize trimData(plotData,minSize) if len(relabels)>0: #if len(relabels)!=len(xtickLabels): # print >> stderr,"relabels doesn't have the same length as original label vectors",xtickLabels,"=>",relabels # exit() print >> stderr,xtickLabels print >> stderr,relabels for i,relabel in zip(range(0,len(relabels)),relabels): xtickLabels[i]=relabel for i in range(0,len(plotMedianForGroups)): plotMedianForGroups[i]=getCol0ListFromCol1ListStringAdv(xtickLabels,plotMedianForGroups[i]) #drawing medians: medianToDraw=[] for mediangrouper in plotMedianForGroups: curD=[] for c in mediangrouper: curD.extend(plotData[c]) medianToDraw.append(median(curD)) for c in range(len(plotData)-1,-1,-1): if len(plotData[c])<minNDataToKeep: print >> stderr,xtickLabels[c],"discarded because has only",len(plotData[c]),"data points <",minNDataToKeep del plotData[c] del xtickLabels[c] if not skipStat: print >> stdout,"student t-test (1 sample; mean=0)" print >> stdout,"sample","mean","p-val","median" if writeDataSummaryStat: fDSS=open(writeDataSummaryStat,"w") print >> fDSS,"sample\tmean\tvar\tsd\tmin\tmax\tN\tNInRange["+str(summaryStatRange[0])+","+str(summaryStatRange[1])+"]\t%NInRange\tNbelowRange\t%Nbelow\tNAboveRange\t%NAbove" for x in range(0,len(plotData)): #print >> stderr, len(plotData[x]) try: print >> stdout, xtickLabels[x],mean(plotData[x]),ttest_1samp(plotData[x],0)[1],median(plotData[x]) except: print >> stdout, xtickLabels[x],mean(plotData[x]),"NA",median(plotData[x]) if writeDataSummaryStat: sumData,N,NIN,NBelow,NAbove=filterDataInRangeInclusive(plotData[x],summaryStatRange[0],summaryStatRange[1]) if NIN>1: #print >> stderr,"sumData=",sumData #print >> stderr,mean mea=mean2(sumData) DDOF=1 sd=std(sumData,ddof=DDOF) var=sd*sd mi=min(sumData) ma=max(sumData) else: mea="NA" sd="NA" var="NA" mi="NA" ma="NA" print >> fDSS,xtickLabels[x]+"\t"+str(mea)+"\t"+str(var)+"\t"+str(sd)+"\t"+str(mi)+"\t"+str(ma)+"\t"+str(N)+"\t"+str(NIN)+"\t"+str(float(NIN)*100/N)+"\t"+str(NBelow)+"\t"+str(float(NBelow)*100/N)+"\t"+str(NAbove)+"\t"+str(float(NAbove)*100/N) pvalueM=[] if writeDataSummaryStat: fDSS.close() print >> stdout,"" print >> stdout,"student t-test (2 samples)" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: try: pvalue=ttest_ind(plotData[x],plotData[y])[1] except: pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout, str(pvalue), pvalueRow.append(pvalue) print >> stdout,""; print >> stdout,"" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_t_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_t",xtickLabels,pvalueM,methodCluster) pvalueM=[] print >> stdout,"welch t-test" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: 
try: pvalue=welchs_approximate_ttest_arr(plotData[x],plotData[y])[3] except: pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout, str(pvalue), pvalueRow.append(pvalue) print >> stdout,""; if outXYZPvalues: writeXYZPvalues(outXYZPvalues+"_Welch.xyz",xtickLabels,pvalueM) if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_Welch_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_Welch",xtickLabels,pvalueM,methodCluster) print >> stdout,"" print >> stdout,"non-parametric (Mann-Whitney U)" #"non-parametric (Mann-Whitney U if larger n<=20 else Wilcoxon)" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], pvalueM=[] print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue=mannwhitneyu(plotData[x],plotData[y])[1]*2 except: pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout,pvalue, #mann-whiteney need to mul by 2 (one tail to two tail) pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout,""; if outXYZPvalues: writeXYZPvalues(outXYZPvalues+"_U.xyz",xtickLabels,pvalueM) if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_U_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_U",xtickLabels,pvalueM,methodCluster) #####now the variance tests print >> stdout,"" print >> stdout,"Ansari-Bradley Two-sample Test for difference in scale parameters " print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], pvalueM=[] print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue=ansari(plotData[x],plotData[y])[1] except: pvalue="NA" if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 #pvalue=1.0 print >> stdout,pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout,""; if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_Ansari_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_Ansari",xtickLabels,pvalueM,methodCluster) ##### #####now the variance tests print >> stdout,"" print >> stdout,"Fligner's Two-sample Test for equal variance (non-parametrics)" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], pvalueM=[] print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue=fligner(plotData[x],plotData[y])[1] except: pvalue="NA" 
#pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout,pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout,""; if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_fligner_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_fligner",xtickLabels,pvalueM,methodCluster) ##### #####now the variance tests print >> stdout,"" print >> stdout,"Levene's Two-sample Test for equal variance" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], pvalueM=[] print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue=levene(plotData[x],plotData[y])[1] except: pvalue="NA" #pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout,pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout,""; if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_levene_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_levene",xtickLabels,pvalueM,methodCluster) ##### #####now the variance tests print >> stdout,"" print >> stdout,"Bartlett's Two-sample Test for equal variance (for normal distributions)" print >> stdout,"p-val", for x in range(0,len(plotData)): print >> stdout,xtickLabels[x], pvalueM=[] print >> stdout,"" for x in range(0,len(plotData)): pvalueRow=[] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0,len(plotData)): if y<=x: print >> stdout, "", if x==y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue=bartlett(plotData[x],plotData[y])[1] except: pvalue="NA" #pvalue=1.0 if minuslog10pvalue and str(pvalue)!="NA": try: pvalue=-1*log(pvalue,10) except: pvalue=-1000.0 print >> stdout,pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout,""; if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix+"_bartlett_raw",xtickLabels,pvalueM) makePValueClusterPlot(outputClusterPrefix+"_bartlett",xtickLabels,pvalueM,methodCluster) ##### figure(figsize=figsz) subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8) if len(titl)==0: titl=outputFile plotExpBox(plotData,xtickLabels,showIndPoints,mark,markMean,showMean,notch,whisker,outliers,xlegendrotation,xlabe,ylabe,titl,showSampleSizes,showViolin,showBox,annot,trendData,showLegend,makePzfxFile,makeBinMatrix,dividePlots) #ylim([0,200]) for m in medianToDraw: axhline(y=m,linestyle=':',color='gray') savefig(outputFile,bbox_inches="tight") if len(plotHistogramToFile)>0: drawHistogram(plotHistogramToFile,plotData,xtickLabels) drawDensigram(plotHistogramToFile+".density.png",plotData,xtickLabels)
def custom(a, b):
    v, p = stats.ansari(a, b)
    return p
def test_exact(self):
    W,pval = stats.ansari([1,2,3,4],[15,5,20,8,10,12])
    assert_almost_equal(W,10.0,11)
    assert_almost_equal(pval,0.533333333333333333,7)
def test_small(self):
    x = [1,2,3,3,4]
    y = [3,2,6,1,6,1,4,1]
    W, pval = stats.ansari(x,y)
    assert_almost_equal(W,23.5,11)
    assert_almost_equal(pval,0.13499256881897437,11)
def PH_Walker(y, walkerRule='prop', walkerParams=np.array([])):
    """
    PH_Walker simulates a hypothetical walker moving through the time domain.

    The hypothetical particle (or 'walker') moves in response to values of the
    time series at each point. Outputs from this operation are summaries of the
    walker's motion, and comparisons of it to the original time series.

    :param y: the input time series

    :param walkerRule: the kinematic rule by which the walker moves in response
        to the time series over time:

        (i) 'prop': the walker narrows the gap between its value and that of
            the time series by a given proportion p.

        (ii) 'biasprop': the walker is biased to move more in one direction;
            when it is being pushed up by the time series, it narrows the gap
            by a proportion p_up, and when it is being pushed down it narrows
            the gap by a (potentially different) proportion p_down.
            walkerParams = [pup, pdown]

        (iii) 'momentum': the walker moves as if it has mass m and inertia from
            the previous time step, and the time series acts as a force altering
            its motion in a classical Newtonian dynamics framework.
            walkerParams = [m], the mass.

        (iv) 'runningvar': the walker moves with inertia as above, but its
            values are also adjusted so as to match the local variance of the
            time series by a multiplicative factor. walkerParams = [m, wl],
            where m is the inertial mass and wl is the window length.

    :param walkerParams: the parameters for the specified walker, explained above

    :return: a dictionary including the mean, spread, maximum, minimum, and
        autocorrelation of the walker's trajectory, the number of crossings
        between the walker and the original time series, the ratio or difference
        of some basic summary statistics between the original time series and
        the walker, an Ansari-Bradley test comparing the distributions of the
        walker and original time series, and various statistics summarizing
        properties of the residuals between the walker's trajectory and the
        original time series.
    """
    # ------------------------------------------------------------------
    # PRELIMINARIES
    # ------------------------------------------------------------------
    N = len(y)

    # ------------------------------------------------------------------
    # CHECK INPUTS
    # ------------------------------------------------------------------
    if len(walkerParams) == 0:
        if walkerRule == 'prop':
            walkerParams = np.array([0.5])
        if walkerRule == 'biasprop':
            walkerParams = np.array([0.1, 0.2])
        if walkerRule == 'momentum':
            walkerParams = np.array([2])
        if walkerRule == 'runningvar':
            walkerParams = [1.5, 50]

    # ------------------------------------------------------------------
    # (1) WALK
    # ------------------------------------------------------------------
    w = np.zeros(N)

    if walkerRule == 'prop':
        # walker starts at zero and narrows the gap between its position
        # and the time series value at that point by the proportion given
        # in walkerParams, to give the value at the subsequent time step
        p = walkerParams
        w[0] = 0
        for i in range(1, N):
            w[i] = w[i - 1] + p * (y[i - 1] - w[i - 1])

    elif walkerRule == 'biasprop':
        # walker is biased in one or the other direction (i.e., prefers to
        # go up, or down). Requires a vector of inputs: [p_up, p_down]
        pup = walkerParams[0]
        pdown = walkerParams[1]
        w[0] = 0
        for i in range(1, N):
            if y[i] > y[i - 1]:
                w[i] = w[i - 1] + pup * (y[i - 1] - w[i - 1])
            else:
                w[i] = w[i - 1] + pdown * (y[i - 1] - w[i - 1])

    elif walkerRule == 'momentum':
        # walker moves as if it had inertia from the previous time step,
        # i.e., it 'wants' to move the same amount; the time series acts as
        # a force changing its motion
        m = walkerParams[0]  # inertial mass
        w[0] = y[0]
        w[1] = y[1]
        for i in range(2, N):
            w_inert = w[i - 1] + (w[i - 1] - w[i - 2])
            w[i] = w_inert + (y[i] - w_inert) / m  # dissipative term
            # equation of motion (s - s_0 = ut + F/m*t^2), where the 'force' F
            # is the change in the original time series at that point

    elif walkerRule == 'runningvar':
        m = walkerParams[0]
        wl = walkerParams[1]
        w[0] = y[0]
        w[1] = y[1]
        for i in range(2, N):
            w_inert = w[i - 1] + (w[i - 1] - w[i - 2])
            w_mom = w_inert + (y[i] - w_inert) / m  # dissipative term from time series
            if i > wl:
                w[i] = w_mom * (np.std(y[(i - wl):i])) / np.std(w[(i - wl):i])
            else:
                w[i] = w_mom

    else:
        print("Error: Unknown method: " + walkerRule +
              " for simulating walker on the time series")

    # ------------------------------------------------------------------
    # (2) STATISTICS ON THE WALK
    # ------------------------------------------------------------------
    out = {}  # dictionary for storing variables

    # (i) The walk itself ----------------------------------------------
    out['w_mean'] = np.mean(w)
    out['w_median'] = np.median(w)
    out['w_std'] = np.std(w)
    # this function call in MATLAB uses method='Fourier', but we don't have that
    # case implemented yet in autoCorr; however this seems to output the same thing
    out['w_ac1'] = ac.CO_AutoCorr(w, 1, method='timedomainstat')
    out['w_ac2'] = ac.CO_AutoCorr(w, 2, method='timedomainstat')
    out['w_tau'] = fz.CO_FirstZero(w, 'ac')
    out['w_min'] = np.min(w)
    out['w_max'] = np.max(w)
    # np.multiply performs elementwise multiplication like MATLAB .*
    out['propzcross'] = sum(
        np.multiply(w[0:(len(w) - 2)], w[1:(len(w) - 1)]) < 0) / (N - 1)

    # (ii) Differences between the walk and the signal -------------------
    out['sw_meanabsdiff'] = np.mean(np.abs(y - w))
    out['sw_taudiff'] = fz.CO_FirstZero(y, 'ac') - fz.CO_FirstZero(w, 'ac')
    out['sw_stdrat'] = np.std(w) / np.std(y)  # will be the same as w_std for a z-scored signal
    out['sw_ac1rat'] = out['w_ac1'] / ac.CO_AutoCorr(y, 1)
    out['sw_minrat'] = min(w) / min(y)
    out['sw_maxrat'] = max(w) / max(y)
    out['sw_propcross'] = sum(
        np.multiply(w[0:(len(w) - 1)] - y[0:(len(y) - 1)],
                    w[1:(len(w))] - y[1:(len(y))]) < 0) / (N - 1)

    ansari = stats.ansari(w, y)
    out['sw_ansarib_pval'] = ansari[1]
    # r = np.linspace(np.min(np.min(y), np.min(w)), np.max(np.max(y), np.max(w)), 200)
    # dy = stats.gaussian_kde(y, r)

    # (iii) looking at residuals between time series and walker ----------
    res = w - y
    # CLOSEST FUNCTION TO MATLAB RUNSTEST, found in statsmodels.sandbox.stats.runs
    # runstest = runs.runstest_2samp(res, groups=2)
    # out['res_runstest'] = runstest
    out['out.res_acl'] = ac.CO_AutoCorr(res, lag=1)

    return out
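A hypothetical driver for the operation above, assuming the autocorrelation helpers imported in the surrounding module as `ac` and `fz` (and `scipy.stats as stats`) are available; the synthetic series is only for illustration.

import numpy as np

rng = np.random.default_rng(1)
y = np.cumsum(rng.normal(size=500))   # a random-walk-like test series
y = (y - np.mean(y)) / np.std(y)      # z-score, as some of the summary ratios assume

out = PH_Walker(y, walkerRule='momentum', walkerParams=np.array([2.0]))
print(out['sw_ansarib_pval'])         # Ansari-Bradley p-value: walker vs. original series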
def test_ansariBradleyTest_approxOdd_xResult(self):
    data_1 = np.arange(1, 101)
    data_2 = np.arange(50, 151)
    x1, p1 = ansari_bradley_test(data_1, data_2, alternative="two-sided")
    x2, p2 = ansari(data_1, data_2)
    assert pytest.approx(x2) == x1
def test_exact(self):
    W, pval = stats.ansari([1, 2, 3, 4], [15, 5, 20, 8, 10, 12])
    assert_almost_equal(W, 10.0, 11)
    assert_almost_equal(pval, 0.533333333333333333, 7)
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument( "--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;", ) parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help= "Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help= "If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values.", ) parser.add_argument( "--fisher", action="store_true", default=False, help="if true then Fisher definition is used", ) parser.add_argument( "--bias", action="store_true", default=False, help= "if false,then the calculations are corrected for statistical bias", ) parser.add_argument( "--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored", ) parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored", ) parser.add_argument( "--inclusive", action="store_true", default=False, help="if false,limit will be ignored", ) parser.add_argument( "--printextras", action="store_true", default=False, help= "If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help= "Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument( "--correction", action="store_true", default=False, help="continuity correction ", ) parser.add_argument( "--axis", type=int, default=0, help= "Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help= "the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument( "--score", type=int, default=0, help="Score that is compared to the elements in a.", ) parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help= "The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument( "--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds", ) parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help= "lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help= "If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument( "--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e", ) parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols is not None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols is not None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols is not None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe( map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == 
"nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis( map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias, ) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode, ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf == 0 and mf == 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf == 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf == 0: max = stats.tmax(map(float, sample_one)) else: max 
= stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf == 0 and mf == 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf == 0 and mf == 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf == 0 and mf == 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf == 0 and mf == 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation, ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation, ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf == 0 and mf == 0: rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq( map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf == 0 and mf == 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf == 0 and mf == 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1( map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail, ) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf == 0 and mf == 0: hi, low_range, binsize, ex = stats.histogram( map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram( map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf == 0 and mf == 0: cum, low_range, binsize, ex = stats.cumfreq( map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq( map(float, sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf == 0 and mf == 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda == 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif 
test_id.strip() == "ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two)) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity, ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind(map(float, sample_one), map(float, sample_two), equal_var=args.equal_var) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort, ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction, ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, 
sample_one), map(float, sample_two), ddof=args.ddof, lambda_=args.lambda_, ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two), ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()
print("bartlett") data['bartlett'] = [ bartlett(x, y)[0] for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors)) ] print("ranksums") data['ranksums'] = [ ranksums(x, y)[0] for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors)) ] print("ansari") data['ansari'] = [ ansari(x, y)[0] for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors)) ] #============================================================================== # print("mannwhitneyu") # data['mannwhitneyu'] = [mannwhitneyu(x, y)[0] for (x, y) in zip(np.nan_to_num(question1_vectors), # np.nan_to_num(question2_vectors))] #============================================================================== print("fligner") data['fligner'] = [ fligner(x, y)[0] for (x, y) in zip(np.nan_to_num(question1_vectors), np.nan_to_num(question2_vectors)) ]
def test_result_attributes(self):
    x = [1, 2, 3, 3, 4]
    y = [3, 2, 6, 1, 6, 1, 4, 1]
    res = stats.ansari(x, y)
    attributes = ('statistic', 'pvalue')
    check_named_results(res, attributes)
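As the attribute check above suggests, `scipy.stats.ansari` returns a named result whose `statistic` and `pvalue` fields can be read by attribute as well as unpacked as a tuple; a minimal standalone sketch reusing the data from the exact-case tests above:

from scipy import stats

res = stats.ansari([1, 2, 3, 4], [15, 5, 20, 8, 10, 12])
print(res.statistic, res.pvalue)   # same values as the (W, pval) tuple unpacking above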
def main(): parser = argparse.ArgumentParser() parser.add_argument("-i", "--infile", required=True, help="Tabular file.") parser.add_argument("-o", "--outfile", required=True, help="Path to the output file.") parser.add_argument("--sample_one_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_two_cols", help="Input format, like smi, sdf, inchi") parser.add_argument("--sample_cols", help="Input format, like smi, sdf, inchi,separate arrays using ;") parser.add_argument("--test_id", help="statistical test method") parser.add_argument( "--mwu_use_continuity", action="store_true", default=False, help="Whether a continuity correction (1/2.) should be taken into account.", ) parser.add_argument( "--equal_var", action="store_true", default=False, help="If set perform a standard independent 2 sample test that assumes equal population variances. If not set, perform Welch's t-test, which does not assume equal population variance.", ) parser.add_argument( "--reta", action="store_true", default=False, help="Whether or not to return the internally computed a values." ) parser.add_argument("--fisher", action="store_true", default=False, help="if true then Fisher definition is used") parser.add_argument( "--bias", action="store_true", default=False, help="if false,then the calculations are corrected for statistical bias", ) parser.add_argument("--inclusive1", action="store_true", default=False, help="if false,lower_limit will be ignored") parser.add_argument( "--inclusive2", action="store_true", default=False, help="if false,higher_limit will be ignored" ) parser.add_argument("--inclusive", action="store_true", default=False, help="if false,limit will be ignored") parser.add_argument( "--printextras", action="store_true", default=False, help="If True, if there are extra points a warning is raised saying how many of those points there are", ) parser.add_argument( "--initial_lexsort", action="store_true", default="False", help="Whether to use lexsort or quicksort as the sorting method for the initial sort of the inputs.", ) parser.add_argument("--correction", action="store_true", default=False, help="continuity correction ") parser.add_argument( "--axis", type=int, default=0, help="Axis can equal None (ravel array first), or an integer (the axis over which to operate on a and b)", ) parser.add_argument( "--n", type=int, default=0, help="the number of trials. This is ignored if x gives both the number of successes and failures", ) parser.add_argument("--b", type=int, default=0, help="The number of bins to use for the histogram") parser.add_argument("--N", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--ddof", type=int, default=0, help="Degrees of freedom correction") parser.add_argument("--score", type=int, default=0, help="Score that is compared to the elements in a.") parser.add_argument("--m", type=float, default=0.0, help="limits") parser.add_argument("--mf", type=float, default=2.0, help="lower limit") parser.add_argument("--nf", type=float, default=99.9, help="higher_limit") parser.add_argument( "--p", type=float, default=0.5, help="The hypothesized probability of success. 0 <= p <= 1. 
The default value is p = 0.5", ) parser.add_argument("--alpha", type=float, default=0.9, help="probability") parser.add_argument("--new", type=float, default=0.0, help="Value to put in place of values in a outside of bounds") parser.add_argument( "--proportiontocut", type=float, default=0.0, help="Proportion (in range 0-1) of total data set to trim of each end.", ) parser.add_argument( "--lambda_", type=float, default=1.0, help="lambda_ gives the power in the Cressie-Read power divergence statistic", ) parser.add_argument( "--imbda", type=float, default=0, help="If lmbda is not None, do the transformation for that value.If lmbda is None, find the lambda that maximizes the log-likelihood function and return it as the second output argument.", ) parser.add_argument("--base", type=float, default=1.6, help="The logarithmic base to use, defaults to e") parser.add_argument("--dtype", help="dtype") parser.add_argument("--med", help="med") parser.add_argument("--cdf", help="cdf") parser.add_argument("--zero_method", help="zero_method options") parser.add_argument("--dist", help="dist options") parser.add_argument("--ties", help="ties options") parser.add_argument("--alternative", help="alternative options") parser.add_argument("--mode", help="mode options") parser.add_argument("--method", help="method options") parser.add_argument("--md", help="md options") parser.add_argument("--center", help="center options") parser.add_argument("--kind", help="kind options") parser.add_argument("--tail", help="tail options") parser.add_argument("--interpolation", help="interpolation options") parser.add_argument("--statistic", help="statistic options") args = parser.parse_args() infile = args.infile outfile = open(args.outfile, "w+") test_id = args.test_id nf = args.nf mf = args.mf imbda = args.imbda inclusive1 = args.inclusive1 inclusive2 = args.inclusive2 sample0 = 0 sample1 = 0 sample2 = 0 if args.sample_cols != None: sample0 = 1 barlett_samples = [] for sample in args.sample_cols.split(";"): barlett_samples.append(map(int, sample.split(","))) if args.sample_one_cols != None: sample1 = 1 sample_one_cols = args.sample_one_cols.split(",") if args.sample_two_cols != None: sample_two_cols = args.sample_two_cols.split(",") sample2 = 1 for line in open(infile): sample_one = [] sample_two = [] cols = line.strip().split("\t") if sample0 == 1: b_samples = columns_to_values(barlett_samples, line) if sample1 == 1: for index in sample_one_cols: sample_one.append(cols[int(index) - 1]) if sample2 == 1: for index in sample_two_cols: sample_two.append(cols[int(index) - 1]) if test_id.strip() == "describe": size, min_max, mean, uv, bs, bk = stats.describe(map(float, sample_one)) cols.append(size) cols.append(min_max) cols.append(mean) cols.append(uv) cols.append(bs) cols.append(bk) elif test_id.strip() == "mode": vals, counts = stats.mode(map(float, sample_one)) cols.append(vals) cols.append(counts) elif test_id.strip() == "nanmean": m = stats.nanmean(map(float, sample_one)) cols.append(m) elif test_id.strip() == "nanmedian": m = stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "kurtosistest": z_value, p_value = stats.kurtosistest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "itemfreq": freq = stats.itemfreq(map(float, sample_one)) for list in freq: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "nanmedian": m = 
stats.nanmedian(map(float, sample_one)) cols.append(m) elif test_id.strip() == "variation": ra = stats.variation(map(float, sample_one)) cols.append(ra) elif test_id.strip() == "boxcox_llf": IIf = stats.boxcox_llf(imbda, map(float, sample_one)) cols.append(IIf) elif test_id.strip() == "tiecorrect": fa = stats.tiecorrect(map(float, sample_one)) cols.append(fa) elif test_id.strip() == "rankdata": r = stats.rankdata(map(float, sample_one), method=args.md) cols.append(r) elif test_id.strip() == "nanstd": s = stats.nanstd(map(float, sample_one), bias=args.bias) cols.append(s) elif test_id.strip() == "anderson": A2, critical, sig = stats.anderson(map(float, sample_one), dist=args.dist) cols.append(A2) for list in critical: cols.append(list) cols.append(",") for list in sig: cols.append(list) elif test_id.strip() == "binom_test": p_value = stats.binom_test(map(float, sample_one), n=args.n, p=args.p) cols.append(p_value) elif test_id.strip() == "gmean": gm = stats.gmean(map(float, sample_one), dtype=args.dtype) cols.append(gm) elif test_id.strip() == "hmean": hm = stats.hmean(map(float, sample_one), dtype=args.dtype) cols.append(hm) elif test_id.strip() == "kurtosis": k = stats.kurtosis(map(float, sample_one), axis=args.axis, fisher=args.fisher, bias=args.bias) cols.append(k) elif test_id.strip() == "moment": n_moment = stats.moment(map(float, sample_one), n=args.n) cols.append(n_moment) elif test_id.strip() == "normaltest": k2, p_value = stats.normaltest(map(float, sample_one)) cols.append(k2) cols.append(p_value) elif test_id.strip() == "skew": skewness = stats.skew(map(float, sample_one), bias=args.bias) cols.append(skewness) elif test_id.strip() == "skewtest": z_value, p_value = stats.skewtest(map(float, sample_one)) cols.append(z_value) cols.append(p_value) elif test_id.strip() == "sem": s = stats.sem(map(float, sample_one), ddof=args.ddof) cols.append(s) elif test_id.strip() == "zscore": z = stats.zscore(map(float, sample_one), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "signaltonoise": s2n = stats.signaltonoise(map(float, sample_one), ddof=args.ddof) cols.append(s2n) elif test_id.strip() == "percentileofscore": p = stats.percentileofscore(map(float, sample_one), score=args.score, kind=args.kind) cols.append(p) elif test_id.strip() == "bayes_mvs": c_mean, c_var, c_std = stats.bayes_mvs(map(float, sample_one), alpha=args.alpha) cols.append(c_mean) cols.append(c_var) cols.append(c_std) elif test_id.strip() == "sigmaclip": c, c_low, c_up = stats.sigmaclip(map(float, sample_one), low=args.m, high=args.n) cols.append(c) cols.append(c_low) cols.append(c_up) elif test_id.strip() == "kstest": d, p_value = stats.kstest( map(float, sample_one), cdf=args.cdf, N=args.N, alternative=args.alternative, mode=args.mode ) cols.append(d) cols.append(p_value) elif test_id.strip() == "chi2_contingency": chi2, p, dof, ex = stats.chi2_contingency( map(float, sample_one), correction=args.correction, lambda_=args.lambda_ ) cols.append(chi2) cols.append(p) cols.append(dof) cols.append(ex) elif test_id.strip() == "tmean": if nf is 0 and mf is 0: mean = stats.tmean(map(float, sample_one)) else: mean = stats.tmean(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(mean) elif test_id.strip() == "tmin": if mf is 0: min = stats.tmin(map(float, sample_one)) else: min = stats.tmin(map(float, sample_one), lowerlimit=mf, inclusive=args.inclusive) cols.append(min) elif test_id.strip() == "tmax": if nf is 0: max = stats.tmax(map(float, sample_one)) else: max = 
stats.tmax(map(float, sample_one), upperlimit=nf, inclusive=args.inclusive) cols.append(max) elif test_id.strip() == "tvar": if nf is 0 and mf is 0: var = stats.tvar(map(float, sample_one)) else: var = stats.tvar(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(var) elif test_id.strip() == "tstd": if nf is 0 and mf is 0: std = stats.tstd(map(float, sample_one)) else: std = stats.tstd(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(std) elif test_id.strip() == "tsem": if nf is 0 and mf is 0: s = stats.tsem(map(float, sample_one)) else: s = stats.tsem(map(float, sample_one), (mf, nf), (inclusive1, inclusive2)) cols.append(s) elif test_id.strip() == "scoreatpercentile": if nf is 0 and mf is 0: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), interpolation_method=args.interpolation ) else: s = stats.scoreatpercentile( map(float, sample_one), map(float, sample_two), (mf, nf), interpolation_method=args.interpolation ) for list in s: cols.append(list) elif test_id.strip() == "relfreq": if nf is 0 and mf is 0: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b) else: rel, low_range, binsize, ex = stats.relfreq(map(float, sample_one), args.b, (mf, nf)) for list in rel: cols.append(list) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "binned_statistic": if nf is 0 and mf is 0: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b ) else: st, b_edge, b_n = stats.binned_statistic( map(float, sample_one), map(float, sample_two), statistic=args.statistic, bins=args.b, range=(mf, nf), ) cols.append(st) cols.append(b_edge) cols.append(b_n) elif test_id.strip() == "threshold": if nf is 0 and mf is 0: o = stats.threshold(map(float, sample_one), newval=args.new) else: o = stats.threshold(map(float, sample_one), mf, nf, newval=args.new) for list in o: cols.append(list) elif test_id.strip() == "trimboth": o = stats.trimboth(map(float, sample_one), proportiontocut=args.proportiontocut) for list in o: cols.append(list) elif test_id.strip() == "trim1": t1 = stats.trim1(map(float, sample_one), proportiontocut=args.proportiontocut, tail=args.tail) for list in t1: cols.append(list) elif test_id.strip() == "histogram": if nf is 0 and mf is 0: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b) else: hi, low_range, binsize, ex = stats.histogram(map(float, sample_one), args.b, (mf, nf)) cols.append(hi) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "cumfreq": if nf is 0 and mf is 0: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b) else: cum, low_range, binsize, ex = stats.cumfreq(map(float, sample_one), args.b, (mf, nf)) cols.append(cum) cols.append(low_range) cols.append(binsize) cols.append(ex) elif test_id.strip() == "boxcox_normmax": if nf is 0 and mf is 0: ma = stats.boxcox_normmax(map(float, sample_one)) else: ma = stats.boxcox_normmax(map(float, sample_one), (mf, nf), method=args.method) cols.append(ma) elif test_id.strip() == "boxcox": if imbda is 0: box, ma, ci = stats.boxcox(map(float, sample_one), alpha=args.alpha) cols.append(box) cols.append(ma) cols.append(ci) else: box = stats.boxcox(map(float, sample_one), imbda, alpha=args.alpha) cols.append(box) elif test_id.strip() == "histogram2": h2 = stats.histogram2(map(float, sample_one), map(float, sample_two)) for list in h2: cols.append(list) elif test_id.strip() == 
"ranksums": z_statistic, p_value = stats.ranksums(map(float, sample_one), map(float, sample_two)) cols.append(z_statistic) cols.append(p_value) elif test_id.strip() == "ttest_1samp": t, prob = stats.ttest_1samp(map(float, sample_one), map(float, sample_two)) for list in t: cols.append(list) for list in prob: cols.append(list) elif test_id.strip() == "ansari": AB, p_value = stats.ansari(map(float, sample_one), map(float, sample_two)) cols.append(AB) cols.append(p_value) elif test_id.strip() == "linregress": slope, intercept, r_value, p_value, stderr = stats.linregress( map(float, sample_one), map(float, sample_two) ) cols.append(slope) cols.append(intercept) cols.append(r_value) cols.append(p_value) cols.append(stderr) elif test_id.strip() == "pearsonr": cor, p_value = stats.pearsonr(map(float, sample_one), map(float, sample_two)) cols.append(cor) cols.append(p_value) elif test_id.strip() == "pointbiserialr": r, p_value = stats.pointbiserialr(map(float, sample_one), map(float, sample_two)) cols.append(r) cols.append(p_value) elif test_id.strip() == "ks_2samp": d, p_value = stats.ks_2samp(map(float, sample_one), map(float, sample_two)) cols.append(d) cols.append(p_value) elif test_id.strip() == "mannwhitneyu": mw_stats_u, p_value = stats.mannwhitneyu( map(float, sample_one), map(float, sample_two), use_continuity=args.mwu_use_continuity ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "zmap": z = stats.zmap(map(float, sample_one), map(float, sample_two), ddof=args.ddof) for list in z: cols.append(list) elif test_id.strip() == "ttest_ind": mw_stats_u, p_value = stats.ttest_ind( map(float, sample_one), map(float, sample_two), equal_var=args.equal_var ) cols.append(mw_stats_u) cols.append(p_value) elif test_id.strip() == "ttest_rel": t, prob = stats.ttest_rel(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(t) cols.append(prob) elif test_id.strip() == "mood": z, p_value = stats.mood(map(float, sample_one), map(float, sample_two), axis=args.axis) cols.append(z) cols.append(p_value) elif test_id.strip() == "shapiro": W, p_value, a = stats.shapiro(map(float, sample_one), map(float, sample_two), args.reta) cols.append(W) cols.append(p_value) for list in a: cols.append(list) elif test_id.strip() == "kendalltau": k, p_value = stats.kendalltau( map(float, sample_one), map(float, sample_two), initial_lexsort=args.initial_lexsort ) cols.append(k) cols.append(p_value) elif test_id.strip() == "entropy": s = stats.entropy(map(float, sample_one), map(float, sample_two), base=args.base) cols.append(s) elif test_id.strip() == "spearmanr": if sample2 == 1: rho, p_value = stats.spearmanr(map(float, sample_one), map(float, sample_two)) else: rho, p_value = stats.spearmanr(map(float, sample_one)) cols.append(rho) cols.append(p_value) elif test_id.strip() == "wilcoxon": if sample2 == 1: T, p_value = stats.wilcoxon( map(float, sample_one), map(float, sample_two), zero_method=args.zero_method, correction=args.correction, ) else: T, p_value = stats.wilcoxon( map(float, sample_one), zero_method=args.zero_method, correction=args.correction ) cols.append(T) cols.append(p_value) elif test_id.strip() == "chisquare": if sample2 == 1: rho, p_value = stats.chisquare(map(float, sample_one), map(float, sample_two), ddof=args.ddof) else: rho, p_value = stats.chisquare(map(float, sample_one), ddof=args.ddof) cols.append(rho) cols.append(p_value) elif test_id.strip() == "power_divergence": if sample2 == 1: stat, p_value = stats.power_divergence( map(float, sample_one), map(float, 
sample_two), ddof=args.ddof, lambda_=args.lambda_ ) else: stat, p_value = stats.power_divergence(map(float, sample_one), ddof=args.ddof, lambda_=args.lambda_) cols.append(stat) cols.append(p_value) elif test_id.strip() == "theilslopes": if sample2 == 1: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), map(float, sample_two), alpha=args.alpha) else: mpe, met, lo, up = stats.theilslopes(map(float, sample_one), alpha=args.alpha) cols.append(mpe) cols.append(met) cols.append(lo) cols.append(up) elif test_id.strip() == "combine_pvalues": if sample2 == 1: stat, p_value = stats.combine_pvalues( map(float, sample_one), method=args.med, weights=map(float, sample_two) ) else: stat, p_value = stats.combine_pvalues(map(float, sample_one), method=args.med) cols.append(stat) cols.append(p_value) elif test_id.strip() == "obrientransform": ob = stats.obrientransform(*b_samples) for list in ob: elements = ",".join(map(str, list)) cols.append(elements) elif test_id.strip() == "f_oneway": f_value, p_value = stats.f_oneway(*b_samples) cols.append(f_value) cols.append(p_value) elif test_id.strip() == "kruskal": h, p_value = stats.kruskal(*b_samples) cols.append(h) cols.append(p_value) elif test_id.strip() == "friedmanchisquare": fr, p_value = stats.friedmanchisquare(*b_samples) cols.append(fr) cols.append(p_value) elif test_id.strip() == "fligner": xsq, p_value = stats.fligner(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(xsq) cols.append(p_value) elif test_id.strip() == "bartlett": T, p_value = stats.bartlett(*b_samples) cols.append(T) cols.append(p_value) elif test_id.strip() == "levene": w, p_value = stats.levene(center=args.center, proportiontocut=args.proportiontocut, *b_samples) cols.append(w) cols.append(p_value) elif test_id.strip() == "median_test": stat, p_value, m, table = stats.median_test( ties=args.ties, correction=args.correction, lambda_=args.lambda_, *b_samples ) cols.append(stat) cols.append(p_value) cols.append(m) cols.append(table) for list in table: elements = ",".join(map(str, list)) cols.append(elements) outfile.write("%s\n" % "\t".join(map(str, cols))) outfile.close()
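The dispatcher above compares its range limits with `is` (`if nf is 0 and mf is 0`) and feeds `map(float, ...)` iterators straight into SciPy; both habits only work reliably under Python 2. A minimal Python 3 sketch of a single branch (the `tvar` case, with placeholder sample data and the `args` handling omitted) that materializes the values once and compares with `==`:

from scipy import stats

def tvar_branch(sample_one, mf, nf, inclusive1=True, inclusive2=True):
    # Materialize the values once: in Python 3, map() returns a one-shot iterator.
    data = [float(v) for v in sample_one]
    # Compare with ==, not 'is'; identity checks on small ints only work by accident.
    if nf == 0 and mf == 0:
        return stats.tvar(data)
    return stats.tvar(data, limits=(mf, nf), inclusive=(inclusive1, inclusive2))

# Hypothetical call, mirroring how the dispatcher appends results to cols:
cols = []
cols.append(tvar_branch(["1", "2", "3", "4"], mf=0, nf=0))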
def plotExpBox_Main(inputFiles, headers, valcols, outputFile, sep, startRow, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, plotPvalueCluster, outputClusterPrefix, methodCluster, xlegendrotation, xlabe, ylabe, figsz, titl, showSampleSizes, trimToMinSize, relabels, logb, plotHistogramToFile, plotMedianForGroups, botta, showViolin, showBox, firstColAnnot, plotTrend, showLegend, makePzfxFile, makeBinMatrix, writeDataSummaryStat, summaryStatRange, minuslog10pvalue, minNDataToKeep, vfacecolor, valpha, outXYZPvalues, dividePlots): #if plotPvalueCluster: #if pvalue cluster is needed: # from Bio.Cluster.cluster import * # from Bio.Cluster import * #endif #the real deal! plotData = [] xtickLabels = [] trendData = {} annot = {} minSize = -1 for inputFile, header, cols in zip(inputFiles, headers, valcols): fin = generic_istream(inputFile) startIdx = len(plotData) if firstColAnnot: colAnnot = cols[0] cols = cols[1:] annotThisFile = [] annot[startIdx] = annotThisFile else: colAnnot = -1 annotThisFile = None for col in cols: plotData.append([]) xtickLabels.append(header[col]) colIndices = range(startIdx, startIdx + len(cols)) if plotTrend: #print >> stderr,"plotTrend" trendDataThisFile = [] trendData[startIdx] = trendDataThisFile else: trendDataThisFile = None lino = 0 for lin in fin: lino += 1 if lino < startRow: continue fields = lin.rstrip("\r\n").split(sep) if plotTrend: #print >> stderr,"a" trendDataThisLine = [] else: trendDataThisLine = None allDataOKThisLine = True if colAnnot >= 0: annotThisFile.append(fields[colAnnot]) for idx, col in zip(colIndices, cols): try: value = float(fields[col]) if logb != 0: if value == 0.0: raise ValueError value = log(value) / logb plotData[idx].append(value) if plotTrend: trendDataThisLine.append(value) #print >> stderr,"value:",value except: allDataOKThisLine = False if plotTrend: if allDataOKThisLine: trendDataThisFile.append(trendDataThisLine) else: trendDataThisFile.append(None) fin.close() if minSize == -1: minSize = len(plotData[idx]) #or startIDX? 
else: minSize = min([minSize, len(plotData[idx])]) if trimToMinSize: print >> stderr, "trimming to min size =", minSize trimData(plotData, minSize) if len(relabels) > 0: #if len(relabels)!=len(xtickLabels): # print >> stderr,"relabels doesn't have the same length as original label vectors",xtickLabels,"=>",relabels # exit() print >> stderr, xtickLabels print >> stderr, relabels for i, relabel in zip(range(0, len(relabels)), relabels): xtickLabels[i] = relabel for i in range(0, len(plotMedianForGroups)): plotMedianForGroups[i] = getCol0ListFromCol1ListStringAdv( xtickLabels, plotMedianForGroups[i]) #drawing medians: medianToDraw = [] for mediangrouper in plotMedianForGroups: curD = [] for c in mediangrouper: curD.extend(plotData[c]) medianToDraw.append(median(curD)) for c in range(len(plotData) - 1, -1, -1): if len(plotData[c]) < minNDataToKeep: print >> stderr, xtickLabels[c], "discarded because has only", len( plotData[c]), "data points <", minNDataToKeep del plotData[c] del xtickLabels[c] if not skipStat: print >> stdout, "student t-test (1 sample; mean=0)" print >> stdout, "sample", "mean", "p-val", "median" if writeDataSummaryStat: fDSS = open(writeDataSummaryStat, "w") print >> fDSS, "sample\tmean\tvar\tsd\tmin\tmax\tN\tNInRange[" + str( summaryStatRange[0]) + "," + str( summaryStatRange[1] ) + "]\t%NInRange\tNbelowRange\t%Nbelow\tNAboveRange\t%NAbove" for x in range(0, len(plotData)): #print >> stderr, len(plotData[x]) try: print >> stdout, xtickLabels[x], mean( plotData[x]), ttest_1samp(plotData[x], 0)[1], median(plotData[x]) except: print >> stdout, xtickLabels[x], mean( plotData[x]), "NA", median(plotData[x]) if writeDataSummaryStat: sumData, N, NIN, NBelow, NAbove = filterDataInRangeInclusive( plotData[x], summaryStatRange[0], summaryStatRange[1]) if NIN > 1: #print >> stderr,"sumData=",sumData #print >> stderr,mean mea = mean2(sumData) DDOF = 1 sd = std(sumData, ddof=DDOF) var = sd * sd mi = min(sumData) ma = max(sumData) else: mea = "NA" sd = "NA" var = "NA" mi = "NA" ma = "NA" print >> fDSS, xtickLabels[x] + "\t" + str(mea) + "\t" + str( var) + "\t" + str(sd) + "\t" + str(mi) + "\t" + str( ma) + "\t" + str(N) + "\t" + str(NIN) + "\t" + str( float(NIN) * 100 / N) + "\t" + str(NBelow) + "\t" + str( float(NBelow) * 100 / N) + "\t" + str(NAbove) + "\t" + str( float(NAbove) * 100 / N) pvalueM = [] if writeDataSummaryStat: fDSS.close() print >> stdout, "" print >> stdout, "student t-test (2 samples)" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: try: pvalue = ttest_ind(plotData[x], plotData[y])[1] except: pvalue = 1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, str(pvalue), pvalueRow.append(pvalue) print >> stdout, "" print >> stdout, "" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_t_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_t", xtickLabels, pvalueM, methodCluster) pvalueM = [] print >> stdout, "welch t-test" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) 
print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: try: pvalue = welchs_approximate_ttest_arr( plotData[x], plotData[y])[3] except: pvalue = 1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, str(pvalue), pvalueRow.append(pvalue) print >> stdout, "" if outXYZPvalues: writeXYZPvalues(outXYZPvalues + "_Welch.xyz", xtickLabels, pvalueM) if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_Welch_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_Welch", xtickLabels, pvalueM, methodCluster) print >> stdout, "" print >> stdout, "non-parametric (Mann-Whitney U)" #"non-parametric (Mann-Whitney U if larger n<=20 else Wilcoxon)" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], pvalueM = [] print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue = mannwhitneyu(plotData[x], plotData[y])[1] * 2 except: pvalue = 1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, pvalue, #mann-whiteney need to mul by 2 (one tail to two tail) pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout, "" if outXYZPvalues: writeXYZPvalues(outXYZPvalues + "_U.xyz", xtickLabels, pvalueM) if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_U_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_U", xtickLabels, pvalueM, methodCluster) #####now the variance tests print >> stdout, "" print >> stdout, "Ansari-Bradley Two-sample Test for difference in scale parameters " print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], pvalueM = [] print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue = ansari(plotData[x], plotData[y])[1] except: pvalue = "NA" if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 #pvalue=1.0 print >> stdout, pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout, "" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_Ansari_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_Ansari", xtickLabels, pvalueM, methodCluster) ##### #####now the variance tests print >> stdout, "" print >> stdout, "Fligner's Two-sample Test for equal variance (non-parametrics)" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], pvalueM = [] print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] 
pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue = fligner(plotData[x], plotData[y])[1] except: pvalue = "NA" #pvalue=1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout, "" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_fligner_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_fligner", xtickLabels, pvalueM, methodCluster) ##### #####now the variance tests print >> stdout, "" print >> stdout, "Levene's Two-sample Test for equal variance" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], pvalueM = [] print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue = levene(plotData[x], plotData[y])[1] except: pvalue = "NA" #pvalue=1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout, "" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_levene_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_levene", xtickLabels, pvalueM, methodCluster) ##### #####now the variance tests print >> stdout, "" print >> stdout, "Bartlett's Two-sample Test for equal variance (for normal distributions)" print >> stdout, "p-val", for x in range(0, len(plotData)): print >> stdout, xtickLabels[x], pvalueM = [] print >> stdout, "" for x in range(0, len(plotData)): pvalueRow = [] pvalueM.append(pvalueRow) print >> stdout, xtickLabels[x], for y in range(0, len(plotData)): if y <= x: print >> stdout, "", if x == y: if minuslog10pvalue: pvalueRow.append(0.0) else: pvalueRow.append(1.0) else: pvalueRow.append(pvalueM[y][x]) else: #if max(len(plotData[x]),len(plotData[y]))<=20: try: pvalue = bartlett(plotData[x], plotData[y])[1] except: pvalue = "NA" #pvalue=1.0 if minuslog10pvalue and str(pvalue) != "NA": try: pvalue = -1 * log(pvalue, 10) except: pvalue = -1000.0 print >> stdout, pvalue, pvalueRow.append(pvalue) #else: # print >> stdout,wilcoxon(plotData[x],plotData[y])[1], # this is two-tailed already stdout, "", # print >> stdout, "" if plotPvalueCluster: makePValueRawPlot(outputClusterPrefix + "_bartlett_raw", xtickLabels, pvalueM) makePValueClusterPlot(outputClusterPrefix + "_bartlett", xtickLabels, pvalueM, methodCluster) ##### figure(figsize=figsz) subplots_adjust(top=0.9, bottom=botta, left=0.2, right=0.8) if len(titl) == 0: titl = outputFile plotExpBox(plotData, xtickLabels, showIndPoints, mark, markMean, showMean, notch, whisker, outliers, xlegendrotation, xlabe, ylabe, titl, showSampleSizes, showViolin, showBox, annot, trendData, showLegend, makePzfxFile, makeBinMatrix, 
dividePlots) #ylim([0,200]) for m in medianToDraw: axhline(y=m, linestyle=':', color='gray') savefig(outputFile, bbox_inches="tight") if len(plotHistogramToFile) > 0: drawHistogram(plotHistogramToFile, plotData, xtickLabels) drawDensigram(plotHistogramToFile + ".density.png", plotData, xtickLabels)
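plotExpBox_Main builds each pairwise p-value matrix with Python 2 `print >>` statements and doubles a one-tailed Mann-Whitney p-value by hand. A self-contained sketch of the same idea on current SciPy (function and variable names here are illustrative, not taken from the script): `mannwhitneyu` with `alternative='two-sided'` already returns the two-tailed value, and the same loop works for any of the scale tests (ansari, fligner, levene, bartlett) by swapping the call.

import numpy as np
from scipy import stats

def pairwise_pvalue_matrix(samples):
    # Symmetric matrix of two-sided Mann-Whitney p-values; diagonal is 1.0.
    n = len(samples)
    pmat = np.ones((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            try:
                p = stats.mannwhitneyu(samples[i], samples[j],
                                       alternative="two-sided").pvalue
            except ValueError:
                # Degenerate input (e.g. all values identical); keep 1.0 like the script does.
                p = 1.0
            pmat[i, j] = pmat[j, i] = p
    return pmat

# Hypothetical usage with three synthetic groups:
rng = np.random.default_rng(0)
groups = [rng.normal(loc=m, size=20) for m in (0.0, 0.5, 1.0)]
print(pairwise_pvalue_matrix(groups))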
def test_small(self):
    x = [1, 2, 3, 3, 4]
    y = [3, 2, 6, 1, 6, 1, 4, 1]
    W, pval = stats.ansari(x, y)
    assert_almost_equal(W, 23.5, 11)
    assert_almost_equal(pval, 0.13499256881897437, 11)
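The snippet above exercises only the default two-sided test. Recent SciPy releases (1.7 and later, to the best of my knowledge; treat the exact version as an assumption) also accept an `alternative` keyword, which the same fixture can exercise for one-sided hypotheses:

from scipy import stats

x = [1, 2, 3, 3, 4]
y = [3, 2, 6, 1, 6, 1, 4, 1]

# Two-sided (default) versus one-sided alternatives; 'alternative' is a newer keyword.
for alt in ("two-sided", "less", "greater"):
    res = stats.ansari(x, y, alternative=alt)
    print(alt, res.statistic, res.pvalue)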
def features(parameters: dict, source_id: int): print("Start Table: {0}".format(source_id)) db_distribution = pymysql.connect(parameters['hostname'], parameters['username'], parameters['password'], parameters["database_distribution"], charset='utf8mb4') cursor_distribution = db_distribution.cursor() sql = "TRUNCATE `{0}`.`evaluation_{1}`;".format( parameters["database_distribution"], source_id) cursor_distribution.execute(sql) sql_cols = generate_cols(parameters, "DIS") sql = "SELECT `IPDDIS`,`COUNTDIS`,{2} FROM `{0}`.`distribution_{1}` " \ "WHERE `PARAID`='0' and `SOURCEID`='{1}' LIMIT 1;".format(parameters["database_distribution"], source_id, sql_cols) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() referenceIPDValues = dict_to_dis_list(json_map_to_dict(cursor_result[0])) referenceCOUNTValues = dict_to_dis_list(json_map_to_dict(cursor_result[1])) referenceWINValues = dict() columeBegin = 2 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): referenceWINValues[index] = dict_to_dis_list( json_map_to_dict(cursor_result[columeBegin])) columeBegin = columeBegin + 1 sql_cols = generate_cols(parameters, "PMF") sql = "SELECT `IPDPMF`,`COUNTPMF`,{2} FROM `{0}`.`pmf_{1}` " \ "WHERE `PARAID`='0' and `SOURCEID`='{1}' LIMIT 1;".format(parameters["database_distribution"], source_id, sql_cols) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() referenceIPDPMF = json_map_to_dict(cursor_result[0]) referenceCOUNTPMF = json_map_to_dict(cursor_result[1]) referenceWINPMF = dict() columeBegin = 2 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): referenceWINPMF[index] = json_map_to_dict(cursor_result[columeBegin]) columeBegin = columeBegin + 1 sql_cols = generate_cols(parameters, "CDF") sql = "SELECT `IPDCDF`,`COUNTCDF`,{2} FROM `{0}`.`cdf_{1}` " \ "WHERE `PARAID`='0' and `SOURCEID`='{1}' LIMIT 1;".format(parameters["database_distribution"], source_id, sql_cols) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() referenceIPDCDF = json_map_to_dict(cursor_result[0]) referenceCOUNTCDF = json_map_to_dict(cursor_result[1]) referenceWINCDF = dict() columeBegin = 2 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): referenceWINCDF[index] = json_map_to_dict(cursor_result[columeBegin]) columeBegin = columeBegin + 1 sql = "SELECT `ID` FROM `{0}`.`distribution_{1}` WHERE" \ " `ID`!=(SELECT `ID` FROM `{0}`.`distribution_{1}` WHERE `SOURCEID`='{1}' AND `PARAID`='0' LIMIT 1);".format( parameters["database_distribution"], source_id) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchall() ids = list() for id in cursor_result: ids.append(int(id[0])) print("Table {0} : {1}".format(source_id, ids)) for id in ids: sql_cols = generate_cols(parameters, "DIS") sql = "SELECT `SOURCEID`,`PARAID`,`IPDDIS`,`COUNTDIS`,{3} " \ "FROM `{0}`.`distribution_{1}` WHERE `ID`='{2}';".format( parameters["database_distribution"], source_id, id, sql_cols) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() sourceID = cursor_result[0] paraID = cursor_result[1] currentIPDValues = dict_to_dis_list(json_map_to_dict(cursor_result[2])) currentCOUNTValues = dict_to_dis_list( json_map_to_dict(cursor_result[3])) currentWINValues = dict() columeBegin = 4 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, 
int(parameters["WIN_STEP"])): currentWINValues[index] = dict_to_dis_list( json_map_to_dict(cursor_result[columeBegin])) columeBegin = columeBegin + 1 sql_cols = generate_cols(parameters, "PMF") sql = "SELECT `IPDPMF`,`COUNTPMF`,{3} FROM `{0}`.`pmf_{1}` WHERE `PARAID`='{2}' AND `SOURCEID`='{4}';".format( parameters["database_distribution"], source_id, paraID, sql_cols, sourceID) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() currentIPDPMF = json_map_to_dict(cursor_result[0]) currentCOUNTPMF = json_map_to_dict(cursor_result[1]) currentWINPMF = dict() columeBegin = 2 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): currentWINPMF[index] = json_map_to_dict(cursor_result[columeBegin]) columeBegin = columeBegin + 1 sql_cols = generate_cols(parameters, "CDF") sql = "SELECT `IPDCDF`,`COUNTCDF`,{3} FROM `{0}`.`cdf_{1}` WHERE `PARAID`='{2}' AND `SOURCEID`='{4}';".format( parameters["database_distribution"], source_id, paraID, sql_cols, sourceID) cursor_distribution.execute(sql) cursor_result = cursor_distribution.fetchone() currentIPDCDF = json_map_to_dict(cursor_result[0]) currentCOUNTCDF = json_map_to_dict(cursor_result[1]) currentWINCDF = dict() columeBegin = 2 for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): currentWINCDF[index] = json_map_to_dict(cursor_result[columeBegin]) columeBegin = columeBegin + 1 nonzero_referenceIPDPMF = pmf_to_list(referenceIPDPMF) nonzero_currentIPDPMF = pmf_to_list(currentIPDPMF) aligned_IPDPMF = align_pmf(nonzero_currentIPDPMF, nonzero_referenceIPDPMF) ks_ipd = stats.ks_2samp(referenceIPDValues, currentIPDValues) ttest_ipd = stats.ttest_ind(referenceIPDValues, currentIPDValues, axis=0, equal_var=False) kld_ipd = stats.entropy(aligned_IPDPMF[0], aligned_IPDPMF[1]) if math.isinf(kld_ipd): kld_ipd = 1.0 whitney_ipd = stats.mannwhitneyu(referenceIPDValues, currentIPDValues, use_continuity=False, alternative='two-sided') ansari_ipd = stats.ansari(referenceIPDValues, currentIPDValues) max_ipd_cdf_count = min(len(referenceIPDCDF), len(currentIPDCDF)) wasserstein_ipd = wasserstein_distance_cdf( cdf_to_list(referenceIPDCDF)[:max_ipd_cdf_count], cdf_to_list(currentIPDCDF)[:max_ipd_cdf_count]) energy_ipd = energy_distance_cdf( cdf_to_list(referenceIPDCDF)[:max_ipd_cdf_count], cdf_to_list(currentIPDCDF)[:max_ipd_cdf_count]) print("PARAID = {0} : ".format(paraID), end='\t') print( "IPD [ks={0},\t ttest={1},\t kld={2},\t whitney={3},\t ansari={4},\t wasserstein={5},\t energy={6}]" .format(ks_ipd[1], ttest_ipd[1], kld_ipd, whitney_ipd[1], ansari_ipd[1], wasserstein_ipd, energy_ipd), end='\t') ansari_count = stats.ansari(referenceCOUNTValues, currentCOUNTValues) nonzero_referenceCOUNTPMF = pmf_to_list(referenceCOUNTPMF) nonzero_currentCOUNTPMF = pmf_to_list(currentCOUNTPMF) aligned_COUNTPMF = align_pmf(nonzero_currentCOUNTPMF, nonzero_referenceCOUNTPMF) kld_count = stats.entropy(aligned_COUNTPMF[0], aligned_COUNTPMF[1]) if math.isinf(kld_count): kld_count = 1.0 ks_count = stats.ks_2samp(referenceCOUNTValues, currentCOUNTValues) ttest_count = stats.ttest_ind(referenceCOUNTValues, currentCOUNTValues, axis=0, equal_var=False) whitney_count = stats.mannwhitneyu(referenceCOUNTValues, currentCOUNTValues, use_continuity=False, alternative='two-sided') aligned_referenceCOUNTCDF = cdf_to_list(referenceCOUNTCDF) aligned_currentCOUNTCDF = cdf_to_list(currentCOUNTCDF) max_count_cdf = min(len(aligned_referenceCOUNTCDF), 
len(aligned_currentCOUNTCDF)) wasserstein_count = wasserstein_distance_cdf( aligned_referenceCOUNTCDF[:max_count_cdf], aligned_currentCOUNTCDF[:max_count_cdf]) energy_count = energy_distance_cdf( aligned_referenceCOUNTCDF[:max_count_cdf], aligned_currentCOUNTCDF[:max_count_cdf]) print( "COUNT [ks={0},\t ttest={1},\t whitney={2},\t ansari={3},\t kld={4},\t wasserstein={5},\t energy={6}]" .format(ks_count[1], ttest_count[1], whitney_count[1], ansari_count[1], kld_count, wasserstein_count, energy_count), end='\t') kld_win = dict() wasserstein_win = dict() energy_win = dict() for index in range(int(parameters["WIN_BEGIN"]), int(parameters["WIN_END"]) + 1, int(parameters["WIN_STEP"])): nonzero_referenceWINCDF = pmf_to_list(referenceWINCDF[index]) nonzero_currentWINCDF = pmf_to_list(currentWINCDF[index]) aligned_WINCDF = align_pmf(nonzero_currentWINCDF, nonzero_referenceWINCDF) kld_win[index] = stats.entropy(aligned_WINCDF[0], aligned_WINCDF[1]) # nonzero_referenceWINPMF = pmf_to_list(referenceWINPMF[index]) # nonzero_currentWINPMF = pmf_to_list(currentWINPMF[index]) # aligned_WINPMF = align_pmf(nonzero_currentWINPMF, nonzero_referenceWINPMF) # kld_win[index] = stats.entropy(aligned_WINPMF[0], aligned_WINPMF[1]) # kld_win[index] = stats.entropy(cdf_to_list(referenceWINCDF[index]), cdf_to_list(currentWINCDF[index])) # wasserstein_win[index] = stats.wasserstein_distance(cdf_to_list(referenceWINCDF[index]), cdf_to_list(currentWINCDF[index])) # energy_win[index] = stats.energy_distance(cdf_to_list(referenceWINCDF[index]), cdf_to_list(currentWINCDF[index])) # wasserstein_win[index] = stats.wasserstein_distance(pmf_to_list(referenceWINPMF[index]), pmf_to_list(currentWINPMF[index])) #energy_win[index] = stats.energy_distance(pmf_to_list(referenceWINPMF[index]), pmf_to_list(currentWINPMF[index])) # print("WIN{0} [kld={1},\t wasserstein={2},\t energy={3}]".format( # index, kld_win[index], wasserstein_win[index], energy_win[index]), end='\t') wasserstein_win[index] = wasserstein_distance_cdf( cdf_to_list(referenceWINCDF[index]), cdf_to_list(currentWINCDF[index])) energy_win[index] = energy_distance_cdf( cdf_to_list(referenceWINCDF[index]), cdf_to_list(currentWINCDF[index])) print("") col_names = str() col_values = str() for key, value in kld_win.items(): col_names = col_names + "`KLD-WIN{0}`,`WASSERSTEIN-WIN{0}`,`ENERGY-WIN{0}`,".format( key) if math.isinf(value): value = 1 col_values = col_values + "'{0}','{1}','{2}',".format( value, wasserstein_win[key], energy_win[key]) sql = "INSERT INTO `{0}`.`evaluation_{1}` " \ "({2}`KSP-IPD`,`KLD-IPD`,`TTESTP-IPD`,`WHITNEYP-IPD`,`ANSARIP-IPD`,`WASSERSTEIN-IPD`,`ENERGY-IPD`," \ "`KSP-COUNT`,`KLD-COUNT`,`TTESTP-COUNT`,`WHITNEYP-COUNT`,`ANSARIP-COUNT`,`WASSERSTEIN-COUNT`,`ENERGY-COUNT`," \ "`SOURCEID`,`PARAID`)" \ " VALUES({3}'{4}','{5}','{6}','{7}','{8}','{9}','{10}','{11}','{12}','{13}','{14}','{15}','{16}','{17}'," \ "'{18}', '{19}');".format( parameters["database_distribution"], source_id, col_names, col_values, ks_ipd[1], kld_ipd, ttest_ipd[1], whitney_ipd[1], ansari_ipd[1], wasserstein_ipd, energy_ipd, ks_count[1], kld_count, ttest_count[1], whitney_count[1], ansari_count[1], wasserstein_count, energy_count, sourceID, paraID) # print(sql) cursor_distribution.execute(sql) db_distribution.commit() cursor_distribution.close() db_distribution.close()
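features() funnels every comparison into an INSERT assembled by string formatting, but its statistical core is independent of the database plumbing. A compact sketch of that core for one pair of samples (names are illustrative; the KL clipping to 1.0 mirrors the code above, while `scipy.stats.wasserstein_distance` / `energy_distance` operate on the raw samples and are a stand-in for the CDF-based `wasserstein_distance_cdf` / `energy_distance_cdf` helpers used in the original):

import math
import numpy as np
from scipy import stats

def distribution_features(reference, current):
    # Test statistics and divergences between two 1-D samples of discrete values.
    reference = np.asarray(reference, dtype=float)
    current = np.asarray(current, dtype=float)
    # Empirical PMFs over the union of observed values (suited to discrete data
    # such as IPD/COUNT histograms); epsilon avoids infinite KL on missing support.
    support = np.union1d(reference, current)
    ref_pmf = np.array([(reference == v).mean() for v in support]) + 1e-12
    cur_pmf = np.array([(current == v).mean() for v in support]) + 1e-12
    kld = stats.entropy(ref_pmf, cur_pmf)
    if math.isinf(kld):
        kld = 1.0  # same clipping as the original code
    return {
        "ks_p": stats.ks_2samp(reference, current).pvalue,
        "ttest_p": stats.ttest_ind(reference, current, equal_var=False).pvalue,
        "whitney_p": stats.mannwhitneyu(reference, current,
                                        alternative="two-sided").pvalue,
        "ansari_p": stats.ansari(reference, current).pvalue,
        "kld": kld,
        "wasserstein": stats.wasserstein_distance(reference, current),
        "energy": stats.energy_distance(reference, current),
    }

# Hypothetical usage with synthetic count-like data:
rng = np.random.default_rng(1)
print(distribution_features(rng.poisson(5, 200), rng.poisson(7, 200)))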
def test_ansariBradleyTest_exact_xResult(self):
    data_1 = [-63, 18, 84, 160, 33, -82, 49, 74, 58, -31, 151]
    data_2 = [78, -124, -443, 225, -9, -3, 189, 164, 119, 184]
    x1, p1 = ansari_bradley_test(data_1, data_2, alternative="two-sided")
    x2, p2 = ansari(data_1, data_2)
    assert pytest.approx(x2) == x1
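The test above trusts SciPy and `ansari_bradley_test` to agree on the statistic. For reference, the AB statistic itself is easy to reproduce by hand; this is a sketch of the textbook folded-rank definition (which SciPy also uses), not a reimplementation of the snippet's `ansari_bradley_test`:

import numpy as np
from scipy import stats
from scipy.stats import rankdata

def ansari_statistic(x, y):
    # Rank the pooled sample, fold the ranks toward the middle
    # (min of rank and N - rank + 1, with midranks for ties),
    # and sum the folded ranks belonging to x.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    pooled = np.concatenate([x, y])
    ranks = rankdata(pooled)
    folded = np.minimum(ranks, len(pooled) - ranks + 1)
    return folded[:len(x)].sum()

data_1 = [-63, 18, 84, 160, 33, -82, 49, 74, 58, -31, 151]
data_2 = [78, -124, -443, 225, -9, -3, 189, 164, 119, 184]
# Both values should agree for these untied samples.
print(ansari_statistic(data_1, data_2), stats.ansari(data_1, data_2).statistic)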