def test_pearsonr(self):
    """Testing pearsonr

    Calls ``stats.pearsonr`` on the fixture pairs (L, M) and (A, B) and
    checks element ``i`` of the ``i``-th result against the ``i``-th
    reference value using exact equality.
    """
    left_samples = [self.L, self.A]
    right_samples = [self.M, self.B]
    expected = (0.80208084775070976, 2.1040104471429959e-005)

    for idx, left in enumerate(left_samples):
        outcome = stats.pearsonr(left, right_samples[idx])
        # Only one element per call is checked: element 0 of the first
        # result, element 1 of the second.
        self.assertEqual(outcome[idx], expected[idx])
def test_pearsonr(self):
    """Testing pearsonr

    The list fixtures (L with M) and the array fixtures (A with B) are each
    passed to ``stats.pearsonr``; from the first result element 0 is compared
    with the first reference value, from the second result element 1 is
    compared with the second reference value.
    """
    first_inputs = [self.L, self.A]
    second_inputs = [self.M, self.B]
    reference = (0.80208084775070976, 2.1040104471429959e-005)

    cases = zip(first_inputs, second_inputs, reference)
    for position, (sample, other, wanted) in enumerate(cases):
        self.assertEqual(stats.pearsonr(sample, other)[position], wanted)
# Second sample: the integers 4..23 with the value at index 10 replaced by 34,
# plus an N.array copy for the array-based calls.
m = list(range(4, 24))
m[10] = 34
b = N.array(m)

# Two-valued variable: nine zeros followed by eleven ones.
pb = [0] * 9 + [1] * 11
apb = N.array(pb)

print('paired:')
#stats.paired(l,m)
#stats.paired(a,b)
print()
print()

# One row per measure: the function name on `stats`, the arguments for the
# list-based call, and the arguments for the array-based call.  The name is
# resolved with getattr only when its turn comes, so the heading is printed
# before the function is looked up.
for _measure, _list_args, _array_args in (
    ('pearsonr', (l, m), (a, b)),
    ('spearmanr', (l, m), (a, b)),
    ('pointbiserialr', (pb, l), (apb, a)),
    ('kendalltau', (l, m), (a, b)),
    ('linregress', (l, m), (a, b)),
):
    print(_measure + ':')
    print(getattr(stats, _measure)(*_list_args))
    print(getattr(stats, _measure)(*_array_args))

print('\nINFERENTIAL')
def evaluate(self, *args, **params):
    """Forward every positional and keyword argument to ``_stats.pearsonr``.

    Returns whatever ``_stats.pearsonr`` returns, unchanged.
    """
    outcome = _stats.pearsonr(*args, **params)
    return outcome
# Second sample: the integers 4..23 with the value at index 10 replaced by 34.
# list(...) keeps the item assignment working on interpreters where range()
# does not return a list.
m = list(range(4, 24))
m[10] = 34
b = N.array(m)

# Two-valued variable: nine zeros followed by eleven ones.
pb = [0] * 9 + [1] * 11
apb = N.array(pb)

# Every print below takes exactly one argument, so the output is the same
# whether print is a statement or a function.
print('paired:')
#stats.paired(l,m)
#stats.paired(a,b)
print('')
print('')

# Each measure is run twice: on the plain lists and on the array versions.
print('pearsonr:')
print(stats.pearsonr(l, m))
print(stats.pearsonr(a, b))

print('spearmanr:')
print(stats.spearmanr(l, m))
print(stats.spearmanr(a, b))

# pointbiserialr takes the two-valued variable first.
print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))

print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))

print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

print('\nINFERENTIAL')
# For every File2 row whose "<col0>\t<col1>" key is already in SNP1 and whose
# columns 4 and 6 sum to at least 30, store col4 / (col4 + col6) in SNP2.
SNP2 = {}
for line in fileinput.input([File2]):
    rowlist = line.rstrip("\n").split('\t')
    # Skip header/comment lines and everything on chrX.
    if line.startswith("#") or rowlist[0] == "chrX":
        continue
    snp_key = rowlist[0] + "\t" + rowlist[1]
    # The counts are only parsed for keys present in SNP1.
    if snp_key not in SNP1:
        continue
    total = int(rowlist[4]) + int(rowlist[6])
    if total >= 30:
        SNP2[snp_key] = float(rowlist[4]) / float(total)

Array1 = []
Array2 = []
from statlib import stats
reload(stats)

# Write "<SNP2 value>\t<SNP1 value>" per shared key and collect both series
# in the same order for the correlation calls.
for snp, ratio in SNP2.items():
    output.write("\t".join((str(ratio), str(SNP1[snp]))) + "\n")
    Array2.append(ratio)
    Array1.append(SNP1[snp])

print(pearson(Array1, Array2))
print(stats.pearsonr(Array1, Array2))
print(len(Array2))
output.close()
# Fragment of a linear self-check script: each section prints the value the
# textbook says to expect ("SHOULD BE ..."), then runs the list-based function
# and its array-based counterpart (the "a"-prefixed name) on the same data.
stats.attest_ind(array(a), array(b), 0, 1)

print '\n\nRelated Samples t-test'
before = [11, 16, 20, 17, 10]
after = [8, 11, 15, 11, 11]
print '\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359'
# The results are not printed here; presumably the third argument (1) makes
# the functions report their own results -- TODO confirm against statlib.
stats.ttest_rel(before, after, 1, 'Before', 'After')
stats.attest_rel(array(before), array(after), 1, 'Before', 'After')

print "\n\nPearson's r"
x = [0, 0, 1, 1, 1, 2, 2, 3, 3, 4]
y = [8, 7, 7, 6, 5, 4, 4, 4, 2, 0]
print 'SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190'
print stats.pearsonr(x, y)
print stats.apearsonr(array(x), array(y))

print "\n\nSpearman's r"
# x and y are rebound to a new nine-element data set for this section.
x = [4, 1, 9, 8, 3, 5, 6, 2, 7]
y = [3, 2, 8, 6, 5, 4, 7, 1, 9]
print '\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193'
print stats.spearmanr(x, y)
print stats.aspearmanr(array(x), array(y))

print '\n\nPoint-Biserial r'
# gender and score are not used within the visible fragment; presumably the
# statements that follow consume them.
gender = [1, 1, 1, 1, 2, 2, 2, 2, 2, 2]
score = [35, 38, 41, 40, 60, 65, 65, 68, 68, 64]
print '\nSHOULD BE +0.981257 (N=10) ... Basic Stats 1st ed, p.197'
def plot(self,init=None):
    """Interactive plotting of parsing features.

    On the initiating call (``init`` not given) a numbered menu of
    (feature, value) pairs is printed and the user is asked, for the x and
    then the y coordinate, which entries define the population and which
    define the sample.  The call then recurses through ``self.children``
    until it reaches objects that have a ``bestparses`` attribute.  For each
    parse position such an object records, per coordinate, a 1 or 0 for every
    parse that meets the population conditions (1 when the sample conditions
    hold as well).  Back on the initiating object the per-position means are
    written out as TSV, a bubble chart is attempted, and the Pearson
    correlation between the x and y means is printed when it can be computed.

    init -- the object that started the plot.  It carries the shared state
            (``plotstats``, ``unitfeats``, ``population``, ``sample``,
            ``pkey``).  Callers normally leave it as None.

    Always returns None.
    """

    def chosenEntries(answer,entries):
        # Translate a comma-separated answer into menu entries.  Menu numbers
        # start at 1; 0 is "[no condition]".  Anything that is not a number in
        # 1..len(entries) is ignored, so 0 and negative numbers can no longer
        # wrap around to entries counted from the end of the list.
        picked=[]
        for token in answer.split(","):
            try:
                number=int(token)
            except ValueError:
                continue
            if 1<=number<=len(entries):
                picked.append(entries[number-1])
        return picked

    def allHold(posfeats,pairs):
        # True when every (feature, value) pair matches posfeats.  A failed
        # lookup counts as "does not hold".  An empty list always holds.
        for featK,featV in pairs:
            try:
                if posfeats[featK]==featV:
                    continue
            except Exception:
                return False
            return False
        return True

    if not init:
        init=self
        init.plotstats={}
        init.unitfeats=self._getFeatValDict()

        # One menu entry per (feature, value) pair, ordered by feature.
        sels=[]
        for k,v in sorted(init.unitfeats.items()):
            for kk,vv in v.items():
                sels.append((k,kk))

        conditions={'x':[],'y':[]}
        targets={'x':[],'y':[]}
        pkey={'x':[],'y':[]}

        print(str(0)+"\t[no condition]")
        for selnum in range(len(sels)):
            print(str(selnum+1)+"\t"+str(sels[selnum][0])+" = "+str(sels[selnum][1]))
        print("")

        # Four prompts in total: population then sample, for x and then y.
        stepnum=0
        for a in sorted(conditions.keys()):
            stepnum+=1
            sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of population] please type in the number (or numbers separated by commas)\n\tof the conditions determining the total population from which the percentage of the sample is taken:\n").strip()
            for entry in chosenEntries(sel,sels):
                conditions[a].append(entry)
                pkey[a]+=["of_"+str(entry[0])+"_is_"+str(entry[1])]

            stepnum+=1
            sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of sample] please type in the number (or numbers separated by commas)\n\tof the conditions determining the sample:\n").strip()
            for entry in chosenEntries(sel,sels):
                targets[a].append(entry)
                pkey[a]+=[str(entry[0])+"_is_"+str(entry[1])]

        init.population=conditions
        init.sample=targets
        # pkey labels this plot and is passed to writeToFile for every output.
        init.pkey="X_"+"-".join(pkey['x'])+"."+"Y_"+"-".join(pkey['y'])
        print(">> plotting: "+ init.pkey)

    if not hasattr(self,'bestparses'):
        for child in self.children:
            child.plot(init)
    else:
        # NOTE(review): the guard tests for 'bestparses' but the loop reads
        # the double-underscore attribute `__bestparses`; confirm both refer
        # to the same data.
        posdict={}
        for posnum in range(self.minparselen):
            posdict[posnum]={'x':[],'y':[]}

            for parse in self.__bestparses:
                posfeats=parse.positions[posnum].posfeats()

                for a in ['x','y']:
                    # Only parses inside the population are tallied at all.
                    if not allHold(posfeats,init.population[a]):
                        continue
                    if allHold(posfeats,init.sample[a]):
                        posdict[posnum][a].append(1)
                    else:
                        posdict[posnum][a].append(0)

            for a in ['x','y']:
                if not posdict[posnum][a]:
                    print("<< not enough data: position number ("+str(posnum)+") empty on dimension ["+str(a)+"]")
                    return None

        init.plotstats[self.getName()]=posdict

    # Only the initiating object produces output.
    if (self!=init):
        return None

    totalstrs=[]
    totaltsvs=[]
    for textname,posdict in sorted(init.plotstats.items()):
        tsv="posnum\tx_mean\ty_mean\tx_std\ty_std\n"
        xs=[]
        ys=[]
        for posnum,xydict in posdict.items():
            x_avg,x_std=mean_stdev(xydict['x'])
            y_avg,y_std=mean_stdev(xydict['y'])

            xs.append(x_avg)
            ys.append(y_avg)
            tsv+="\t".join(str(bb) for bb in [(posnum+1),x_avg,y_avg,x_std,y_std])+"\n"

        ccmsg=""
        cc=None
        p=None
        try:
            from statlib import stats
            (cc,p)=stats.pearsonr(xs,ys)

            aa=makeminlength(" correlation coefficient: ",int(being.linelen/1.4))+str(cc)
            bb=makeminlength(" p-value: ",int(being.linelen/1.4))+str(p)

            tsv+=aa.strip().replace(":",":\t")+"\n"
            tsv+=bb.strip().replace(":",":\t")+"\n"

            for l in tsv.split("\n"):
                totaltsvs.append(textname+"_"+l)

            ccmsg+=aa+"\n"+bb+"\n"
        except Exception:
            # Best effort: without statlib, or when the correlation cannot be
            # computed, the table is still written, just without these lines.
            pass

        writeToFile(textname,init.pkey,tsv,extension="tsv")
        totaltsvs.append(tsv)

        try:
            strtowrite=self.makeBubbleChart(posdict,".".join([textname,init.pkey]),(cc,p))
            totalstrs+=[strtowrite]
            writeToFile(textname,init.pkey,self._getBubbleHeader()+strtowrite+self._getBubbleFooter(),extension="htm")
        except Exception:
            # Best effort: a failed chart must not stop the remaining texts.
            pass

        if ccmsg:
            print(ccmsg)

    # The combined files are only written when plotting a whole corpus.
    if not self.classname()=="Corpus":
        return None

    writeToFile(self.getName(),
                init.pkey,
                self._getBubbleHeader()+"\n<br/><br/><br/><br/><br/><br/><br/><br/>\n".join(totalstrs)+self._getBubbleFooter(),
                iscorpus=True,
                extension="htm")
    writeToFile(self.getName(),init.pkey,"\n\n\n\n".join(totaltsvs),iscorpus=True,extension="tsv")
# Fill SNP2 (created before this point) with col4 / (col4 + col6) for every
# File2 row that is not a comment, is not on chrX, has its "<col0>\t<col1>"
# key in SNP1, and whose columns 4 and 6 sum to at least 30.
for line in fileinput.input([File2]):
    rowlist = line.rstrip("\n").split('\t')
    wanted = not (line.startswith("#") or rowlist[0] == "chrX")
    if wanted:
        locus = rowlist[0] + "\t" + rowlist[1]
        # `and` keeps the count parsing restricted to keys found in SNP1.
        if locus in SNP1 and int(rowlist[4]) + int(rowlist[6]) >= 30:
            numerator = float(rowlist[4])
            denominator = float(int(rowlist[4]) + int(rowlist[6]))
            SNP2[locus] = numerator / denominator

Array1 = []
Array2 = []
from statlib import stats
reload(stats)

# One output line per shared key: "<SNP2 value>\t<SNP1 value>".  The two
# lists are filled in the same order so they can be correlated pairwise.
for snp in SNP2:
    second_value = SNP2[snp]
    first_value = SNP1[snp]
    output.write(str(second_value) + "\t" + str(first_value) + "\n")
    Array2.append(second_value)
    Array1.append(first_value)

print(pearson(Array1, Array2))
print(stats.pearsonr(Array1, Array2))
print(len(Array2))
output.close()
def plot(self,init=None):
    """Interactive plotting of parsing features.

    On the initiating call (``init`` not given) a numbered menu of
    (feature, value) pairs is printed and the user is asked, for the x and
    then the y coordinate, which entries define the population and which
    define the sample.  The call then recurses through ``self.children``
    until it reaches objects that have a ``bestParses`` attribute.  For each
    parse position (up to the longest parse) such an object records, per
    coordinate, a 1 or 0 for every parse that has that position and meets the
    population conditions (1 when the sample conditions hold as well).  Back
    on the initiating object the per-position observation counts and means
    are written out as TSV and the Pearson correlation between the x and y
    means is printed when it can be computed.

    init -- the object that started the plot.  It carries the shared state
            (``plotstats``, ``unitfeats``, ``population``, ``sample``,
            ``pkey``).  Callers normally leave it as None.

    Always returns None.
    """

    def chosenEntries(answer,entries):
        # Translate a comma-separated answer into menu entries.  Menu numbers
        # start at 1; 0 is "[no condition]".  Anything that is not a number in
        # 1..len(entries) is ignored, so 0 and negative numbers can no longer
        # wrap around to entries counted from the end of the list.
        picked=[]
        for token in answer.split(","):
            try:
                number=int(token)
            except ValueError:
                continue
            if 1<=number<=len(entries):
                picked.append(entries[number-1])
        return picked

    def allHold(posfeats,pairs):
        # True when every (feature, value) pair matches posfeats.  A failed
        # lookup counts as "does not hold".  An empty list always holds.
        for featK,featV in pairs:
            try:
                if posfeats[featK]==featV:
                    continue
            except Exception:
                return False
            return False
        return True

    if not init:
        init=self
        init.plotstats={}
        init.unitfeats=self._getFeatValDict()

        # One menu entry per (feature, value) pair, ordered by feature.
        sels=[]
        for k,v in sorted(init.unitfeats.items()):
            for kk,vv in v.items():
                sels.append((k,kk))

        conditions={'x':[],'y':[]}
        targets={'x':[],'y':[]}
        pkey={'x':[],'y':[]}

        print(str(0)+"\t[no condition]")
        for selnum in range(len(sels)):
            print(str(selnum+1)+"\t"+str(sels[selnum][0])+" = "+str(sels[selnum][1]))
        print("")

        # Four prompts in total: population then sample, for x and then y.
        stepnum=0
        for a in sorted(conditions.keys()):
            stepnum+=1
            sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of population] please type in the number (or numbers separated by commas)\n\tof the conditions determining the total population from which the percentage of the sample is taken:\n").strip()
            for entry in chosenEntries(sel,sels):
                conditions[a].append(entry)
                pkey[a]+=["of_"+str(entry[0])+"_is_"+str(entry[1])]

            stepnum+=1
            sel=raw_input(">> [step "+str(stepnum)+"/4] ["+a+" coord] [conditions of sample] please type in the number (or numbers separated by commas)\n\tof the conditions determining the sample:\n").strip()
            for entry in chosenEntries(sel,sels):
                targets[a].append(entry)
                pkey[a]+=[str(entry[0])+"_is_"+str(entry[1])]

        init.population=conditions
        init.sample=targets
        # pkey labels this plot and is passed to writeToFile for every output.
        init.pkey="X_"+"-".join(pkey['x'])+"."+"Y_"+"-".join(pkey['y'])
        print(">> plotting: "+ init.pkey)

    if not hasattr(self,'bestParses'):
        for child in self.children:
            child.plot(init)
    else:
        posdict={}
        # Fetch the parses once instead of once per position.
        parses=list(self.bestParses())
        maxparselen = max([len(parse.positions) for parse in parses])

        for posnum in range(maxparselen):
            posdict[posnum]={'x':[],'y':[]}

            for parse in parses:
                try:
                    posfeats=parse.positions[posnum].posfeats()
                except IndexError:
                    # there is no position number `posnum` in this parse `parse`
                    continue

                for a in ['x','y']:
                    # Only parses inside the population are tallied at all.
                    if not allHold(posfeats,init.population[a]):
                        continue
                    if allHold(posfeats,init.sample[a]):
                        posdict[posnum][a].append(1)
                    else:
                        posdict[posnum][a].append(0)

            for a in ['x','y']:
                if not posdict[posnum][a]:
                    print("<< not enough data: position number ("+str(posnum)+") empty on dimension ["+str(a)+"]")
                    return None

        init.plotstats[self.getName()]=posdict

    # Only the initiating object produces output.
    if (self!=init):
        return None

    totalstrs=[]
    totaltsvs=[]
    for textname,posdict in sorted(init.plotstats.items()):
        tsv="posnum\tnumobs\tx_mean\ty_mean\tx_std\ty_std\n"
        xs=[]
        ys=[]
        for posnum,xydict in posdict.items():
            x_avg,x_std=mean_stdev(xydict['x'])
            y_avg,y_std=mean_stdev(xydict['y'])

            # numobs below is taken from the x tally, so both tallies must be
            # the same length.  NOTE(review): each tally only grows when its
            # own population conditions hold, so different x and y population
            # conditions can trip this assertion.
            assert len(xydict['x'])==len(xydict['y'])

            xs.append(x_avg)
            ys.append(y_avg)
            tsv+="\t".join(str(bb) for bb in [(posnum+1),len(xydict['x']),x_avg,y_avg,x_std,y_std])+"\n"

        ccmsg=""
        cc=None
        p=None
        try:
            from statlib import stats
            (cc,p)=stats.pearsonr(xs,ys)

            aa=makeminlength(" correlation coefficient: ",int(being.linelen/1.4))+str(cc)
            bb=makeminlength(" p-value: ",int(being.linelen/1.4))+str(p)

            tsv+=aa.strip().replace(":",":\t")+"\n"
            tsv+=bb.strip().replace(":",":\t")+"\n"

            for l in tsv.split("\n"):
                totaltsvs.append(textname+"_"+l)

            ccmsg+=aa+"\n"+bb+"\n"
        except Exception:
            # Best effort: without statlib, or when the correlation cannot be
            # computed, the table is still written, just without these lines.
            pass

        writeToFile(textname,init.pkey,tsv,extension="tsv")
        totaltsvs.append(tsv)

        # Bubble-chart output is currently disabled:
        # try:
        #     strtowrite=self.makeBubbleChart(posdict,".".join([textname,init.pkey]),(cc,p))
        #     totalstrs+=[strtowrite]
        #     writeToFile(textname,init.pkey,self._getBubbleHeader()+strtowrite+self._getBubbleFooter(),extension="htm")
        # except:
        #     pass

        if ccmsg:
            print(ccmsg)

    # The combined file is only written when plotting a whole corpus.
    if not self.classname()=="Corpus":
        return None

    # Combined bubble-chart page is currently disabled:
    # writeToFile(self.getName(),
    #             init.pkey,
    #             self._getBubbleHeader()+"\n<br/><br/><br/><br/><br/><br/><br/><br/>\n".join(totalstrs)+self._getBubbleFooter(),
    #             iscorpus=True,
    #             extension="htm")

    writeToFile(self.getName(),init.pkey,"\n\n\n\n".join(totaltsvs),iscorpus=True,extension="tsv")
# Fragment of a linear self-check script: each section prints the value the
# textbook says to expect ("SHOULD BE ..."), then runs the list-based function
# and its array-based counterpart (the "a"-prefixed name) on the same data.
print '\n\nRelated Samples t-test'
before = [11,16,20,17,10]
after = [8,11,15,11,11]
print '\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359'
# The results are not printed here; presumably the third argument (1) makes
# the functions report their own results -- TODO confirm against statlib.
stats.ttest_rel(before,after,1,'Before','After')
stats.attest_rel(array(before),array(after),1,'Before','After')

print "\n\nPearson's r"
x = [0,0,1,1,1,2,2,3,3,4]
y = [8,7,7,6,5,4,4,4,2,0]
print 'SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190'
print stats.pearsonr(x,y)
print stats.apearsonr(array(x),array(y))

print "\n\nSpearman's r"
# x and y are rebound to a new nine-element data set for this section.
x = [4,1,9,8,3,5,6,2,7]
y = [3,2,8,6,5,4,7,1,9]
print '\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193'
print stats.spearmanr(x,y)
print stats.aspearmanr(array(x),array(y))

print '\n\nPoint-Biserial r'
# gender is not used within the visible fragment; presumably the statements
# that follow consume it.
gender = [1,1,1,1,2,2,2,2,2,2]
# Build the second sample (4..23, with index 10 overwritten by 34).  The
# explicit list() is needed because the next line assigns to an item, which
# only works when the sequence is a real list.
m = list(range(4, 24))
m[10] = 34
b = N.array(m)

# Nine zeros then eleven ones: the two-valued input for pointbiserialr.
pb = [0] * 9 + [1] * 11
apb = N.array(pb)

# print is always called with exactly one argument so that the text written
# is the same under the print statement and the print function.
print('paired:')
#stats.paired(l,m)
#stats.paired(a,b)
print('')
print('')

# For each measure: list inputs first, then the array inputs.
print('pearsonr:')
print(stats.pearsonr(l, m))
print(stats.pearsonr(a, b))

print('spearmanr:')
print(stats.spearmanr(l, m))
print(stats.spearmanr(a, b))

print('pointbiserialr:')
print(stats.pointbiserialr(pb, l))
print(stats.pointbiserialr(apb, a))

print('kendalltau:')
print(stats.kendalltau(l, m))
print(stats.kendalltau(a, b))

print('linregress:')
print(stats.linregress(l, m))
print(stats.linregress(a, b))

print('\nINFERENTIAL')
# Fragment of a linear self-check script: each section prints the value the
# textbook says to expect ("SHOULD BE ..."), then runs the list-based function
# and its array-based counterpart (the "a"-prefixed name) on the same data.
print('\n\nRelated Samples t-test')
before = [11,16,20,17,10]
after = [8,11,15,11,11]
print('\n\nSHOULD BE t=+2.88, 0.01<p<0.05 (df=4) ... Basic Stats 1st ed, p.359')
# The results are not printed here; presumably the third argument (1) makes
# the functions report their own results -- TODO confirm against statlib.
stats.ttest_rel(before,after,1,'Before','After')
stats.attest_rel(array(before),array(after),1,'Before','After')

print("\n\nPearson's r")
x = [0,0,1,1,1,2,2,3,3,4]
y = [8,7,7,6,5,4,4,4,2,0]
print('SHOULD BE -0.94535 (N=10) ... Basic Stats 1st ed, p.190')
print(stats.pearsonr(x,y))
print(stats.apearsonr(array(x),array(y)))

print("\n\nSpearman's r")
# x and y are rebound to a new nine-element data set for this section.
x = [4,1,9,8,3,5,6,2,7]
y = [3,2,8,6,5,4,7,1,9]
print('\nSHOULD BE +0.85 on the dot (N=9) ... Basic Stats 1st ed, p.193')
print(stats.spearmanr(x,y))
print(stats.aspearmanr(array(x),array(y)))

print('\n\nPoint-Biserial r')
# gender is not used within the visible fragment; presumably the statements
# that follow consume it.
gender = [1,1,1,1,2,2,2,2,2,2]