def computeDistMeansForComplex(cid, N, pdbpklpath, pppath):
    """
    Gather AUC values and distance means for a single complex.

    cid        -- complex id; '<cid>_l_u.pdb.pkl' and '<cid>_r_u.pdb.pkl' are
                  loaded from pdbpklpath with myPDB.loader
    N          -- accepted but not used in this function
    pdbpklpath -- directory holding the pickled structures
    pppath     -- either a string prefix to which '<cid>.pairpred.txt' is
                  appended and parsed with readFile, or an already parsed
                  7-tuple in the same layout

    Returns the 7-tuple
        (pauc, lauc, rauc,
         getDistMean(lD, r, top=True), getDistMean(lD, r, top=False),
         getDistMean(rD, c, top=True), getDistMean(rD, c, top=False))
    with M=20 for every getDistMean call.
    """
    ligand = myPDB.loader(os.path.join(pdbpklpath, cid + '_l_u.pdb.pkl'))
    receptor = myPDB.loader(os.path.join(pdbpklpath, cid + '_r_u.pdb.pkl'))

    if type(pppath) == type(''):
        parsed = readFile(pppath + cid + '.pairpred.txt', usePDBidx=False)
        (pauc, Mv, Ml, lseq, rseq, lauc, rauc) = parsed
    else:
        (pauc, Mv, Ml, lseq, rseq, lauc, rauc) = pppath
        # NOTE(review): the original indentation was lost; these resets are
        # assumed to belong to this branch only -- confirm against history.
        lauc = None
        rauc = None

    # Best effort: if the per-protein values cannot be turned into an AUC
    # (e.g. they are None), whatever lauc/rauc currently hold is returned.
    try:
        lvals = np.array(lauc.values())
        (_, _, lauc) = roc.roc(list(lvals[:, 0]), list(lvals[:, 1]))
        rvals = np.array(rauc.values())
        (_, _, rauc) = roc.roc(list(rvals[:, 0]), list(rvals[:, 1]))
    except:
        pass

    # The ranked scores are random numbers; entries with a negative label
    # are pinned to -1 before sorting.
    Mlx = np.random.random(Ml.shape)
    Mlx[Ml < 0] = -1
    (r, c, v) = sortScores(Mlx)

    lD = getDistMat(getCoords(ligand.R))
    rD = getDistMat(getCoords(receptor.R))

    M = 20
    return (pauc, lauc, rauc,
            getDistMean(lD, r, top=True, M=M),
            getDistMean(lD, r, top=False, M=M),
            getDistMean(rD, c, top=True, M=M),
            getDistMean(rD, c, top=False, M=M))
def computeDistMeansForComplex(cid, N, pdbpklpath, pppath):
    """
    Gather AUC values and distance means for a single complex.

    cid        -- complex id; '<cid>_l_u.pdb.pkl' and '<cid>_r_u.pdb.pkl' are
                  loaded from pdbpklpath with myPDB.loader
    N          -- accepted but not used in this function
    pdbpklpath -- directory holding the pickled structures
    pppath     -- either a string prefix to which '<cid>.pairpred.txt' is
                  appended and parsed with readFile, or an already parsed
                  7-tuple in the same layout

    Returns the 7-tuple
        (pauc, lauc, rauc,
         getDistMean(lD, r, top=True), getDistMean(lD, r, top=False),
         getDistMean(rD, c, top=True), getDistMean(rD, c, top=False))
    with M=20 for every getDistMean call.
    """
    ligand = myPDB.loader(os.path.join(pdbpklpath, cid + '_l_u.pdb.pkl'))
    receptor = myPDB.loader(os.path.join(pdbpklpath, cid + '_r_u.pdb.pkl'))

    if type(pppath) == type(''):
        parsed = readFile(pppath + cid + '.pairpred.txt', usePDBidx=False)
        (pauc, Mv, Ml, lseq, rseq, lauc, rauc) = parsed
    else:
        (pauc, Mv, Ml, lseq, rseq, lauc, rauc) = pppath
        # NOTE(review): the original indentation was lost; these resets are
        # assumed to belong to this branch only -- confirm against history.
        lauc = None
        rauc = None

    # Best effort: if the per-protein values cannot be turned into an AUC
    # (e.g. they are None), whatever lauc/rauc currently hold is returned.
    try:
        lvals = np.array(lauc.values())
        (_, _, lauc) = roc.roc(list(lvals[:, 0]), list(lvals[:, 1]))
        rvals = np.array(rauc.values())
        (_, _, rauc) = roc.roc(list(rvals[:, 0]), list(rvals[:, 1]))
    except:
        pass

    # The ranked scores are random numbers; entries with a negative label
    # are pinned to -1 before sorting.
    Mlx = np.random.random(Ml.shape)
    Mlx[Ml < 0] = -1
    (r, c, v) = sortScores(Mlx)

    lD = getDistMat(getCoords(ligand.R))
    rD = getDistMat(getCoords(receptor.R))

    M = 20
    return (pauc, lauc, rauc,
            getDistMean(lD, r, top=True, M=M),
            getDistMean(lD, r, top=False, M=M),
            getDistMean(rD, c, top=True, M=M),
            getDistMean(rD, c, top=False, M=M))
def getAUCs(mx, dx, dd, dthr=6.0):
    """
    Score two prediction arrays against labels derived from distances.

    mx, dx -- score arrays
    dd     -- distance array; an entry is labelled +1 when dd < dthr and
              -1 otherwise
    dthr   -- distance threshold (default 6.0)

    All three arrays are flattened and every position where any of them is
    NaN is dropped before roc.roc is called.

    Returns (aa_mi, aa_di): the third element of roc.roc for mx and for dx.
    """
    labels = (2 * (dd < dthr) - 1).flatten()
    m_flat = mx.flatten()
    d_flat = dx.flatten()

    # keep only positions that are NaN-free in all three arrays
    keep = ~(np.isnan(m_flat) | np.isnan(labels) | np.isnan(d_flat))
    labels = list(labels[keep])
    m_flat = list(m_flat[keep])
    d_flat = list(d_flat[keep])

    aa_mi = roc.roc(m_flat, labels)
    aa_di = roc.roc(d_flat, labels)
    (_, _, aa_mi) = aa_mi
    (_, _, aa_di) = aa_di
    return aa_mi, aa_di
def getAUCs(mx, dx, dd, dthr=6.0):
    """
    Score two prediction arrays against labels derived from distances.

    mx, dx -- score arrays
    dd     -- distance array; an entry is labelled +1 when dd < dthr and
              -1 otherwise
    dthr   -- distance threshold (default 6.0)

    All three arrays are flattened and every position where any of them is
    NaN is dropped before roc.roc is called.

    Returns (aa_mi, aa_di): the third element of roc.roc for mx and for dx.
    """
    labels = (2 * (dd < dthr) - 1).flatten()
    m_flat = mx.flatten()
    d_flat = dx.flatten()

    # keep only positions that are NaN-free in all three arrays
    keep = ~(np.isnan(m_flat) | np.isnan(labels) | np.isnan(d_flat))
    labels = list(labels[keep])
    m_flat = list(m_flat[keep])
    d_flat = list(d_flat[keep])

    aa_mi = roc.roc(m_flat, labels)
    aa_di = roc.roc(d_flat, labels)
    (_, _, aa_mi) = aa_mi
    (_, _, aa_di) = aa_di
    return aa_mi, aa_di
def getAUC4Protein(lrV):
    """
    Compute one AUC from a mapping whose values are (score, label) pairs.

    lrV -- dict; element 0 of every value is used as the score and
           element 1 as the label

    Returns (a, vv, ll): the third element of roc.roc(scores, labels)
    followed by the scores and the labels as numpy arrays, both in
    lrV.values() order.
    """
    # transpose the values into one list per tuple position
    columns = [list(col) for col in zip(*lrV.values())]
    scores = columns[0]
    labels = columns[1]
    (_, _, a) = roc.roc(scores, labels)
    return (a, np.array(scores), np.array(labels))
def getTP_RFPP(Mv, Ml):
    """
    Returns the number of true positives in the top 50 predictions and the
    rank of the first positive prediction.

    Mv -- prediction scores
    Ml -- labels (a positive is an entry equal to 1)

    Entries that are NaN in either array are ignored.  The first value is
    the maximum of the tpr array returned by
    roc.roc(scores, labels, 50, normalize=False); the second is the
    zero-based position of the first label equal to 1 once entries are
    ordered by decreasing score (np.argmax yields 0 when there is none).
    """
    valid = ~(np.isnan(Mv) | np.isnan(Ml))
    scores = Mv[valid]
    labels = Ml[valid]

    ranked_labels = labels[np.argsort(-scores)]
    rfpp = np.argmax(ranked_labels == 1)

    (fpr, tpr, r) = roc.roc(list(scores), list(labels), 50, normalize=False)
    return (np.max(tpr), rfpp)
def parseShandarFiles(ifile, auconly=False, **kwargs):
    """
    Reads shandar's output files with labels (made on the same pattern as
    analyzePredFile.readFile).

    ifile   -- path; getFileParts(ifile) supplies the directory and the id.
               '<id>.preds' and '<id>.cont' in that directory are read line
               by line in lockstep.  The second and third '_'-separated
               parts of the id select which chains are kept.
    auconly -- when true, only the AUC is returned
    kwargs  -- accepted and ignored

    Returns auc, or (auc, Mvm, Mlm, None, None, lrV, rrV) where Mvm holds
    the predictions (NaN where a pair never appeared), Mlm the labels
    (0 where a pair never appeared) and lrV / rrV map a row / column index
    to (max prediction, max label) along that row / column.
    """
    def parseContLine(ln):
        # A split .cont line and the positions of its fields:
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0     1      2      3    4     5    6       7      8     9     10
        fields = ln.split()
        return (fields[0] + fields[1], fields[5] + fields[6], int(fields[9]))

    loopath, cid, _ = getFileParts(ifile)
    lcids = cid.split('_')[1]
    rcids = cid.split('_')[2]

    row_of = {}   # ligand residue key -> row index, in order of appearance
    col_of = {}   # receptor residue key -> column index
    entries = []  # (row, column, label, prediction)
    preds_path = os.path.join(loopath, cid + '.preds')
    cont_path = os.path.join(loopath, cid + '.cont')
    with open(preds_path) as fp, open(cont_path) as fc:
        for lnp, lnc in zip(fp, fc):
            (lidx, ridx, lbl) = parseContLine(lnc)
            # the first character of each key is the chain id
            if not (lidx[0] in lcids and ridx[0] in rcids):
                continue
            if lidx not in row_of:
                row_of[lidx] = len(row_of)
            if ridx not in col_of:
                col_of[ridx] = len(col_of)
            entries.append((row_of[lidx], col_of[ridx], lbl, float(lnp)))

    shape = (len(row_of), len(col_of))
    Mvm = np.empty(shape)
    Mvm.fill(np.nan)
    Mlm = np.zeros(shape)
    for (lx, rx, lbl, p) in entries:
        Mlm[lx, rx] = lbl
        Mvm[lx, rx] = p

    (_, _, auc) = roc.roc(list(Mvm.flatten()), list(Mlm.flatten()))
    if auconly:
        return auc

    # construct lrV, rrV
    row_best = zip(np.max(Mvm, axis=1), np.max(Mlm, axis=1))
    col_best = zip(np.max(Mvm, axis=0), np.max(Mlm, axis=0))
    lrV = dict(zip(range(Mvm.shape[0]), row_best))
    rrV = dict(zip(range(Mvm.shape[1]), col_best))
    return auc, Mvm, Mlm, None, None, lrV, rrV
def getTP_RFPP(Mv, Ml):
    """
    Returns the number of true positives in the top 50 predictions and the
    rank of the first positive prediction.

    Mv -- prediction scores
    Ml -- labels (a positive is an entry equal to 1)

    Entries that are NaN in either array are ignored.  The first value is
    the maximum of the tpr array returned by
    roc.roc(scores, labels, 50, normalize=False); the second is the
    zero-based position of the first label equal to 1 once entries are
    ordered by decreasing score (np.argmax yields 0 when there is none).
    """
    valid = ~(np.isnan(Mv) | np.isnan(Ml))
    scores = Mv[valid]
    labels = Ml[valid]

    ranked_labels = labels[np.argsort(-scores)]
    rfpp = np.argmax(ranked_labels == 1)

    (fpr, tpr, r) = roc.roc(list(scores), list(labels), 50, normalize=False)
    return (np.max(tpr), rfpp)
def roc_score(data, targetClass, otherClass, **args):
    """
    Score every feature by the AUC it achieves on its own.

    data        -- dataset exposing numFeatures, getFeature(i) and labels.Y
    targetClass -- unused; kept for interface compatibility
    otherClass  -- unused; kept for interface compatibility
    args        -- optional 'rocN', forwarded to roc.roc as its third
                   positional argument (None when absent)

    Returns a float array of length data.numFeatures whose i-th entry is
    max(auc, 1 - auc) for feature i, so a feature that ranks the classes
    in the reverse order scores as high as one that ranks them forwards.
    """
    # The option used to be parsed and then dropped; it is now honoured.
    rocN = args.get('rocN')

    # plain float instead of numpy.float_, which NumPy 2.0 removed
    s = numpy.zeros(data.numFeatures, float)
    for i in range(data.numFeatures):
        featureValues = data.getFeature(i)
        auc = roc.roc(featureValues, data.labels.Y, rocN)[2]
        s[i] = max(auc, 1 - auc)
    return s
def roc_score(data, targetClass, otherClass, **args):
    """
    Score every feature by the AUC it achieves on its own.

    data        -- dataset exposing numFeatures, getFeature(i) and labels.Y
    targetClass -- unused; kept for interface compatibility
    otherClass  -- unused; kept for interface compatibility
    args        -- optional 'rocN', forwarded to roc.roc as its third
                   positional argument (None when absent)

    Returns a float array of length data.numFeatures whose i-th entry is
    max(auc, 1 - auc) for feature i, so a feature that ranks the classes
    in the reverse order scores as high as one that ranks them forwards.
    """
    # The option used to be parsed and then dropped; it is now honoured.
    rocN = args.get('rocN')

    # plain float instead of numpy.float_, which NumPy 2.0 removed
    s = numpy.zeros(data.numFeatures, float)
    for i in range(data.numFeatures):
        featureValues = data.getFeature(i)
        auc = roc.roc(featureValues, data.labels.Y, rocN)[2]
        s[i] = max(auc, 1 - auc)
    return s
def plotROC(self, filename=None, fold=None, **args):
    """
    Plot an ROC curve for these results.

    filename -- handed to roc_module.plotROC unchanged
    fold     -- None to use every fold (an averaged curve from
                roc_module.roc_VA when there is more than one fold),
                otherwise the fold whose curve is plotted
    args     -- only 'rocN' is read; it is passed to the ROC computation

    Raises ValueError when fold is greater than self.numFolds.
    """
    rocN = args.get('rocN')

    if self.numFolds == 1:
        # the results are for a single split
        labels = self.getGivenClass()
        dvals = self.getDecisionFunction()
        curve = roc_module.roc(dvals, labels, rocN)
    elif fold is None:
        # averaged ROC curve over all folds
        labels = self.getGivenClass()
        dvals = self.getDecisionFunction()
        folds = [(dvals[i], fold_labels)
                 for i, fold_labels in enumerate(labels)]
        curve = roc_module.roc_VA(folds, rocN)
    else:
        # ROC curve for the requested fold only
        if fold > self.numFolds:
            raise ValueError('foldNum too large')
        labels = self.getGivenClass(fold)
        dvals = self.getDecisionFunction(fold)
        curve = roc_module.roc(dvals, labels, rocN)

    rocFP, rocTP, area = curve
    roc_module.plotROC(rocFP, rocTP, filename)
def plotROC(self, filename=None, fold=None, **args):
    """
    Plot an ROC curve for these results.

    filename -- handed to roc_module.plotROC unchanged
    fold     -- None to use every fold (an averaged curve from
                roc_module.roc_VA when there is more than one fold),
                otherwise the fold whose curve is plotted
    args     -- 'rocN' is read and passed to the ROC computation; the
                whole keyword set is also forwarded to roc_module.plotROC

    Raises ValueError when fold is greater than self.numFolds.
    """
    rocN = args.get('rocN')

    if self.numFolds == 1:
        # the results are for a single split
        labels = self.getGivenClass()
        dvals = self.getDecisionFunction()
        curve = roc_module.roc(dvals, labels, rocN)
    elif fold is None:
        # averaged ROC curve over all folds
        labels = self.getGivenClass()
        dvals = self.getDecisionFunction()
        folds = [(dvals[i], fold_labels)
                 for i, fold_labels in enumerate(labels)]
        curve = roc_module.roc_VA(folds, rocN)
    else:
        # ROC curve for the requested fold only
        if fold > self.numFolds:
            raise ValueError('foldNum too large')
        labels = self.getGivenClass(fold)
        dvals = self.getDecisionFunction(fold)
        curve = roc_module.roc(dvals, labels, rocN)

    rocFP, rocTP, area = curve
    roc_module.plotROC(rocFP, rocTP, filename, **args)
def parse1SVM(ifile, auconly=False, **kwargs):
    """
    Build pairwise score and label matrices from stored per-protein results.

    ifile   -- path; the complex id is the first four characters of
               getFileParts(getFileParts(ifile)[1])[1]
    auconly -- when true, only the AUC is returned
    kwargs  -- accepted and ignored

    The examples are loaded from 'EP_6N.lbl.pkl' and the per-protein results
    from 'result.sgl.pkl', both relative to the working directory.  For each
    (xr, xc) in lrV.keys() x rrV.keys() the score is
    lrV[xr][0] + rrV[xc][0] and the label is +1.0 when (xr, xc) is in
    E.Pex[cid][0], else -1.0.

    Returns auc, or (auc, Mv, Ml, None, None, lrV, rrV), the same layout the
    other readers in this file return.
    """
    exfname = 'EP_6N.lbl.pkl'
    sglfile = 'result.sgl.pkl'

    # E and Asgl are locals here, so the former "try: E / except NameError"
    # probes could never find an existing value; both are simply loaded.
    E = getExamplesDBD.loader(exfname)
    # NOTE: unpickling executes arbitrary code -- only use trusted files.
    with open(sglfile, "rb") as fh:
        Asgl = cPickle.load(fh)

    cid = getFileParts(getFileParts(ifile)[1])[1][:4]
    (la, ra, lrV, rrV) = Asgl[cid]
    positives = E.Pex[cid][0]

    Mv = np.zeros((len(lrV), len(rrV)))
    Ml = np.zeros(Mv.shape)
    # rows follow lrV.keys() order, columns follow rrV.keys() order
    for lidx, xr in enumerate(lrV.keys()):
        for ridx, xc in enumerate(rrV.keys()):
            Mv[lidx, ridx] = lrV[xr][0] + rrV[xc][0]
            Ml[lidx, ridx] = +1.0 if (xr, xc) in positives else -1.0

    (_, _, auc) = roc.roc(list(Mv.flatten()), list(Ml.flatten()))
    if auconly:
        return auc
    return (auc, Mv, Ml, None, None, lrV, rrV)
def parse1SVM(ifile, auconly=False, **kwargs):
    """
    Build pairwise score and label matrices from stored per-protein results.

    ifile   -- path; the complex id is the first four characters of
               getFileParts(getFileParts(ifile)[1])[1]
    auconly -- when true, only the AUC is returned
    kwargs  -- accepted and ignored

    The examples are loaded from 'EP_6N.lbl.pkl' and the per-protein results
    from 'result.sgl.pkl', both relative to the working directory.  For each
    (xr, xc) in lrV.keys() x rrV.keys() the score is
    lrV[xr][0] + rrV[xc][0] and the label is +1.0 when (xr, xc) is in
    E.Pex[cid][0], else -1.0.

    Returns auc, or (auc, Mv, Ml, None, None, lrV, rrV), the same layout the
    other readers in this file return.
    """
    exfname = 'EP_6N.lbl.pkl'
    sglfile = 'result.sgl.pkl'

    # E and Asgl are locals here, so the former "try: E / except NameError"
    # probes could never find an existing value; both are simply loaded.
    E = getExamplesDBD.loader(exfname)
    # NOTE: unpickling executes arbitrary code -- only use trusted files.
    with open(sglfile, "rb") as fh:
        Asgl = cPickle.load(fh)

    cid = getFileParts(getFileParts(ifile)[1])[1][:4]
    (la, ra, lrV, rrV) = Asgl[cid]
    positives = E.Pex[cid][0]

    Mv = np.zeros((len(lrV), len(rrV)))
    Ml = np.zeros(Mv.shape)
    # rows follow lrV.keys() order, columns follow rrV.keys() order
    for lidx, xr in enumerate(lrV.keys()):
        for ridx, xc in enumerate(rrV.keys()):
            Mv[lidx, ridx] = lrV[xr][0] + rrV[xc][0]
            Ml[lidx, ridx] = +1.0 if (xr, xc) in positives else -1.0

    (_, _, auc) = roc.roc(list(Mv.flatten()), list(Ml.flatten()))
    if auconly:
        return auc
    return (auc, Mv, Ml, None, None, lrV, rrV)
def rasaPlot(rAsa, Dxx, Lxx, Np=10):
    """
    Plot AUC and class counts against binned rAsa values.

    rAsa -- array of values used for binning
    Dxx  -- scores, aligned with rAsa
    Lxx  -- labels, aligned with rAsa (+1 marks a positive)
    Np   -- number of bin edges; Np - 1 equal-width bins span
            [min(rAsa), max(rAsa)]

    For every bin holding more than 10 positives and more than 10
    non-positives the AUC of Dxx against Lxx is computed.  The correlation
    between bin centre and AUC is printed, and two figures are written to
    '../figures/fig_A2.eps' (AUC per bin) and '../figures/fig_A3.eps'
    (counts per bin).  Nothing is returned.
    """
    nb = np.linspace(np.min(rAsa), np.max(rAsa), Np)
    nbins = len(nb) - 1
    R = np.zeros(nbins)
    R.fill(np.nan)        # NaN marks bins without a usable AUC
    xx = np.zeros(nbins)  # bin centres
    pp = np.zeros(nbins)  # positives per bin
    nn = np.zeros(nbins)  # non-positives per bin

    for idx in range(nbins):
        lo, hi = nb[idx], nb[idx + 1]
        # NOTE(review): bins are half-open [lo, hi), so samples equal to
        # the overall maximum land in no bin -- confirm this is intended.
        vidx = np.logical_and(rAsa >= lo, rAsa < hi)
        v = Dxx[vidx]
        l = Lxx[vidx]
        xx[idx] = (lo + hi) / 2.0
        pp[idx] = np.sum(l == +1)
        nn[idx] = np.sum(l != +1)
        if pp[idx] > 10 and nn[idx] > 10:
            try:
                (_, _, R[idx]) = roc.roc(list(v), list(l))
            except Exception:
                # best effort: a bin whose ROC cannot be computed stays NaN
                continue

    naidx = ~np.isnan(R)
    print("Correlation Coefficient:  %s" % (pearsonr(xx[naidx], R[naidx]),))

    # Plotting only below.  Raw strings keep the LaTeX backslash without
    # relying on "\D" being an unrecognised escape sequence.
    stat = r"$\Delta$rASA"

    plt.figure(0)
    plt.plot(xx, R, '-o')
    plt.xlabel(stat)
    plt.ylabel("AUC")
    plt.title(r'AUC vs $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A2.eps', format='eps', dpi=1200)

    plt.figure(1)
    plt.plot(nb[:-1], pp, 'r-^', label="Interacting residues")
    plt.plot(nb[:-1], nn, 'k-v', label="Not-Interacting residues")
    plt.xlabel(stat)
    plt.ylabel("Counts")
    plt.legend(loc=0)
    plt.title(r'Number of residues vs. $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A3.eps', format='eps', dpi=1200)
def computeNTP(ifile, top=200, freader=None):
    """
    Summarise one prediction result.

    ifile   -- either a result file name (freader is then required and is
               called as freader(ifile, usePDBidx=False)) or an already
               parsed tuple (auc, Mv, Ml, lseq, rseq, lrV, rrV).  The auc
               on input is not used; it is recomputed from the scores and
               labels and only kept for compatibility with the file reader.
    top     -- passed to findNTPinTop
    freader -- reader function, e.g. parseShandarFiles

    Returns a 14-tuple:
        auc   the auc score
        ttp   number of true positives in top
        fpi   index of the first true positive
        dntp  distance to the nearest true positive for each top example
        la    auc of ligand
        ra    auc of receptor
        pp    number of positive examples
        nn    number of negative examples
        Mvx   flattened prediction scores (NaN entries removed)
        Mlx   flattened labels (same entries removed)
        lv, ll, rv, rl  score / label arrays from getAUC4Protein for the
                        ligand and the receptor
    """
    if type(ifile) == type(''):
        assert freader is not None
        parsed = freader(ifile, usePDBidx=False)
    else:
        # expects tuple
        parsed = ifile
    (_, Mv, Ml, lseq, rseq, lrV, rrV) = parsed

    (la, lv, ll) = getAUC4Protein(lrV)
    (ra, rv, rl) = getAUC4Protein(rrV)

    Mvx = Mv.ravel()
    Mlx = Ml.ravel()
    invalid = np.isnan(Mvx) | np.isnan(Mlx)
    nidx = ~invalid
    (_, _, auc) = roc.roc(list(Mvx[nidx]), list(Mlx[nidx]))

    # Invalid entries get the lowest possible score before ranking.  ravel()
    # may return a view, in which case this also writes into the caller's Mv.
    Mvx[invalid] = -np.inf
    (ttp, fpi, dntp) = findNTPinTop(Mvx, Mlx, Mv.shape, top=top)

    Mvx = Mvx[nidx]
    Mlx = Mlx[nidx]
    pp = np.sum(Mlx == 1)   # total number of positives
    nn = len(Mlx) - pp      # total number of negatives
    return (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)
def computeNTP(ifile, top=200, freader=None):
    """
    Summarise one prediction result.

    ifile   -- either a result file name (freader is then required and is
               called as freader(ifile, usePDBidx=False)) or an already
               parsed tuple (auc, Mv, Ml, lseq, rseq, lrV, rrV).  The auc
               on input is not used; it is recomputed from the scores and
               labels and only kept for compatibility with the file reader.
    top     -- passed to findNTPinTop
    freader -- reader function, e.g. parseShandarFiles

    Returns a 14-tuple:
        auc   the auc score
        ttp   number of true positives in top
        fpi   index of the first true positive
        dntp  distance to the nearest true positive for each top example
        la    auc of ligand
        ra    auc of receptor
        pp    number of positive examples
        nn    number of negative examples
        Mvx   flattened prediction scores (NaN entries removed)
        Mlx   flattened labels (same entries removed)
        lv, ll, rv, rl  score / label arrays from getAUC4Protein for the
                        ligand and the receptor
    """
    if type(ifile) == type(''):
        assert freader is not None
        parsed = freader(ifile, usePDBidx=False)
    else:
        # expects tuple
        parsed = ifile
    (_, Mv, Ml, lseq, rseq, lrV, rrV) = parsed

    (la, lv, ll) = getAUC4Protein(lrV)
    (ra, rv, rl) = getAUC4Protein(rrV)

    Mvx = Mv.ravel()
    Mlx = Ml.ravel()
    invalid = np.isnan(Mvx) | np.isnan(Mlx)
    nidx = ~invalid
    (_, _, auc) = roc.roc(list(Mvx[nidx]), list(Mlx[nidx]))

    # Invalid entries get the lowest possible score before ranking.  ravel()
    # may return a view, in which case this also writes into the caller's Mv.
    Mvx[invalid] = -np.inf
    (ttp, fpi, dntp) = findNTPinTop(Mvx, Mlx, Mv.shape, top=top)

    Mvx = Mvx[nidx]
    Mlx = Mlx[nidx]
    pp = np.sum(Mlx == 1)   # total number of positives
    nn = len(Mlx) - pp      # total number of negatives
    return (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)
def getAUC(s):
    """
    Compute per-complex AUCs and an overall ROC curve from a results object.

    s -- either the name of a pickle file holding (r, dkey) or that pair
         itself.  r must provide getPatternID(), getDecisionFunction() and
         getGivenLabels(); dkey maps a complex id to the pattern ids that
         belong to it (or to a tuple whose first element is that
         collection, as stored by older results objects).

    Returns (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey)) where fp, tp and
    auc come from roc.roc_VA over all complexes and, per complex i,
    D[i] / L[i] are its decision values / labels, A[i] its AUC and Rx[i]
    its entry in the RMSD dictionary (an empty list when
    'shandar_rmsd.txt' could not be read).

    Raises KeyError when dkey references a pattern id that r does not
    contain.
    """
    if type(s) == type(''):
        # NOTE: unpickling executes arbitrary code -- only use trusted files.
        with open(s, "rb") as fh:
            (r, dkey) = cPickle.load(fh)
    else:
        (r, dkey) = s

    patid = combineList(r.getPatternID())
    vkey = dict(zip(patid, range(len(patid))))  # pattern id -> position
    decfn = combineList(r.getDecisionFunction())
    lblid = combineList(r.getGivenLabels())

    cids = list(dkey.keys())
    D = [[] for _ in cids]
    L = [[] for _ in cids]
    A = [[] for _ in cids]
    Rx = [[] for _ in cids]

    # The RMSD table is optional: carry on without it when it is unavailable.
    try:
        R = getRMSDDict('shandar_rmsd.txt')
    except Exception:
        R = None

    for i, cid in enumerate(cids):
        cidx = dkey[cid]
        if type(cidx) is tuple:
            # backward compatibility with old results objects
            cidx = cidx[0]
        for e in cidx:
            if e not in vkey:
                # Previously this opened the debugger and then carried on
                # with an undefined or stale index; fail loudly instead.
                raise KeyError("pattern id %r of complex %r is missing "
                               "from the results object" % (e, cid))
            n = vkey[e]
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_, _, a) = roc.roc(D[i], L[i])
        A[i] = a
        if R is not None:
            Rx[i] = R[cid]

    (fp, tp, auc) = roc.roc_VA(list(zip(D, L)))
    return (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))
def getAUC(s):
    """
    Compute per-complex AUCs and an overall ROC curve from a results object.

    s -- either the name of a pickle file holding (r, dkey) or that pair
         itself.  r must provide getPatternID(), getDecisionFunction() and
         getGivenLabels(); dkey maps a complex id to the pattern ids that
         belong to it (or to a tuple whose first element is that
         collection, as stored by older results objects).

    Returns (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey)) where fp, tp and
    auc come from roc.roc_VA over all complexes and, per complex i,
    D[i] / L[i] are its decision values / labels, A[i] its AUC and Rx[i]
    its entry in the RMSD dictionary (an empty list when
    'shandar_rmsd.txt' could not be read).

    Raises KeyError when dkey references a pattern id that r does not
    contain.
    """
    if type(s) == type(''):
        # NOTE: unpickling executes arbitrary code -- only use trusted files.
        with open(s, "rb") as fh:
            (r, dkey) = cPickle.load(fh)
    else:
        (r, dkey) = s

    patid = combineList(r.getPatternID())
    vkey = dict(zip(patid, range(len(patid))))  # pattern id -> position
    decfn = combineList(r.getDecisionFunction())
    lblid = combineList(r.getGivenLabels())

    cids = list(dkey.keys())
    D = [[] for _ in cids]
    L = [[] for _ in cids]
    A = [[] for _ in cids]
    Rx = [[] for _ in cids]

    # The RMSD table is optional: carry on without it when it is unavailable.
    try:
        R = getRMSDDict('shandar_rmsd.txt')
    except Exception:
        R = None

    for i, cid in enumerate(cids):
        cidx = dkey[cid]
        if type(cidx) is tuple:
            # backward compatibility with old results objects
            cidx = cidx[0]
        for e in cidx:
            if e not in vkey:
                # Previously this opened the debugger and then carried on
                # with an undefined or stale index; fail loudly instead.
                raise KeyError("pattern id %r of complex %r is missing "
                               "from the results object" % (e, cid))
            n = vkey[e]
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_, _, a) = roc.roc(D[i], L[i])
        A[i] = a
        if R is not None:
            Rx[i] = R[cid]

    (fp, tp, auc) = roc.roc_VA(list(zip(D, L)))
    return (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# For every pickled structure in pkldir, measure how well the column-wise
# maximum of its PSSM separates the residues that take part in a positive
# example from all others.  A maps structure id -> last element of roc().
pdbdir = '../../DBD4CSPKL/PDB_all_'
pkldir = '../../DBD4CSPKL/PKL'
E = getExamplesDBD.loader(
    os.path.join('../../DBD4CSPKL/PKL', 'ENS_15_35_50.lbl.pkl'))

# unique structure ids: file name up to the first '.'
F = list(set([
    getFileParts(g)[1].split('.')[0]
    for g in glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
]))

A = {}
for fid in F:
    print(fid)
    X = myPDB.loader(os.path.join(pkldir, fid + '.pdb.pkl'))
    C = np.max(X.pssm, axis=0)

    # complexes whose key mentions this structure
    fcids = [k for k in E.Pex.keys() if (fid in k)]

    # From every positive pair take element 0 when c[0] equals fid and
    # element 1 otherwise.
    fPi = []
    for c in fcids:
        fPi.extend([i[int(c[0] != fid)] for i in E.Pex[c][0]])
    fPi = np.unique(np.array(fPi))

    # structures without any positive residue are left out of A
    if len(fPi):
        L = np.zeros(len(C))
        L[fPi] = 1.0
        A[fid] = roc(list(C), list(L))[-1]
def parseShandarFiles(ifile, auconly=False, **kwargs):
    """
    Reads shandar's output files with labels (made on the same pattern as
    analyzePredFile.readFile).

    ifile   -- path; getFileParts(ifile) supplies the directory and the id.
               '<id>.preds' and '<id>.cont' in that directory are read line
               by line in lockstep.  The second and third '_'-separated
               parts of the id select which chains are kept.
    auconly -- when true, only the AUC is returned
    kwargs  -- accepted and ignored

    Returns auc, or (auc, Mvm, Mlm, None, None, lrV, rrV) where Mvm holds
    the predictions (NaN where a pair never appeared), Mlm the labels
    (0 where a pair never appeared) and lrV / rrV map a row / column index
    to (max prediction, max label) along that row / column.
    """
    def parseContLine(ln):
        # A split .cont line and the positions of its fields:
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0     1      2      3    4     5    6       7      8     9     10
        fields = ln.split()
        return (fields[0] + fields[1], fields[5] + fields[6], int(fields[9]))

    loopath, cid, _ = getFileParts(ifile)
    lcids = cid.split('_')[1]
    rcids = cid.split('_')[2]

    row_of = {}   # ligand residue key -> row index, in order of appearance
    col_of = {}   # receptor residue key -> column index
    entries = []  # (row, column, label, prediction)
    preds_path = os.path.join(loopath, cid + '.preds')
    cont_path = os.path.join(loopath, cid + '.cont')
    with open(preds_path) as fp, open(cont_path) as fc:
        for lnp, lnc in zip(fp, fc):
            (lidx, ridx, lbl) = parseContLine(lnc)
            # the first character of each key is the chain id
            if not (lidx[0] in lcids and ridx[0] in rcids):
                continue
            if lidx not in row_of:
                row_of[lidx] = len(row_of)
            if ridx not in col_of:
                col_of[ridx] = len(col_of)
            entries.append((row_of[lidx], col_of[ridx], lbl, float(lnp)))

    shape = (len(row_of), len(col_of))
    Mvm = np.empty(shape)
    Mvm.fill(np.nan)
    Mlm = np.zeros(shape)
    for (lx, rx, lbl, p) in entries:
        Mlm[lx, rx] = lbl
        Mvm[lx, rx] = p

    (_, _, auc) = roc.roc(list(Mvm.flatten()), list(Mlm.flatten()))
    if auconly:
        return auc

    # construct lrV, rrV
    row_best = zip(np.max(Mvm, axis=1), np.max(Mlm, axis=1))
    col_best = zip(np.max(Mvm, axis=0), np.max(Mlm, axis=0))
    lrV = dict(zip(range(Mvm.shape[0]), row_best))
    rrV = dict(zip(range(Mvm.shape[1]), col_best))
    return auc, Mvm, Mlm, None, None, lrV, rrV
dd = [] ld = [] md = [] for p in E.Pex[cid][0]: if p in mx: #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]]) dd.append(dx[p]) ld.append(+1) md.append(mx[p]) for n in E.getNegEx(cid): if n in mx: #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]]) dd.append(dx[n]) md.append(mx[n]) ld.append(-1) (_, _, aa_di) = roc.roc(dd, ld) (_, _, aa_mi) = roc.roc(md, ld) Xauc.append([aa_mi, aa_di]) print cid, ncs, lstats[0].shape[0], rstats[0].shape[0], Xauc[ -1], Xuauc[-1], Lauc[-2:] #pdb.set_trace() if (myid != 0): comm.send(myEdata, dest=0) else: """ for p in range(1,nprocs): myEdata.extend(comm.recv(source=p)) output = open(ofname, 'wb') cPickle.dump(myEdata, output,-1) output.close() if evalROC:
'2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU' ] fs = f3 + f4 E = getExamplesDBD.loader(efile) A = {} for cid in fs: print cid L = myPDB.loader(bdir + cid + '_l_u.pdb.pkl') R = myPDB.loader(bdir + cid + '_r_u.pdb.pkl') V = [] Y = [] for p in E.Pex[cid][0]: v = L.B[p[0]] + R.B[p[ 1]] #np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]]) if ~np.isnan(v): V.append(v) Y.append(+1) for n in E.getNegEx(cid): v = L.B[n[0]] + R.B[n[ 1]] #np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]]) if ~np.isnan(v): V.append(v) Y.append(-1) (_, _, auc) = roc.roc(V, Y) A[cid] = auc
efile=bdir+'E_125PN_15_35_50.lbl.pkl' #fs=glob.glob(bdir+'*_u.pdb.pkl') f3=['1SBB', '1JPS', '2HMI', '1GHQ', '1KTZ', '1K74', '1D6R', '2SIC', '1GPW', '1XD3', '1EAW', '1VFB', '7CEI', '1E4K', '1I4D', '1H1V', '2PCC', '1FQ1', '2HLE', '1FQJ', '1S1Q', '2OOB', '1UDI', '1KLU', '1WQ1', '1CGI', '1ATN', '1N2C', '1GP2', '1FAK', '1NW9', '1GLA', '1GRN', '2HRK', '1AZS', '1JMO', '1PXV', '1EWY', '1RLB', '1DQJ', '2BTF', '2I25', '1I2M', '1BUH', '1BGX', '1ML0', '1EFN', '1DFJ', '1Y64', '2UUY', '1MAH', '1BVK', '1BVN', '1EER', '1MLC', '1NSN', '1AK4', '1A2K', '1QFW', '2H7V', '1T6B', '1KAC', '1YVB', '1J2J', '1QA9', '1AHW', '2OT3', '2FD6', '2AJF', '1K4C', '1NCA', '1OPH', '1XQS', '1B6C', '1PPE', '2O8V', '1HIA', '1Z0K', '1R0R', '1WEJ', '1ACB', '1KXP', '1KXQ', '1R8S', '1IRA', '1GCQ', '1F51', '2B42', '2HQS', '1AKJ', '2JEL', '1KKL', '1FC2', '1E96', '1N8O', '2MTA', '2VIS', '1IB1', '1E6J', '1Z5Y', '1EZU', '1TMQ', '2C0L', '1E6E', '1IQD', '1ZHI', '1M10', '2NZ8', '1AY7', '1HE8', '1IJK', '1HE1', '1FSK', '1F34', '2SNI', '1BJ1', '2CFH', '1BKD', '1DE4', '1IBR', '1I9R', '1K5D', '1AVX'] f4=['2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU'] fs=f3+f4 E=getExamplesDBD.loader(efile) A={} for cid in fs: print cid L=myPDB.loader(bdir+cid+'_l_u.pdb.pkl') R=myPDB.loader(bdir+cid+'_r_u.pdb.pkl') V=[] Y=[] for p in E.Pex[cid][0]: v=L.B[p[0]]+R.B[p[1]]#np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]]) if ~np.isnan(v): V.append(v) Y.append(+1) for n in E.getNegEx(cid): v=L.B[n[0]]+R.B[n[1]]#np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]]) if ~np.isnan(v): V.append(v) Y.append(-1) (_,_,auc)=roc.roc(V,Y) A[cid]=auc
from PyML.evaluators import roc
from postProcess import postProcessAvg
from symmetryProcessing import *

# Evaluate the pairwise predictions of one complex against the positive
# pairs obtained from its structure.  An alternative data directory used
# previously was '../CAPRI/'.
bdir = '../../g2mers/'
cid = '1MLC'

# getPosexFromPDB handles symmetry in the complex (an earlier version used
# getExamplesDBD.getPosex here instead).
P = getPosexFromPDB(bdir, cid, dthr=6.0)

ppfile = bdir + cid + '.pairpred.txt'
(auc, Mv, Ml, lseq, rseq, lrV, rrV) = readFile(ppfile, usePDBidx=False)

# label matrix: 1.0 for every positive pair, 0.0 elsewhere
Mvtbl = np.zeros(Mv.shape)
for (i, j) in P:
    Mvtbl[i, j] = 1.0

# pairwise evaluation on the entries that have a prediction
Mvr = Mv.ravel()
Mvtblr = Mvtbl.ravel()
nidx = (~np.isnan(Mvr))
Mvr = Mvr[nidx]
Mvtblr = Mvtblr[nidx]
(fpv, tpv, aucv) = roc.roc(list(Mvr), list(Mvtblr))
# RFPP: zero-based position of the first positive when ordered by
# decreasing score
print("%s AUC = %s RFPP = %s" % (
    cid, aucv, np.argmax(Mvtblr[np.argsort(-Mvr)] == 1)))

# per-residue evaluation: best score / label along each axis
(fpl, tpl, auc) = roc.roc(list(np.nanmax(Mv, axis=0)),
                          list(np.nanmax(Mvtbl, axis=0)))
print(auc)
(fpl, tpl, auc) = roc.roc(list(np.nanmax(Mv, axis=1)),
                          list(np.nanmax(Mvtbl, axis=1)))
print(auc)
dd=[] ld=[] md=[] for p in E.Pex[cid][0]: if p in mx: #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]]) dd.append(dx[p]) ld.append(+1) md.append(mx[p]) for n in E.getNegEx(cid): if n in mx: #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]]) dd.append(dx[n]) md.append(mx[n]) ld.append(-1) (_,_,aa_di)=roc.roc(dd,ld) (_,_,aa_mi)=roc.roc(md,ld) Xauc.append([aa_mi,aa_di]) print cid,ncs,lstats[0].shape[0],rstats[0].shape[0],Xauc[-1],Xuauc[-1],Lauc[-2:] #pdb.set_trace() if(myid!=0): comm.send(myEdata, dest=0) else: """ for p in range(1,nprocs): myEdata.extend(comm.recv(source=p)) output = open(ofname, 'wb') cPickle.dump(myEdata, output,-1) output.close() if evalROC: MV=[]
def getROC(self, rocN=None):
    """
    Return the ROC score of these results.

    rocN -- passed to roc_module.roc as its third argument, followed by
            self.rocNormalization

    Only the third element of the tuple returned by roc_module.roc is used;
    the two curve arrays are discarded.
    """
    curve = roc_module.roc(self.decisionFunc, self.givenY, rocN,
                           self.rocNormalization)
    _first, _second, rocValue = curve
    return rocValue
"""
Created on Wed Nov 27 08:40:33 2013

@author: root
"""
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# For every pickled structure in pkldir, measure how well the column-wise
# maximum of its PSSM separates the residues that take part in a positive
# example from all others.  A maps structure id -> last element of roc().
pdbdir = '../../DBD4CSPKL/PDB_all_'
pkldir = '../../DBD4CSPKL/PKL'
E = getExamplesDBD.loader(
    os.path.join('../../DBD4CSPKL/PKL', 'ENS_15_35_50.lbl.pkl'))

# unique structure ids: file name up to the first '.'
F = list(set([
    getFileParts(g)[1].split('.')[0]
    for g in glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
]))

A = {}
for fid in F:
    print(fid)
    X = myPDB.loader(os.path.join(pkldir, fid + '.pdb.pkl'))
    C = np.max(X.pssm, axis=0)

    # complexes whose key mentions this structure
    fcids = [k for k in E.Pex.keys() if (fid in k)]

    # From every positive pair take element 0 when c[0] equals fid and
    # element 1 otherwise.
    fPi = []
    for c in fcids:
        fPi.extend([i[int(c[0] != fid)] for i in E.Pex[c][0]])
    fPi = np.unique(np.array(fPi))

    # structures without any positive residue are left out of A
    if len(fPi):
        L = np.zeros(len(C))
        L[fPi] = 1.0
        A[fid] = roc(list(C), list(L))[-1]
def getAUC4Protein(lrV):
    """
    Compute one AUC from a mapping whose values are (score, label) pairs.

    lrV -- dict; element 0 of every value is used as the score and
           element 1 as the label

    Returns (a, vv, ll): the third element of roc.roc(scores, labels)
    followed by the scores and the labels as numpy arrays, both in
    lrV.values() order.
    """
    # transpose the values into one list per tuple position
    columns = [list(col) for col in zip(*lrV.values())]
    scores = columns[0]
    labels = columns[1]
    (_, _, a) = roc.roc(scores, labels)
    return (a, np.array(scores), np.array(labels))
except: continue #Get our results Mo=np.zeros((len(L.S2Ri),len(R.S2Ri))) Mo.fill(np.nan) ifile=cdir+'/InterPRed_prediction/2X000.InterPRed.txt' for ln in open(ifile,'r'): lns=ln.split() r=int(lns[3]) c=int(lns[8]) v=float(lns[10]) Mo[r,c]=v """ print 'MI1- auc',roc.roc(MI1,trbs)[-1] print 'Shandar - auc',roc.roc(np.nanmax(M,axis=0),trbs)[-1] print 'Our - auc',roc.roc(np.nanmax(Mo,axis=0),trbs)[-1] plotdv(rseqn,trbs) plotdv(rseqn,np.nanmax(M,axis=0)) plotdv(rseqn,np.nanmax(Mo,axis=0)) #plotdv(rseqn,MI1) plt.show() """ plt.plot([0,1],[0, 1],'k:',linewidth=2.0) (fp,tp,auc)=roc.roc(list(M.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'r-.',linewidth=2.0);print auc (fp,tp,auc)=roc.roc(MI1,list(np.nanmax(Mt,axis=0)));plt.plot(fp,tp,'g--',linewidth=2.0);print auc (fp,tp,auc)=roc.roc(list(Mo.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'b-',linewidth=2.0);print auc plt.grid() plt.xlabel('FPR') plt.ylabel('TPR') plt.legend(['Random : 50.0','PPiPP : 54.0','MI-1 : 59.6','PAIRPred : 63.8'],loc=0);plt.title('EF-CAM Results');plt.show()