Exemple #1
0
def computeDistMeansForComplex(cid,N,pdbpklpath,pppath):
    """
    Gather AUC values and mean-distance statistics for one complex.

    cid: complex identifier; '<cid>_l_u.pdb.pkl' and '<cid>_r_u.pdb.pkl' are
        loaded from pdbpklpath through myPDB.loader.
    N: not used; kept so that existing callers keep working.
    pdbpklpath: directory holding the pickled structures.
    pppath: either a string prefix (pppath+cid+'.pairpred.txt' is parsed with
        readFile) or an already parsed 7-tuple (pauc,Mv,Ml,lseq,rseq,lrV,rrV).

    Returns (pauc,lauc,rauc,d1,d2,d3,d4): pauc is taken from the parsed
    predictions, lauc/rauc are AUCs computed from the two per-residue dicts
    (None when they cannot be computed) and d1..d4 are the getDistMean results
    for the '_l_u' structure (top=True, top=False) followed by the '_r_u'
    structure (top=True, top=False).
    """
    def residueAUC(resV):
        # resV is expected to map a residue index to a (score,label) pair
        # (TODO confirm against readFile). Best effort: None when the AUC
        # cannot be computed, e.g. resV is None or roc.roc rejects the input.
        try:
            sl=np.array(list(resV.values()))
            (_,_,a)=roc.roc(list(sl[:,0]),list(sl[:,1]))
        except Exception:
            return None
        return a

    L=myPDB.loader(os.path.join(pdbpklpath,cid+'_l_u.pdb.pkl'))
    R=myPDB.loader(os.path.join(pdbpklpath,cid+'_r_u.pdb.pkl'))
    if isinstance(pppath,str):
        (pauc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(pppath+cid+'.pairpred.txt',usePDBidx=False)
    else:
        (pauc,Mv,Ml,lseq,rseq,lrV,rrV)=pppath
    # The dicts keep their own names: previously they were overwritten with
    # None before use, so both AUCs always came back as None.
    lauc=residueAUC(lrV)
    rauc=residueAUC(rrV)
    # Random scores stand in for the predictions; entries with a negative label
    # are pinned to -1, below every random draw (which lies in [0,1)).
    Mlx=np.random.random(Ml.shape)
    Mlx[Ml<0]=-1
    (r,c,_)=sortScores(Mlx)

    lD=getDistMat(getCoords(L.R))
    rD=getDistMat(getCoords(R.R))
    #pdb.set_trace()
    M=20
    return pauc,lauc,rauc,getDistMean(lD,r,top=True,M=M), getDistMean(lD,r,top=False,M=M), getDistMean(rD,c,top=True,M=M), getDistMean(rD,c,top=False,M=M)
Exemple #2
0
def computeDistMeansForComplex(cid, N, pdbpklpath, pppath):
    """
    Gather AUC values and mean-distance statistics for one complex.

    cid: complex identifier; '<cid>_l_u.pdb.pkl' and '<cid>_r_u.pdb.pkl' are
        loaded from pdbpklpath through myPDB.loader.
    N: not used; kept so that existing callers keep working.
    pdbpklpath: directory holding the pickled structures.
    pppath: either a string prefix (pppath + cid + '.pairpred.txt' is parsed
        with readFile) or an already parsed 7-tuple
        (pauc, Mv, Ml, lseq, rseq, lrV, rrV).

    Returns (pauc, lauc, rauc, d1, d2, d3, d4): pauc is taken from the parsed
    predictions, lauc/rauc are AUCs computed from the two per-residue dicts
    (None when they cannot be computed) and d1..d4 are the getDistMean results
    for the '_l_u' structure (top=True, top=False) followed by the '_r_u'
    structure (top=True, top=False).
    """
    def residue_auc(res_v):
        # res_v is expected to map a residue index to a (score, label) pair
        # (TODO confirm against readFile). Best effort: None when the AUC
        # cannot be computed, e.g. res_v is None or roc.roc rejects the input.
        try:
            pairs = np.array(list(res_v.values()))
            (_, _, area) = roc.roc(list(pairs[:, 0]), list(pairs[:, 1]))
        except Exception:
            return None
        return area

    L = myPDB.loader(os.path.join(pdbpklpath, cid + '_l_u.pdb.pkl'))
    R = myPDB.loader(os.path.join(pdbpklpath, cid + '_r_u.pdb.pkl'))
    if isinstance(pppath, str):
        (pauc, Mv, Ml, lseq, rseq, lrV,
         rrV) = readFile(pppath + cid + '.pairpred.txt', usePDBidx=False)
    else:
        (pauc, Mv, Ml, lseq, rseq, lrV, rrV) = pppath
    # The dicts keep their own names: previously they were overwritten with
    # None before use, so both AUCs always came back as None.
    lauc = residue_auc(lrV)
    rauc = residue_auc(rrV)
    # Random scores stand in for the predictions; entries with a negative label
    # are pinned to -1, below every random draw (which lies in [0, 1)).
    Mlx = np.random.random(Ml.shape)
    Mlx[Ml < 0] = -1
    (r, c, _) = sortScores(Mlx)

    lD = getDistMat(getCoords(L.R))
    rD = getDistMat(getCoords(R.R))
    #pdb.set_trace()
    M = 20
    return (pauc, lauc, rauc,
            getDistMean(lD, r, top=True, M=M),
            getDistMean(lD, r, top=False, M=M),
            getDistMean(rD, c, top=True, M=M),
            getDistMean(rD, c, top=False, M=M))
Exemple #3
0
def getAUCs(mx,dx,dd,dthr=6.0):
    """
    AUC of two score matrices against labels derived from a distance matrix.

    mx, dx: score arrays; dd: distance array; dthr: distance threshold.
    An entry is labelled +1 when dd<dthr and -1 otherwise. Entries that are
    NaN in any of the flattened arrays are dropped before scoring.
    Returns (aa_mi,aa_di): the third element of roc.roc for mx and for dx.
    """
    labels=(2*(dd<dthr)-1).flatten()
    scores_m=mx.flatten()
    scores_d=dx.flatten()
    # De Morgan form of "not (nan in any of the three arrays)"
    keep=~np.isnan(scores_m) & ~np.isnan(labels) & ~np.isnan(scores_d)
    kept_labels=list(labels[keep])
    aa_mi=roc.roc(list(scores_m[keep]),kept_labels)[2]
    aa_di=roc.roc(list(scores_d[keep]),kept_labels)[2]
    return aa_mi,aa_di
Exemple #4
0
def getAUCs(mx, dx, dd, dthr=6.0):
    """
    AUC of two score matrices against labels derived from a distance matrix.

    mx, dx: score arrays; dd: distance array; dthr: distance threshold.
    An entry is labelled +1 when dd < dthr and -1 otherwise. Entries that are
    NaN in any of the flattened arrays are dropped before scoring.
    Returns (aa_mi, aa_di): the third element of roc.roc for mx and for dx.
    """
    flat_labels = (2 * (dd < dthr) - 1).flatten()
    flat_m = mx.flatten()
    flat_d = dx.flatten()
    has_nan = np.isnan(flat_m) | np.isnan(flat_labels) | np.isnan(flat_d)
    keep = ~has_nan
    label_list = list(flat_labels[keep])
    (_, _, aa_mi) = roc.roc(list(flat_m[keep]), label_list)
    (_, _, aa_di) = roc.roc(list(flat_d[keep]), label_list)
    return aa_mi, aa_di
Exemple #5
0
def getAUC4Protein(lrV):
    """
    AUC for one protein from its per-residue dictionary.

    lrV: dict whose values are sequences; element 0 of each value is used as
        the score and element 1 as the label passed to roc.roc.
    Returns (a, vv, ll): the third element of roc.roc plus the scores and the
    labels as numpy arrays.
    """
    # transpose the dict values into one list per field
    columns = [list(col) for col in zip(*lrV.values())]
    scores, labels = columns[0], columns[1]
    area = roc.roc(scores, labels)[2]
    return (area, np.array(scores), np.array(labels))
def getTP_RFPP(Mv,Ml):
    """
    Number of true positives among the top 50 predictions and rank of the first positive.

    Mv: prediction scores; Ml: labels (+1 marks a positive). Entries that are
    NaN in either array are ignored.
    Returns (ntp,rfpp): ntp is the maximum of the second array returned by
    roc.roc(...,50,normalize=False); rfpp is the 0-based position of the first
    +1 label when scores are sorted in decreasing order (0 as well when no
    label equals +1, because of np.argmax).
    """
    valid=np.logical_not(np.logical_or(np.isnan(Mv),np.isnan(Ml)))
    scores=Mv[valid]
    labels=Ml[valid]
    order=np.argsort(-scores)
    rfpp=np.argmax(labels[order]==1)
    (_,tpr,_)=roc.roc(list(scores),list(labels),50,normalize=False)
    return (np.max(tpr),rfpp)
def parseShandarFiles(ifile,auconly=False,**kwargs): #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Reads shandar's output files with labels (made on the same pattern as analyzePredFile.readFile)

    ifile: path whose directory and base name (from getFileParts) locate the
        two input files '<cid>.preds' and '<cid>.cont', read line by line in
        lockstep. The base name is split on '_'; fields 1 and 2 give the
        accepted chain ids of the left and right protein.
    auconly: when true only the AUC is returned.
    kwargs: accepted for interface compatibility with other readers; ignored.

    Returns auc, or (auc,Mvm,Mlm,None,None,lrV,rrV) where Mvm holds the
    predictions (NaN where no line was seen), Mlm the labels and lrV/rrV map a
    row/column index to (max prediction, max label) of that row/column.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns=ln.split()
        lidx=lns[0]+lns[1]
        ridx=lns[5]+lns[6]
        lbl=int(lns[9])
        return (lidx,ridx,lbl)

    loopath,cid,_=getFileParts(ifile)
    cparts=cid.split('_')
    lcids=cparts[1]
    rcids=cparts[2]
    Mlidx={}    # left residue key -> row index, in order of first appearance
    Mridx={}    # right residue key -> column index, in order of first appearance
    Mlv=[]
    with open(os.path.join(loopath,cid+'.preds')) as fp,open(os.path.join(loopath,cid+'.cont')) as fc:
        for lnp,lnc in zip(fp,fc):
            (lidx,ridx,lbl)=parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                # setdefault hands out the next free index on first sight;
                # this replaces a bare "except:" that also hid unrelated errors.
                lx=Mlidx.setdefault(lidx,len(Mlidx))
                rx=Mridx.setdefault(ridx,len(Mridx))
                Mlv.append((lx,rx,lbl,float(lnp)))
    l=len(Mlidx)
    r=len(Mridx)
    Mvm=np.zeros((l,r))
    Mvm.fill(np.nan)
    Mlm=np.zeros((l,r))
    for (lx,rx,lbl,p) in Mlv:
        Mlm[lx,rx]=lbl
        Mvm[lx,rx]=p

    (_,_,auc)=roc.roc(list(Mvm.flatten()),list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV=dict(zip(range(Mvm.shape[0]),zip(np.max(Mvm,axis=1),np.max(Mlm,axis=1))))
    rrV=dict(zip(range(Mvm.shape[1]),zip(np.max(Mvm,axis=0),np.max(Mlm,axis=0))))

    return auc,Mvm,Mlm,None,None,lrV,rrV
def getTP_RFPP(Mv, Ml):
    """
    Number of true positives among the top 50 predictions and rank of the
    first positive.

    Mv: prediction scores; Ml: labels (+1 marks a positive). Entries that are
    NaN in either array are ignored.
    Returns (ntp, rfpp): ntp is the maximum of the second array returned by
    roc.roc(..., 50, normalize=False); rfpp is the 0-based position of the
    first +1 label when scores are sorted in decreasing order (0 as well when
    no label equals +1, because of np.argmax).
    """
    usable = ~np.isnan(Mv) & ~np.isnan(Ml)
    kept_scores = Mv[usable]
    kept_labels = Ml[usable]
    ranked_labels = kept_labels[np.argsort(-kept_scores)]
    first_positive = np.argmax(ranked_labels == 1)
    curve = roc.roc(list(kept_scores), list(kept_labels), 50, normalize=False)
    (_, tpr, _) = curve
    return (np.max(tpr), first_positive)
Exemple #9
0
def roc_score(data, targetClass, otherClass, **args):
    """
    Score every feature by how well it separates the labels on its own.

    data: dataset exposing numFeatures, getFeature(i) and labels.Y.
    targetClass, otherClass: not used by this scorer.
    args: extra keyword arguments; a 'rocN' entry is accepted but has no
        effect on the result (it was never forwarded to roc.roc).

    Returns a float array with one entry per feature: max(auc, 1 - auc), where
    auc is the third element returned by roc.roc for that feature.
    """
    # dtype=float is float64; the numpy.float_ alias no longer exists in NumPy 2.0
    s = numpy.zeros(data.numFeatures, dtype=float)
    for i in range(data.numFeatures):
        auc = roc.roc(data.getFeature(i), data.labels.Y)[2]
        # the direction of the feature does not matter, only its separation
        s[i] = max(auc, 1 - auc)

    return s
Exemple #10
0
def roc_score(data, targetClass, otherClass, **args) :
    """
    Score every feature by how well it separates the labels on its own.

    data: dataset exposing numFeatures, getFeature(i) and labels.Y.
    targetClass, otherClass: not used by this scorer.
    args: extra keyword arguments; a 'rocN' entry is accepted but has no
        effect on the result (it was never forwarded to roc.roc).

    Returns a float array with one entry per feature: max(auc, 1-auc), where
    auc is the third element returned by roc.roc for that feature.
    """
    numFeatures = data.numFeatures
    # dtype=float is float64; the numpy.float_ alias no longer exists in NumPy 2.0
    s = numpy.zeros(numFeatures, dtype=float)
    for i in range(numFeatures) :
        featureValues = data.getFeature(i)
        auc = roc.roc(featureValues, data.labels.Y)[2]
        # the direction of the feature does not matter, only its separation
        s[i] = max(auc, 1-auc)

    return s
    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results.

        filename: passed through to roc_module.plotROC.
        fold: None for a curve over all folds (averaged with roc_module.roc_VA
            when there is more than one fold), otherwise the fold to plot.
        args: only 'rocN' is read; it is passed to the ROC computation.

        Raises ValueError when fold is larger than self.numFolds.
        """
        rocN = args.get('rocN')
        if self.numFolds != 1 and fold is not None :
            # plot an ROC plot for the given fold
            if fold > self.numFolds :
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            curve = roc_module.roc(dvals, labels, rocN)
        else :
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            if self.numFolds == 1 :
                # the results are for a single split
                curve = roc_module.roc(dvals, labels, rocN)
            else :
                # averaged ROC curve: one (decision values, labels) pair per fold
                folds = [(dvals[i], labels[i]) for i in range(len(labels))]
                curve = roc_module.roc_VA(folds, rocN)
        rocFP, rocTP, area = curve
        roc_module.plotROC(rocFP, rocTP, filename)
    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results.

        filename: passed through to roc_module.plotROC.
        fold: None for a curve over all folds (averaged with roc_module.roc_VA
            when there is more than one fold), otherwise the fold to plot.
        args: 'rocN' is passed to the ROC computation; all of args is also
            forwarded to roc_module.plotROC.

        Raises ValueError when fold is larger than self.numFolds.
        """
        rocN = args['rocN'] if 'rocN' in args else None
        singleSplit = (self.numFolds == 1)
        if singleSplit or fold is None :
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            if singleSplit :
                rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
            else :
                # averaged ROC curve: one (decision values, labels) pair per fold
                folds = [(dvals[i], labels[i]) for i in range(len(labels))]
                rocFP, rocTP, area = roc_module.roc_VA(folds, rocN)
        else :
            # plot an ROC plot for the given fold
            if fold > self.numFolds :
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        roc_module.plotROC(rocFP, rocTP, filename, **args)
def parse1SVM(ifile, auconly=False, **kwargs):  #,E,Asgl
    """
    Build pairwise scores for one complex by adding the per-residue scores of
    its two proteins, and compute the AUC against the labelled example set.

    ifile: path from which the complex id is derived (first four characters of
        the base name after getFileParts has been applied twice).
    auconly: when true only the AUC is returned.
    kwargs: accepted for interface compatibility with other readers; ignored.

    Reads 'EP_6N.lbl.pkl' (examples) and 'result.sgl.pkl' (per-residue
    results) from the current working directory on every call.

    Returns auc, or (auc, Mv, Ml, None, None, lrV, rrV) where Mv[i, j] is the
    sum of the first entries of the i-th lrV value and the j-th rrV value, and
    Ml[i, j] is +1.0 when that key pair is in E.Pex[cid][0], otherwise -1.0.
    """
    exfname = 'EP_6N.lbl.pkl'
    sglfile = 'result.sgl.pkl'
    # The earlier "try: E / except NameError" checks could never find a cached
    # value because E and Asgl are local names, so both files were loaded on
    # every call; that behaviour is kept, without the misleading checks.
    E = getExamplesDBD.loader(exfname)
    # NOTE: unpickling can run arbitrary code; only load trusted result files.
    with open(sglfile, "rb") as fh:
        Asgl = cPickle.load(fh)

    cid = getFileParts(getFileParts(ifile)[1])[1][:4]
    (_, _, lrV, rrV) = Asgl[cid]

    posPairs = E.Pex[cid][0]
    Mv = np.zeros((len(lrV), len(rrV)))
    Ml = np.zeros(Mv.shape)
    for lidx, xr in enumerate(lrV.keys()):
        for ridx, xc in enumerate(rrV.keys()):
            Mv[lidx, ridx] = lrV[xr][0] + rrV[xc][0]
            Ml[lidx, ridx] = +1.0 if (xr, xc) in posPairs else -1.0

    #pdb.set_trace()
    (_, _, auc) = roc.roc(list(Mv.flatten()), list(Ml.flatten()))
    if auconly:
        return auc

    return (auc, Mv, Ml, None, None, lrV, rrV)  #auc,Mvm,Mlm,None,None,lrV,rrV
def parse1SVM(ifile,auconly=False,**kwargs):#,E,Asgl
    """
    Build pairwise scores for one complex by adding the per-residue scores of
    its two proteins, and compute the AUC against the labelled example set.

    ifile: path from which the complex id is derived (first four characters of
        the base name after getFileParts has been applied twice).
    auconly: when true only the AUC is returned.
    kwargs: accepted for interface compatibility with other readers; ignored.

    Reads 'EP_6N.lbl.pkl' (examples) and 'result.sgl.pkl' (per-residue
    results) from the current working directory on every call.

    Returns auc, or (auc,Mv,Ml,None,None,lrV,rrV) where Mv[i,j] is the sum of
    the first entries of the i-th lrV value and the j-th rrV value, and
    Ml[i,j] is +1.0 when that key pair is in E.Pex[cid][0], otherwise -1.0.
    """
    exfname='EP_6N.lbl.pkl'
    sglfile='result.sgl.pkl'
    # The earlier "try: E / except NameError" checks could never find a cached
    # value because E and Asgl are local names, so both files were loaded on
    # every call; that behaviour is kept, without the misleading checks.
    E=getExamplesDBD.loader(exfname)
    # NOTE: unpickling can run arbitrary code; only load trusted result files.
    with open(sglfile,"rb") as fh:
        Asgl=cPickle.load(fh)

    cid=getFileParts(getFileParts(ifile)[1])[1][:4]
    (_,_,lrV,rrV)=Asgl[cid]

    posPairs=E.Pex[cid][0]
    Mv=np.zeros((len(lrV),len(rrV)))
    Ml=np.zeros(Mv.shape)
    for lidx,xr in enumerate(lrV.keys()):
        for ridx,xc in enumerate(rrV.keys()):
            Mv[lidx,ridx]=lrV[xr][0]+rrV[xc][0]
            Ml[lidx,ridx]=+1.0 if (xr,xc) in posPairs else -1.0

    #pdb.set_trace()
    (_,_,auc)=roc.roc(list(Mv.flatten()),list(Ml.flatten()))
    if auconly:
        return auc

    return (auc,Mv,Ml,None,None,lrV,rrV) #auc,Mvm,Mlm,None,None,lrV,rrV
Exemple #15
0
def rasaPlot(rAsa,Dxx,Lxx,Np=10):
    """
    Plot AUC and class counts as a function of binned rAsa values.

    rAsa: array of values used for binning.
    Dxx: scores, indexed like rAsa.
    Lxx: labels, indexed like rAsa (+1 marks a positive).
    Np: number of bin edges; Np-1 equal-width bins span min(rAsa)..max(rAsa).

    For every bin holding more than 10 positives and more than 10
    non-positives the AUC is taken from roc.roc; other bins stay NaN. The
    correlation between bin centres and AUC is printed, and two figures are
    written to '../figures/fig_A2.eps' and '../figures/fig_A3.eps'.
    """
    #nb=getSamplePoints(rAsa,Np)
    nb=np.linspace(np.min(rAsa),np.max(rAsa),Np)
    nbins=len(nb)-1
    R=np.zeros(nbins)
    R.fill(np.nan)
    xx=np.zeros(nbins)
    pp=np.zeros(nbins)
    nn=np.zeros(nbins)
    for idx in range(nbins):
        # bins are half-open [nb[idx],nb[idx+1]), so values equal to the
        # overall maximum are not counted in any bin
        vidx=np.logical_and(rAsa>=nb[idx], rAsa<nb[idx+1])
        v=Dxx[vidx]
        l=Lxx[vidx]
        xx[idx]=(nb[idx]+nb[idx+1])/2.0
        pp[idx]=np.sum(l==+1)
        nn[idx]=np.sum(l!=+1)
        if(pp[idx]>10 and nn[idx]>10):
            try:
                (_,_,R[idx])=roc.roc(list(v),list(l))
            except Exception:
                # best effort: a bin whose AUC cannot be computed stays NaN
                continue
    naidx=~np.isnan(R)
    # single formatted argument: prints the same text under Python 2 and 3
    print("Correlation Coefficient:  %s" % (pearsonr(xx[naidx],R[naidx]),))
    #PLOTTING ONLY CODE
    # raw strings keep the backslash of the LaTeX command without relying on
    # '\D' being an unrecognised escape sequence
    stat=r"$\Delta$rASA"
    plt.figure(0)
    plt.plot(xx,R,'-o')
    plt.xlabel(stat)
    plt.ylabel("AUC")
    plt.title(r'AUC vs $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A2.eps', format='eps', dpi=1200)

    plt.figure(1)
    plt.plot(nb[:-1],pp,'r-^',label="Interacting residues")
    plt.plot(nb[:-1],nn,'k-v',label="Not-Interacting residues")
    plt.xlabel(stat)
    plt.ylabel("Counts")
    plt.legend(loc=0)
    plt.title(r'Number of residues vs. $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A3.eps', format='eps', dpi=1200)
def computeNTP(ifile, top=200, freader=None):
    """
    Compute ranking statistics for one result.

    ifile: either a result file name (then freader must be given, for example
        freader=parseShandarFiles, and is called as
        freader(ifile, usePDBidx=False)) or an already parsed 7-tuple
        (auc, Mv, Ml, lseq, rseq, lrV, rrV). The auc on input is not used; it
        is recomputed from the scores and labels and only kept for
        compatibility with the file readers.
    top: number of top predictions passed to findNTPinTop.
    freader: reader function, required when ifile is a string.

    Returns (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl):
    auc: The auc score
    ttp: Number of true positives in top
    fpi: index of the first true positive
    dntp: Distance to the nearest true positive for each top example
    la: auc of ligand
    ra: auc of receptor
    pp: number of positive examples
    nn: number of negative examples
    Mvx: flattened matrix of prediction scores (NaN entries removed)
    Mlx: flattened matrix of labels (NaN entries removed)
    lv, ll, rv, rl: per-protein scores and labels from getAUC4Protein

    The input matrix Mv is left untouched.
    """
    if isinstance(ifile, str):
        assert freader is not None
        (_, Mv, Ml, lseq, rseq, lrV, rrV) = freader(ifile, usePDBidx=False)
    else:  #expects tuple
        (_, Mv, Ml, lseq, rseq, lrV, rrV) = ifile

    (la, lv, ll) = getAUC4Protein(lrV)
    (ra, rv, rl) = getAUC4Protein(rrV)
    # flatten() always copies; ravel() may return a view, in which case the
    # -inf masking below would be written into the caller's matrix
    Mvx = Mv.flatten()
    Mlx = Ml.ravel()
    nidx = ~np.isnan(Mvx) & ~np.isnan(Mlx)
    (_, _, auc) = roc.roc(list(Mvx[nidx]), list(Mlx[nidx]))
    # unusable entries get the lowest possible score for the top-N search
    Mvx[~nidx] = -np.inf
    (ttp, fpi, dntp) = findNTPinTop(Mvx, Mlx, Mv.shape, top=top)
    Mvx = Mvx[nidx]
    Mlx = Mlx[nidx]

    pp = np.sum(Mlx == 1)  # total number of positives
    nn = len(Mlx) - pp  #total number of negatives
    return (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)
def computeNTP(ifile,top=200,freader=None):
    """
    Compute ranking statistics for one result.

    ifile: either a result file name (then freader must be given, for example
        freader=parseShandarFiles, and is called as
        freader(ifile,usePDBidx=False)) or an already parsed 7-tuple
        (auc,Mv,Ml,lseq,rseq,lrV,rrV). The auc on input is not used; it is
        recomputed from the scores and labels and only kept for compatibility
        with the file readers.
    top: number of top predictions passed to findNTPinTop.
    freader: reader function, required when ifile is a string.

    Returns (auc,ttp,fpi,dntp,la,ra,pp,nn,Mvx,Mlx,lv,ll,rv,rl):
    auc: The auc score
    ttp: Number of true positives in top
    fpi: index of the first true positive
    dntp: Distance to the nearest true positive for each top example
    la: auc of ligand
    ra: auc of receptor
    pp: number of positive examples
    nn: number of negative examples
    Mvx: flattened matrix of prediction scores (NaN entries removed)
    Mlx: flattened matrix of labels (NaN entries removed)
    lv,ll,rv,rl: per-protein scores and labels from getAUC4Protein

    The input matrix Mv is left untouched.
    """
    if isinstance(ifile,str):
        assert freader is not None
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=freader(ifile,usePDBidx=False)
    else: #expects tuple
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=ifile

    (la,lv,ll)=getAUC4Protein(lrV)
    (ra,rv,rl)=getAUC4Protein(rrV)
    # flatten() always copies; ravel() may return a view, in which case the
    # -inf masking below would be written into the caller's matrix
    Mvx=Mv.flatten()
    Mlx=Ml.ravel()
    nidx=~np.isnan(Mvx) &  ~np.isnan(Mlx)
    (_,_,auc)=roc.roc(list(Mvx[nidx]),list(Mlx[nidx]))
    # unusable entries get the lowest possible score for the top-N search
    Mvx[~nidx]=-np.inf
    (ttp,fpi,dntp)=findNTPinTop(Mvx,Mlx,Mv.shape,top=top)
    Mvx=Mvx[nidx]
    Mlx=Mlx[nidx]

    pp=np.sum(Mlx==1) # total number of positives
    nn=len(Mlx)-pp #total number of negatives
    return (auc,ttp,fpi,dntp,la,ra,pp,nn,Mvx,Mlx,lv,ll,rv,rl)
Exemple #18
0
def getAUC(s):
    """
    Per-complex and averaged AUC from a results object.

    s: either the name of a pickle file holding (r,dkey) or the (r,dkey) tuple
        itself. r must provide getPatternID, getDecisionFunction and
        getGivenLabels; dkey maps a complex id to its pattern ids (or to a
        tuple whose first element is that collection, for old result objects).

    Returns (auc,(fp,tp),(A,Rx,D,L,cids,r,dkey)): auc, fp and tp come from
    roc.roc_VA over all complexes; A holds the per-complex AUC, D and L the
    per-complex decision values and labels, and Rx the per-complex entries of
    'shandar_rmsd.txt' (empty lists when that file cannot be read).
    """
    if isinstance(s,str):
        # NOTE: unpickling can run arbitrary code; only open trusted result files.
        with open(s,"rb") as fh:
            (r,dkey)=cPickle.load(fh)
    else:
        (r,dkey)=s

    patid=combineList(r.getPatternID())
    vkey=dict(zip(patid,range(len(patid))))
    decfn=combineList(r.getDecisionFunction())
    lblid=combineList(r.getGivenLabels())
    cids=dkey.keys()
    D=[[] for i in cids]
    L=[[] for i in cids]
    A=[[] for i in cids]
    try:
        R=getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # the RMSD table is optional; continue without it
        R=None
    Rx=[[] for i in cids]
    for i,cid in enumerate(cids):
        cidx=dkey[cid]
        if type(cidx) is tuple: #backward compatability to old results objects
            cidx=cidx[0]
        for e in cidx:
            try:
                n=vkey[e]
            except KeyError:
                # NOTE(review): debugging hook left in place; after the
                # debugger returns, n is stale or undefined. Consider raising.
                pdb.set_trace()
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_,_,a)=roc.roc(D[i],L[i])
        A[i]=a
        if R is not None:
            Rx[i]=R[cid]
    # list(...) so that roc_VA receives a real sequence on every Python version
    (fp,tp,auc)=roc.roc_VA(list(zip(D,L)))
    return (auc,(fp,tp),(A,Rx,D,L,cids,r,dkey))
Exemple #19
0
def getAUC(s):
    """
    Per-complex and averaged AUC from a results object.

    s: either the name of a pickle file holding (r, dkey) or the (r, dkey)
        tuple itself. r must provide getPatternID, getDecisionFunction and
        getGivenLabels; dkey maps a complex id to its pattern ids (or to a
        tuple whose first element is that collection, for old result objects).

    Returns (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey)): auc, fp and tp come
    from roc.roc_VA over all complexes; A holds the per-complex AUC, D and L
    the per-complex decision values and labels, and Rx the per-complex entries
    of 'shandar_rmsd.txt' (empty lists when that file cannot be read).
    """
    if isinstance(s, str):
        # NOTE: unpickling can run arbitrary code; only open trusted files.
        with open(s, "rb") as fh:
            (r, dkey) = cPickle.load(fh)
    else:
        (r, dkey) = s

    patid = combineList(r.getPatternID())
    vkey = dict(zip(patid, range(len(patid))))
    decfn = combineList(r.getDecisionFunction())
    lblid = combineList(r.getGivenLabels())
    cids = dkey.keys()
    D = [[] for i in cids]
    L = [[] for i in cids]
    A = [[] for i in cids]
    try:
        R = getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # the RMSD table is optional; continue without it
        R = None
    Rx = [[] for i in cids]
    for i, cid in enumerate(cids):
        cidx = dkey[cid]
        if type(cidx) is tuple:  #backward compatability to old results objects
            cidx = cidx[0]
        for e in cidx:
            try:
                n = vkey[e]
            except KeyError:
                # NOTE(review): debugging hook left in place; after the
                # debugger returns, n is stale or undefined. Consider raising.
                pdb.set_trace()
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_, _, a) = roc.roc(D[i], L[i])
        A[i] = a
        if R is not None:
            Rx[i] = R[cid]
    # list(...) so that roc_VA receives a real sequence on every Python version
    (fp, tp, auc) = roc.roc_VA(list(zip(D, L)))
    return (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))
Exemple #20
0
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Script: for every pickled structure, score residues by a PSSM-derived value
# and store the resulting roc() figure per file id in A.

# Labelled example set; E.Pex is indexed by complex key below.
E = getExamplesDBD.loader(
    os.path.join('../../DBD4CSPKL/PKL', 'ENS_15_35_50.lbl.pkl'))
pdbdir = '../../DBD4CSPKL/PDB_all_'
pkldir = '../../DBD4CSPKL/PKL'
# Unique ids: the text before the first '.' in element 1 of getFileParts for
# every '*.pdb.pkl' file in pkldir.
F = list(
    set([
        getFileParts(g)[1].split('.')[0]
        for g in glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
    ]))
# fid -> last element returned by roc() (presumably the AUC - confirm)
A = {}
for fid in F:
    print fid
    X = myPDB.loader(os.path.join(pkldir, fid + '.pdb.pkl'))
    # one score per column of the PSSM: the maximum along axis 0
    C = np.max(X.pssm, axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)
    # keys of E.Pex that contain this file id
    fcids = [k for k in E.Pex.keys() if (fid in k)]
    fPi = []
    for c in fcids:
        # take element 0 of each pair when fid is the first entry of the key,
        # otherwise element 1
        fPi.extend([i[int(c[0] != fid)] for i in E.Pex[c][0]])
    fPi = np.unique(np.array(fPi))
    # files without any collected index are skipped
    if len(fPi):

        # label vector: 1.0 at the collected indices, 0.0 elsewhere
        L = np.zeros(len(C))
        L[fPi] = 1.0
        A[fid] = roc(list(C), list(L))[-1]
def parseShandarFiles(ifile,
                      auconly=False,
                      **kwargs):  #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Reads shandar's output files with labels (made on the same pattern as analyzePredFile.readFile)

    ifile: path whose directory and base name (from getFileParts) locate the
        two input files '<cid>.preds' and '<cid>.cont', read line by line in
        lockstep. The base name is split on '_'; fields 1 and 2 give the
        accepted chain ids of the left and right protein.
    auconly: when true only the AUC is returned.
    kwargs: accepted for interface compatibility with other readers; ignored.

    Returns auc, or (auc, Mvm, Mlm, None, None, lrV, rrV) where Mvm holds the
    predictions (NaN where no line was seen), Mlm the labels and lrV/rrV map a
    row/column index to (max prediction, max label) of that row/column.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns = ln.split()
        lidx = lns[0] + lns[1]
        ridx = lns[5] + lns[6]
        lbl = int(lns[9])
        return (lidx, ridx, lbl)

    loopath, cid, _ = getFileParts(ifile)
    cid_fields = cid.split('_')
    lcids = cid_fields[1]
    rcids = cid_fields[2]
    Mlidx = {}  # left residue key -> row index, in order of first appearance
    Mridx = {}  # right residue key -> column index, in order of first appearance
    Mlv = []
    with open(os.path.join(loopath, cid + '.preds')) as fp, open(
            os.path.join(loopath, cid + '.cont')) as fc:
        for lnp, lnc in zip(fp, fc):
            (lidx, ridx, lbl) = parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                # setdefault hands out the next free index on first sight;
                # this replaces a bare "except:" that also hid unrelated errors
                lx = Mlidx.setdefault(lidx, len(Mlidx))
                rx = Mridx.setdefault(ridx, len(Mridx))
                Mlv.append((lx, rx, lbl, float(lnp)))
    l = len(Mlidx)
    r = len(Mridx)
    Mvm = np.zeros((l, r))
    Mvm.fill(np.nan)
    Mlm = np.zeros((l, r))
    for (lx, rx, lbl, p) in Mlv:
        Mlm[lx, rx] = lbl
        Mvm[lx, rx] = p

    (_, _, auc) = roc.roc(list(Mvm.flatten()), list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV = dict(
        zip(range(Mvm.shape[0]),
            zip(np.max(Mvm, axis=1), np.max(Mlm, axis=1))))
    rrV = dict(
        zip(range(Mvm.shape[1]),
            zip(np.max(Mvm, axis=0), np.max(Mlm, axis=0))))

    return auc, Mvm, Mlm, None, None, lrV, rrV
Exemple #22
0
     dd = []
     ld = []
     md = []
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_, _, aa_di) = roc.roc(dd, ld)
     (_, _, aa_mi) = roc.roc(md, ld)
     Xauc.append([aa_mi, aa_di])
     print cid, ncs, lstats[0].shape[0], rstats[0].shape[0], Xauc[
         -1], Xuauc[-1], Lauc[-2:]
     #pdb.set_trace()
 if (myid != 0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:
    '2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX',
    '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B',
    '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T',
    '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M',
    'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD',
    '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU'
]
# Script: for every complex id, score each example pair by the sum of the B
# values of its two residues and store the AUC of that score in A.
# f3, f4, efile and bdir are defined outside this span.
fs = f3 + f4
E = getExamplesDBD.loader(efile)
# complex id -> AUC
A = {}
for cid in fs:
    print cid
    L = myPDB.loader(bdir + cid + '_l_u.pdb.pkl')
    R = myPDB.loader(bdir + cid + '_r_u.pdb.pkl')
    V = []  # scores
    Y = []  # labels: +1 for pairs from E.Pex, -1 for pairs from E.getNegEx
    for p in E.Pex[cid][0]:
        v = L.B[p[0]] + R.B[p[
            1]]  #np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
        # pairs whose score is NaN are left out
        if ~np.isnan(v):
            V.append(v)
            Y.append(+1)
    for n in E.getNegEx(cid):
        v = L.B[n[0]] + R.B[n[
            1]]  #np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]])
        # pairs whose score is NaN are left out
        if ~np.isnan(v):
            V.append(v)
            Y.append(-1)
    (_, _, auc) = roc.roc(V, Y)
    A[cid] = auc
# Script: for every complex id, score each example pair by the sum of the B
# values of its two residues and store the AUC of that score in A.
# bdir is defined outside this span.
efile=bdir+'E_125PN_15_35_50.lbl.pkl'
#fs=glob.glob(bdir+'*_u.pdb.pkl')
# Two hard-coded lists of complex ids; they are concatenated into fs below.
f3=['1SBB', '1JPS', '2HMI', '1GHQ', '1KTZ', '1K74', '1D6R', '2SIC', '1GPW', '1XD3', '1EAW', '1VFB', '7CEI', '1E4K', '1I4D', '1H1V', '2PCC', '1FQ1', '2HLE', '1FQJ', '1S1Q', '2OOB', '1UDI', '1KLU', '1WQ1', '1CGI', '1ATN', '1N2C', '1GP2', '1FAK', '1NW9', '1GLA', '1GRN', '2HRK', '1AZS', '1JMO', '1PXV', '1EWY', '1RLB', '1DQJ', '2BTF', '2I25', '1I2M', '1BUH', '1BGX', '1ML0', '1EFN', '1DFJ', '1Y64', '2UUY', '1MAH', '1BVK', '1BVN', '1EER', '1MLC', '1NSN', '1AK4', '1A2K', '1QFW', '2H7V', '1T6B', '1KAC', '1YVB', '1J2J', '1QA9', '1AHW', '2OT3', '2FD6', '2AJF', '1K4C', '1NCA', '1OPH', '1XQS', '1B6C', '1PPE', '2O8V', '1HIA', '1Z0K', '1R0R', '1WEJ', '1ACB', '1KXP', '1KXQ', '1R8S', '1IRA', '1GCQ', '1F51', '2B42', '2HQS', '1AKJ', '2JEL', '1KKL', '1FC2', '1E96', '1N8O', '2MTA', '2VIS', '1IB1', '1E6J', '1Z5Y', '1EZU', '1TMQ', '2C0L', '1E6E', '1IQD', '1ZHI', '1M10', '2NZ8', '1AY7', '1HE8', '1IJK', '1HE1', '1FSK', '1F34', '2SNI', '1BJ1', '2CFH', '1BKD', '1DE4', '1IBR', '1I9R', '1K5D', '1AVX']
f4=['2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU']
fs=f3+f4
E=getExamplesDBD.loader(efile)
# complex id -> AUC
A={}
for cid in fs:
    print cid
    L=myPDB.loader(bdir+cid+'_l_u.pdb.pkl')
    R=myPDB.loader(bdir+cid+'_r_u.pdb.pkl')   
    V=[]    # scores
    Y=[]    # labels: +1 for pairs from E.Pex, -1 for pairs from E.getNegEx
    for p in E.Pex[cid][0]:
        v=L.B[p[0]]+R.B[p[1]]#np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
        # pairs whose score is NaN are left out
        if ~np.isnan(v):
            V.append(v)
            Y.append(+1)
    for n in E.getNegEx(cid):
        v=L.B[n[0]]+R.B[n[1]]#np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]])
        # pairs whose score is NaN are left out
        if ~np.isnan(v):
            V.append(v)
            Y.append(-1)            
    (_,_,auc)=roc.roc(V,Y)
    A[cid]=auc
    

    
    
    
    
Exemple #25
0
from PyML.evaluators import roc
from postProcess import postProcessAvg
#from getExamplesDBD import getPosex
from symmetryProcessing import *
# Script: evaluate the pairwise predictions of a single complex against
# positives derived from its PDB structure.
#bdir='../CAPRI/'
bdir='../../g2mers/'
cid='1MLC'
#(_,_,P,_,_)=getPosex(bdir,cid)    #get positive examples    
P=getPosexFromPDB(bdir,cid,dthr=6.0)        # Handles symmetry in the complex
ppfile=bdir+cid+'.pairpred.txt'

(auc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(ppfile,usePDBidx=False)
#auc0,Mvc0,Mv,Mlc,lseq,rseq,lrV0,lrV,rrV0,rrV=postProcessAvg(cid,bdir,bdir)
#
#Mv[:10,:]=np.nan
#Mv[-10:,:]=np.nan
#Mv[:,:10]=np.nan
#Mv[:,-10:]=np.nan

# Label matrix with the shape of Mv: 1.0 at every (i,j) listed in P, 0.0 elsewhere.
Mvtbl=np.zeros(Mv.shape)
for (i,j) in P:
    Mvtbl[i,j]=1.0
# Flatten scores and labels and drop entries whose score is NaN.
Mvr=Mv.ravel()
Mvtblr=Mvtbl.ravel()
nidx=(~np.isnan(Mvr))
Mvr=Mvr[nidx]
Mvtblr=Mvtblr[nidx]
(fpv,tpv,aucv)=roc.roc(list(Mvr),list(Mvtblr))
# RFPP: 0-based position of the first label equal to 1 when scores are sorted in decreasing order.
print cid,"AUC =",aucv, "RFPP =",np.argmax(Mvtblr[np.argsort(-Mvr)]==1)
# AUC of the NaN-ignoring maxima taken along axis 0, then along axis 1.
(fpl,tpl,auc)=roc.roc(list(np.nanmax(Mv,axis=0)),list(np.nanmax(Mvtbl,axis=0))); print auc
(fpl,tpl,auc)=roc.roc(list(np.nanmax(Mv,axis=1)),list(np.nanmax(Mvtbl,axis=1))); print auc
Exemple #26
0
     dd=[]
     ld=[]
     md=[]
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_,_,aa_di)=roc.roc(dd,ld)
     (_,_,aa_mi)=roc.roc(md,ld)
     Xauc.append([aa_mi,aa_di])
     print cid,ncs,lstats[0].shape[0],rstats[0].shape[0],Xauc[-1],Xuauc[-1],Lauc[-2:]
     #pdb.set_trace()
 if(myid!=0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:
         MV=[]
    def getROC(self, rocN = None) :
        """
        Return the third value computed by roc_module.roc for this object's
        decision values (self.decisionFunc) and given labels (self.givenY).

        rocN: passed through to roc_module.roc together with
            self.rocNormalization.
        """
        _, _, area = roc_module.roc(self.decisionFunc, self.givenY,
                                    rocN, self.rocNormalization)
        return area
    def getROC(self, rocN = None) :
        """
        Return the third value computed by roc_module.roc for this object's
        decision values (self.decisionFunc) and given labels (self.givenY).

        rocN: passed through to roc_module.roc together with
            self.rocNormalization.
        """
        curve = roc_module.roc(self.decisionFunc, self.givenY,
                               rocN, self.rocNormalization)
        # only the last of the three returned values is needed
        _, _, rocValue = curve
        return rocValue
Exemple #29
0
"""
Created on Wed Nov 27 08:40:33 2013

@author: root
"""
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Script: for every pickled structure, score residues by a PSSM-derived value
# and store the resulting roc() figure per file id in A.

# Labelled example set; E.Pex is indexed by complex key below.
E=getExamplesDBD.loader(os.path.join('../../DBD4CSPKL/PKL','ENS_15_35_50.lbl.pkl'))
pdbdir='../../DBD4CSPKL/PDB_all_'
pkldir='../../DBD4CSPKL/PKL'
# Unique ids: the text before the first '.' in element 1 of getFileParts for
# every '*.pdb.pkl' file in pkldir.
F=list(set([getFileParts(g)[1].split('.')[0] for g in glob.glob(os.path.join(pkldir,'*.pdb.pkl'))]))
# fid -> last element returned by roc() (presumably the AUC - confirm)
A={}
for fid in F:    
    print fid
    X=myPDB.loader(os.path.join(pkldir,fid+'.pdb.pkl'))
    # one score per column of the PSSM: the maximum along axis 0
    C=np.max(X.pssm,axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)        
    # keys of E.Pex that contain this file id
    fcids=[k for k in E.Pex.keys() if (fid in k)]
    fPi=[]
    for c in fcids:        
        # take element 0 of each pair when fid is the first entry of the key,
        # otherwise element 1
        fPi.extend([i[int(c[0]!=fid)] for i in E.Pex[c][0]])
    fPi=np.unique(np.array(fPi))
    # files without any collected index are skipped
    if len(fPi):
        
        # label vector: 1.0 at the collected indices, 0.0 elsewhere
        L=np.zeros(len(C))
        L[fPi]=1.0
        A[fid]=roc(list(C),list(L))[-1]
def getAUC4Protein(lrV):
    """
    AUC for one protein from its per-residue dictionary.

    lrV: dict whose values are sequences; element 0 of each value is used as
        the score and element 1 as the label passed to roc.roc.
    Returns (a,vv,ll): the third element of roc.roc plus the scores and the
    labels as numpy arrays.
    """
    # transpose the dict values into one list per field
    fields=[list(f) for f in zip(*lrV.values())]
    vv=fields[0]
    ll=fields[1]
    (_,_,a)=roc.roc(vv,ll)
    return (a,np.array(vv),np.array(ll))
Exemple #31
0
        except:
            continue
    #Get our results
    Mo=np.zeros((len(L.S2Ri),len(R.S2Ri)))
    Mo.fill(np.nan)
    ifile=cdir+'/InterPRed_prediction/2X000.InterPRed.txt'

    for ln in open(ifile,'r'):
        lns=ln.split()
        r=int(lns[3])
        c=int(lns[8])
        v=float(lns[10])
        Mo[r,c]=v    
    """
    print 'MI1- auc',roc.roc(MI1,trbs)[-1]
    print 'Shandar - auc',roc.roc(np.nanmax(M,axis=0),trbs)[-1]
    print 'Our - auc',roc.roc(np.nanmax(Mo,axis=0),trbs)[-1]
    plotdv(rseqn,trbs)
    plotdv(rseqn,np.nanmax(M,axis=0))
    plotdv(rseqn,np.nanmax(Mo,axis=0))
    #plotdv(rseqn,MI1)
    plt.show()
    """
    plt.plot([0,1],[0, 1],'k:',linewidth=2.0)
    (fp,tp,auc)=roc.roc(list(M.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'r-.',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(MI1,list(np.nanmax(Mt,axis=0)));plt.plot(fp,tp,'g--',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(list(Mo.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'b-',linewidth=2.0);print auc
    plt.grid()
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(['Random : 50.0','PPiPP : 54.0','MI-1 : 59.6','PAIRPred : 63.8'],loc=0);plt.title('EF-CAM Results');plt.show()