Python roc Exemples, PyML.evaluators.roc.roc Python Exemples

Exemple #1

0

Afficher le fichier

Fichier : analyzeDists.py Projet : foxtrotmike/pairpred

def computeDistMeansForComplex(cid,N,pdbpklpath,pppath):
    """
    code for getting distance and auc information

    cid: complex id; used to build '<cid>_l_u.pdb.pkl', '<cid>_r_u.pdb.pkl'
        and '<cid>.pairpred.txt'.
    N: not used by this function (kept so existing callers keep working).
    pdbpklpath: directory containing the two pickled structures.
    pppath: either a string prefix for the '.pairpred.txt' file (read with
        readFile) or an already loaded 7-tuple in the same layout.

    Returns (pauc,lauc,rauc,d1,d2,d3,d4): pauc as delivered by the reader,
    lauc/rauc the AUCs computed from the 6th/7th entries of the reader tuple
    (None when that computation fails), and four getDistMean results
    (L distances with r, top=True/False; R distances with c, top=True/False).
    """
    L=myPDB.loader(os.path.join(pdbpklpath,cid+'_l_u.pdb.pkl'))
    R=myPDB.loader(os.path.join(pdbpklpath,cid+'_r_u.pdb.pkl'))
    if isinstance(pppath,str):
        (pauc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(pppath+cid+'.pairpred.txt',usePDBidx=False)
    else:
        (pauc,Mv,Ml,lseq,rseq,lrV,rrV)=pppath
    # The per-protein dictionaries are kept under their own names: the
    # previous code reset them to None *before* reading them, so the AUCs
    # below could never be computed and were always returned as None.
    lauc=None
    rauc=None
    try:
        lvals=np.array(list(lrV.values()))
        (_,_,lauc)=roc.roc(list(lvals[:,0]),list(lvals[:,1]))
        rvals=np.array(list(rrV.values()))
        (_,_,rauc)=roc.roc(list(rvals[:,0]),list(rvals[:,1]))
    except Exception:
        # best effort: whatever could not be computed stays None
        pass
    # Scores are replaced by uniform random numbers; entries whose label is
    # negative are forced to -1 before sorting.
    Mlx=np.random.random(Ml.shape)
    Mlx[Ml<0]=-1
    (r,c,_)=sortScores(Mlx)

    lD=getDistMat(getCoords(L.R))
    rD=getDistMat(getCoords(R.R))
    M=20
    return pauc,lauc,rauc,getDistMean(lD,r,top=True,M=M), getDistMean(lD,r,top=False,M=M), getDistMean(rD,c,top=True,M=M), getDistMean(rD,c,top=False,M=M)

Exemple #2

0

Afficher le fichier

def computeDistMeansForComplex(cid, N, pdbpklpath, pppath):
    """
    code for getting distance and auc information

    cid: complex id; used to build '<cid>_l_u.pdb.pkl', '<cid>_r_u.pdb.pkl'
        and '<cid>.pairpred.txt'.
    N: not used by this function (kept so existing callers keep working).
    pdbpklpath: directory containing the two pickled structures.
    pppath: either a string prefix for the '.pairpred.txt' file (read with
        readFile) or an already loaded 7-tuple in the same layout.

    Returns a 7-tuple: pauc as delivered by the reader, lauc and rauc (AUCs
    computed from the 6th/7th entries of the reader tuple, None when that
    computation fails) and four getDistMean results (L distances with r,
    top=True/False; R distances with c, top=True/False).
    """
    L = myPDB.loader(os.path.join(pdbpklpath, cid + '_l_u.pdb.pkl'))
    R = myPDB.loader(os.path.join(pdbpklpath, cid + '_r_u.pdb.pkl'))
    if isinstance(pppath, str):
        loaded = readFile(pppath + cid + '.pairpred.txt', usePDBidx=False)
    else:
        loaded = pppath
    (pauc, Mv, Ml, lseq, rseq, lrV, rrV) = loaded

    # Bug fix: the dictionaries used to be overwritten with None right before
    # they were read, which made both AUCs unconditionally None.
    lauc = None
    rauc = None
    try:
        lvals = np.array(list(lrV.values()))
        (_, _, lauc) = roc.roc(list(lvals[:, 0]), list(lvals[:, 1]))
        rvals = np.array(list(rrV.values()))
        (_, _, rauc) = roc.roc(list(rvals[:, 0]), list(rvals[:, 1]))
    except Exception:
        # best effort: an AUC that cannot be computed is reported as None
        pass

    # Random scores with the negative-label entries pinned to -1.
    Mlx = np.random.random(Ml.shape)
    Mlx[Ml < 0] = -1
    (r, c, _) = sortScores(Mlx)

    lD = getDistMat(getCoords(L.R))
    rD = getDistMat(getCoords(R.R))
    M = 20
    return (pauc, lauc, rauc,
            getDistMean(lD, r, top=True, M=M),
            getDistMean(lD, r, top=False, M=M),
            getDistMean(rD, c, top=True, M=M),
            getDistMean(rD, c, top=False, M=M))

Exemple #3

0

Afficher le fichier

Fichier : calcMIDI.py Projet : foxtrotmike/pairpred

def getAUCs(mx,dx,dd,dthr=6.0):
    """
    Return (aa_mi, aa_di): the areas reported by roc.roc for the scores in
    mx and in dx, both judged against labels derived from dd
    (+1 where dd<dthr, -1 elsewhere).
    Positions that are NaN in mx, dx or the labels are removed beforehand.
    """
    labels=(2*(dd<dthr)-1).flatten()
    mscores=mx.flatten()
    dscores=dx.flatten()
    # boolean arrays: '+' acts as element-wise OR
    hasnan=np.isnan(mscores)+np.isnan(labels)+np.isnan(dscores)
    keep=~hasnan
    labels=list(labels[keep])
    mscores=list(mscores[keep])
    dscores=list(dscores[keep])
    aa_mi=roc.roc(mscores,labels)
    aa_di=roc.roc(dscores,labels)
    # roc.roc yields a 3-tuple; only its last element is of interest here
    (_,_,aa_mi)=aa_mi
    (_,_,aa_di)=aa_di
    return aa_mi,aa_di

Exemple #4

0

Afficher le fichier

Fichier : calcMIDI.py Projet : foxtrotmike/pairpred

def getAUCs(mx, dx, dd, dthr=6.0):
    """
    Compute two AUC values against distance-derived labels.

    Labels are +1 where dd < dthr and -1 otherwise. mx, dx and the labels are
    flattened, positions holding NaN in any of the three are discarded, and
    roc.roc is evaluated once for mx and once for dx.

    Returns (aa_mi, aa_di), the third element of each roc.roc result.
    """
    flat_labels = (2 * (dd < dthr) - 1).flatten()
    flat_m = mx.flatten()
    flat_d = dx.flatten()
    # '+' on boolean arrays is an element-wise OR
    valid = ~(np.isnan(flat_m) + np.isnan(flat_labels) + np.isnan(flat_d))
    label_list = list(flat_labels[valid])
    m_list = list(flat_m[valid])
    d_list = list(flat_d[valid])
    (_, _, aa_mi) = roc.roc(m_list, label_list)
    (_, _, aa_di) = roc.roc(d_list, label_list)
    return aa_mi, aa_di

Exemple #5

0

Afficher le fichier

def getAUC4Protein(lrV):
    """
    Split the values of lrV into their first and second components and
    evaluate roc.roc on them.

    Returns (a, vv, ll): the third element of the roc.roc result plus the
    first and second components as numpy arrays.
    """
    columns = [list(column) for column in zip(*lrV.values())]
    first = columns[0]
    second = columns[1]
    (_, _, area) = roc.roc(first, second)
    return (area, np.array(first), np.array(second))

Exemple #6

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def getTP_RFPP(Mv,Ml):
    """
    Returns (ntp, rfpp).

    ntp: maximum of the second array returned by
        roc.roc(scores, labels, 50, normalize=False) -- per the original
        description, the number of true positives in the top 50 predictions.
    rfpp: 0-based position of the first label equal to 1 once the labels are
        ordered by decreasing score (np.argmax also gives 0 when no label
        equals 1).
    Entries that are NaN in either input are ignored.
    """
    valid=~(np.isnan(Mv)+np.isnan(Ml))
    scores=Mv[valid]
    labels=Ml[valid]
    ranked=labels[np.argsort(-scores)]
    rfpp=np.argmax(ranked==1)
    (_,tpr,_)=roc.roc(list(scores),list(labels),50,normalize=False)
    ntp=np.max(tpr)
    return (ntp,rfpp)

Exemple #7

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def parseShandarFiles(ifile,auconly=False,**kwargs): #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Reads shandar's output files with labels (made on the same pattern as analyzePredFile.readFile)

    ifile: path whose directory and base name (via getFileParts) locate the
        '<cid>.preds' and '<cid>.cont' files, which are read line by line in
        parallel. cid is split on '_'; parts 1 and 2 give the accepted chain
        ids for the left and right side.
    auconly: when true only the AUC is returned.
    kwargs: accepted and ignored.

    Returns (auc,Mvm,Mlm,None,None,lrV,rrV): Mvm holds the prediction values
    (NaN where no line was seen), Mlm the labels (0 where no line was seen),
    lrV/rrV map a row/column index to (max value, max label) of that
    row/column.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns=ln.split()
        lidx=lns[0]+lns[1]
        ridx=lns[5]+lns[6]
        lbl=int(lns[9])
        return (lidx,ridx,lbl)

    loopath,cid,_=getFileParts(ifile)
    lcids=cid.split('_')[1]
    rcids=cid.split('_')[2]
    Mlidx={}    # left residue key  -> row index
    Mridx={}    # right residue key -> column index
    Mlv=[]      # (row, column, label, prediction)
    with open(os.path.join(loopath,cid+'.preds')) as fp,open(os.path.join(loopath,cid+'.cont')) as fc:
        for lnp,lnc in zip(fp,fc):
            (lidx,ridx,lbl)=parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                # A key seen for the first time gets the next free index.
                # This replaces a bare "except:" that was used as a
                # missing-key test and would have hidden any other error.
                lx=Mlidx.setdefault(lidx,len(Mlidx))
                rx=Mridx.setdefault(ridx,len(Mridx))
                p=float(lnp)
                Mlv.append((lx,rx,lbl,p))
    l=len(Mlidx)
    r=len(Mridx)
    Mvm=np.zeros((l,r))
    Mvm.fill(np.nan)
    Mlm=np.zeros((l,r))
    for (lx,rx,lbl,p) in Mlv:
        Mlm[lx,rx]=lbl
        Mvm[lx,rx]=p

    # NOTE(review): pairs that never occurred are still NaN in Mvm here and
    # are passed to roc.roc unfiltered -- confirm this is intended.
    (_,_,auc)=roc.roc(list(Mvm.flatten()),list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV=dict(zip(range(Mvm.shape[0]),zip(np.max(Mvm,axis=1),np.max(Mlm,axis=1))))
    rrV=dict(zip(range(Mvm.shape[1]),zip(np.max(Mvm,axis=0),np.max(Mlm,axis=0))))

    return auc,Mvm,Mlm,None,None,lrV,rrV

Exemple #8

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def getTP_RFPP(Mv, Ml):
    """
    Summarise how early positives appear among the ranked predictions.

    Entries that are NaN in Mv or Ml are dropped first.

    Returns (ntp, rfpp): ntp is the maximum of the second array produced by
    roc.roc(..., 50, normalize=False) (documented originally as the number of
    true positives in the top 50 predictions); rfpp is the 0-based index of
    the first label equal to 1 after sorting by decreasing score.
    """
    is_nan = np.isnan(Mv) + np.isnan(Ml)
    kept_scores = Mv[~is_nan]
    kept_labels = Ml[~is_nan]
    order = np.argsort(-kept_scores)
    rfpp = np.argmax(kept_labels[order] == 1)
    roc_result = roc.roc(list(kept_scores), list(kept_labels), 50,
                         normalize=False)
    (_, tpr, _) = roc_result
    return (np.max(tpr), rfpp)

Exemple #9

0

Afficher le fichier

def roc_score(data, targetClass, otherClass, **args):
    """
    Score each feature by the area that roc.roc reports for it.

    data: object exposing numFeatures, getFeature(i) and labels.Y.
    targetClass, otherClass: accepted but not used by this function.
    args: a 'rocN' entry is accepted for compatibility; it is not used
        (the previous code read it into a local and never passed it on).

    Returns a float array with one entry per feature holding
    max(auc, 1 - auc), so a feature ranks the same whichever direction it
    separates the labels.
    """
    numFeatures = data.numFeatures
    # builtin float instead of numpy.float_: the alias was removed in
    # NumPy 2.0 and both name the same float64 dtype
    s = numpy.zeros(numFeatures, float)
    for i in range(numFeatures):
        auc = roc.roc(data.getFeature(i), data.labels.Y)[2]
        s[i] = max(auc, 1 - auc)

    return s

Exemple #10

0

Afficher le fichier

Fichier : featsel.py Projet : bpartridge/PyML

def roc_score(data, targetClass, otherClass, **args) :
    """
    Rank features by their individual ROC area.

    data: object exposing numFeatures, getFeature(i) and labels.Y.
    targetClass, otherClass: accepted but unused here.
    args: may contain 'rocN'; it is tolerated but has no effect (it was
        previously copied into a local that nothing read).

    Returns a float array of length data.numFeatures whose i-th entry is
    max(auc, 1-auc) for feature i, auc being the third element of
    roc.roc(feature values, data.labels.Y).
    """
    # float rather than numpy.float_, which no longer exists in NumPy >= 2.0
    s = numpy.zeros(data.numFeatures, float)
    for i in range(len(s)) :
        featureValues = data.getFeature(i)
        auc = roc.roc(featureValues, data.labels.Y)[2]
        s[i] = max(auc, 1-auc)

    return s

Exemple #11

0

Afficher le fichier

Fichier : resultsObjects.py Projet : silasxue/Sentiment-Analysis

    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results via roc_module.plotROC.

        filename: passed through to roc_module.plotROC.
        fold: None to use all folds (a single curve when numFolds == 1,
            otherwise the roc_module.roc_VA curve over the folds), or a fold
            number to plot just that fold.
        args: only 'rocN' is read; it is handed to the ROC computation.

        Raises ValueError when fold is larger than self.numFolds.
        """
        rocN = args.get('rocN')
        if self.numFolds == 1 :
            # if the results are for a single split
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        elif fold is None :
            # get an averaged ROC curve
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            folds = [(dvals[i], labels[i]) for i in range(len(labels))]
            rocFP, rocTP, area = roc_module.roc_VA(folds, rocN)
        else :
            # plot an ROC plot for the given fold
            # NOTE(review): fold == self.numFolds passes this check; if folds
            # are numbered from 0 the test should probably be ">=" -- confirm.
            if fold > self.numFolds :
                # call form instead of "raise ValueError, '...'", which is a
                # syntax error on Python 3
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        roc_module.plotROC(rocFP, rocTP, filename)

Exemple #12

0

Afficher le fichier

Fichier : resultsObjects.py Projet : chaitanyambilgikar/B551-Fall-2012

    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results via roc_module.plotROC.

        filename: passed through to roc_module.plotROC.
        fold: None to use all folds (a single curve when numFolds == 1,
            otherwise the roc_module.roc_VA curve over the folds), or a fold
            number to plot just that fold.
        args: 'rocN' is handed to the ROC computation; all keyword arguments
            are also forwarded unchanged to roc_module.plotROC.

        Raises ValueError when fold is larger than self.numFolds.
        """
        rocN = args.get('rocN')
        if self.numFolds == 1 :
            # if the results are for a single split
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        elif fold is None :
            # get an averaged ROC curve
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            folds = [(dvals[i], labels[i]) for i in range(len(labels))]
            rocFP, rocTP, area = roc_module.roc_VA(folds, rocN)
        else :
            # plot an ROC plot for the given fold
            # NOTE(review): fold == self.numFolds passes this check; if folds
            # are numbered from 0 the test should probably be ">=" -- confirm.
            if fold > self.numFolds :
                # call form instead of "raise ValueError, '...'", which is a
                # syntax error on Python 3
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        roc_module.plotROC(rocFP, rocTP, filename, **args)

Exemple #13

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def parse1SVM(ifile, auconly=False, **kwargs):  #,E,Asgl
    """
    Build pairwise score and label matrices from single-protein results.

    ifile: file name; the first four characters of its base name (obtained
        with two getFileParts calls) are used as the complex id.
    auconly: when true only the AUC is returned.
    kwargs: accepted and ignored.

    The examples object is loaded from 'EP_6N.lbl.pkl' and the single-protein
    results from 'result.sgl.pkl' on every call. The score of a pair is the
    sum of element 0 of the two per-residue entries; the label is +1 when the
    pair is listed in E.Pex[cid][0] and -1 otherwise.

    Returns (auc, Mv, Ml, None, None, lrV, rrV), or just auc when auconly.
    """
    exfname = 'EP_6N.lbl.pkl'
    sglfile = 'result.sgl.pkl'
    # E and Asgl are local names, so the former "try: E / except NameError"
    # probes always failed and both files were always loaded; that is now
    # done directly.
    E = getExamplesDBD.loader(exfname)
    # NOTE: unpickling executes code stored in the file; only use trusted
    # result files. The handle is now closed deterministically.
    with open(sglfile, "rb") as fh:
        Asgl = cPickle.load(fh)

    cid = getFileParts(getFileParts(ifile)[1])[1][:4]
    (_, _, lrV, rrV) = Asgl[cid]

    Mv = np.zeros((len(lrV), len(rrV)))
    Ml = np.zeros(Mv.shape)
    for lidx, xr in enumerate(lrV.keys()):
        for ridx, xc in enumerate(rrV.keys()):
            if (xr, xc) in E.Pex[cid][0]:
                l = +1.0
            else:
                l = -1.0
            Mv[lidx, ridx] = lrV[xr][0] + rrV[xc][0]
            Ml[lidx, ridx] = l

    (_, _, auc) = roc.roc(list(Mv.flatten()), list(Ml.flatten()))
    if auconly:
        return auc

    return (auc, Mv, Ml, None, None, lrV, rrV)  #auc,Mvm,Mlm,None,None,lrV,rrV

Exemple #14

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def parse1SVM(ifile,auconly=False,**kwargs):#,E,Asgl
    """
    Build pairwise score and label matrices from single-protein results.

    ifile: file name; the first four characters of its base name (obtained
        with two getFileParts calls) are used as the complex id.
    auconly: when true only the AUC is returned.
    kwargs: accepted and ignored.

    The examples object is loaded from 'EP_6N.lbl.pkl' and the single-protein
    results from 'result.sgl.pkl' on every call. The score of a pair is the
    sum of element 0 of the two per-residue entries; the label is +1 when the
    pair is listed in E.Pex[cid][0] and -1 otherwise.

    Returns (auc,Mv,Ml,None,None,lrV,rrV), or just auc when auconly.
    """
    exfname='EP_6N.lbl.pkl'
    sglfile='result.sgl.pkl'
    # E and Asgl are locals of this function, so probing them with
    # "try: E / except NameError" always failed; load unconditionally.
    E=getExamplesDBD.loader(exfname)
    # NOTE: unpickling executes code stored in the file; only use trusted
    # result files. "with" closes the handle that used to be leaked.
    with open(sglfile,"rb") as fh:
        Asgl=cPickle.load(fh)

    cid=getFileParts(getFileParts(ifile)[1])[1][:4]
    (_,_,lrV,rrV)=Asgl[cid]

    Mv=np.zeros((len(lrV),len(rrV)))
    Ml=np.zeros(Mv.shape)
    for lidx,xr in enumerate(lrV.keys()):
        for ridx,xc in enumerate(rrV.keys()):
            if (xr,xc) in E.Pex[cid][0]:
                l=+1.0
            else:
                l=-1.0
            Mv[lidx,ridx]=lrV[xr][0]+rrV[xc][0]
            Ml[lidx,ridx]=l

    (_,_,auc)=roc.roc(list(Mv.flatten()),list(Ml.flatten()))
    if auconly:
        return auc

    return (auc,Mv,Ml,None,None,lrV,rrV) #auc,Mvm,Mlm,None,None,lrV,rrV

Exemple #15

0

Afficher le fichier

def rasaPlot(rAsa,Dxx,Lxx,Np=10):
    """
    Plot the AUC and the class counts as a function of rAsa.

    rAsa: array of values used for binning.
    Dxx: array of scores, indexed like rAsa.
    Lxx: array of labels, indexed like rAsa (+1 is counted as positive,
        anything else as negative).
    Np: number of bin edges, spread evenly between min(rAsa) and max(rAsa).

    For each bin with more than 10 positives and more than 10 negatives the
    AUC is computed with roc.roc; other bins stay NaN. Prints the pearsonr
    result between bin centres and AUC over the non-NaN bins and writes
    '../figures/fig_A2.eps' and '../figures/fig_A3.eps'.

    NOTE(review): every bin is half-open (lo <= x < hi), so samples equal to
    max(rAsa) are not counted in any bin -- confirm that is intended.
    """
    nb=np.linspace(np.min(rAsa),np.max(rAsa),Np)
    nbins=len(nb)-1
    R=np.zeros(nbins)
    R.fill(np.nan)
    xx=np.zeros(nbins)
    pp=np.zeros(nbins)
    nn=np.zeros(nbins)
    for idx in range(nbins):
        lo=nb[idx]
        hi=nb[idx+1]
        vidx=np.logical_and(rAsa>=lo, rAsa<hi)
        v=Dxx[vidx]
        l=Lxx[vidx]
        xx[idx]=(lo+hi)/2.0
        pp[idx]=np.sum(l==+1)
        nn[idx]=np.sum(l!=+1)
        if pp[idx]>10 and nn[idx]>10:
            try:
                (_,_,R[idx])=roc.roc(list(v),list(l))
            except Exception:
                # best effort: a bin whose AUC cannot be computed stays NaN
                pass
    naidx=~np.isnan(R)
    # single-argument form prints the same text on Python 2 and 3
    print("Correlation Coefficient:  %s" % (pearsonr(xx[naidx],R[naidx]),))
    #PLOTTING ONLY CODE
    # raw strings: "\D" is not a valid escape sequence in a normal literal
    stat=r"$\Delta$rASA"
    plt.figure(0)
    plt.plot(xx,R,'-o')
    plt.xlabel(stat)
    plt.ylabel("AUC")
    plt.title(r'AUC vs $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A2.eps', format='eps', dpi=1200)

    plt.figure(1)
    plt.plot(nb[:-1],pp,'r-^',label="Interacting residues")
    plt.plot(nb[:-1],nn,'k-v',label="Not-Interacting residues")
    plt.xlabel(stat)
    plt.ylabel("Counts")
    plt.legend(loc=0)
    plt.title(r'Number of residues vs. $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A3.eps', format='eps', dpi=1200)

Exemple #16

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def computeNTP(ifile, top=200, freader=None):
    """
    Given a result file name ifile and its reader function freader=parseShandarFiles, it computes
    auc: The auc score
    ttp: Number of true positives in top
    fpi: index of the first true positive
    dntp: Distance to the nearest true positive for each top example
    la: auc of ligand
    ra: auc of receptor
    pp: number of positive examples
    nn: number of negative examples
    Mvx: flattened matrix of prediction scores
    Mlx: flattened matrix of labels
        on input auc (not used, recomputed from prediction scores and labels, kept for compatibility with the file reader)

    ifile may also be an already loaded 7-tuple in the reader's layout; in
    that case freader is not needed. The caller's score matrix is no longer
    modified (see the comment in the body).

    Returns (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)
    where Mvx/Mlx contain only the entries that are not NaN in either matrix
    and lv, ll, rv, rl are the arrays returned by getAUC4Protein.
    """
    if isinstance(ifile, str):
        assert freader is not None
        (_, Mv, Ml, lseq, rseq, lrV, rrV) = freader(ifile, usePDBidx=False)
    else:  #expects tuple
        (_, Mv, Ml, lseq, rseq, lrV, rrV) = ifile

    (la, lv, ll) = getAUC4Protein(lrV)
    (ra, rv, rl) = getAUC4Protein(rrV)
    # flatten() always copies. ravel() may hand back a view, and the -inf
    # assignment below then overwrote the NaNs inside the caller's Mv.
    Mvx = Mv.flatten()
    Mlx = Ml.ravel()
    nidx = ~np.isnan(Mvx) & ~np.isnan(Mlx)
    (_, _, auc) = roc.roc(list(Mvx[nidx]), list(Mlx[nidx]))
    # invalid entries get the lowest possible score before the top search
    Mvx[~nidx] = -np.inf
    (ttp, fpi, dntp) = findNTPinTop(Mvx, Mlx, Mv.shape, top=top)
    Mvx = Mvx[nidx]
    Mlx = Mlx[nidx]

    pp = np.sum(Mlx == 1)  # total number of positives
    nn = len(Mlx) - pp  #total number of negatives
    return (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)

Exemple #17

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def computeNTP(ifile,top=200,freader=None):
    """
    Given a result file name ifile and its reader function freader=parseShandarFiles, it computes
    auc: The auc score
    ttp: Number of true positives in top
    fpi: index of the first true positive
    dntp: Distance to the nearest true positive for each top example
    la: auc of ligand
    ra: auc of receptor
    pp: number of positive examples
    nn: number of negative examples
    Mvx: flattened matrix of prediction scores
    Mlx: flattened matrix of labels
        on input auc (not used, recomputed from prediction scores and labels, kept for compatibility with the file reader)

    ifile may also be an already loaded 7-tuple in the reader's layout; in
    that case freader is not needed. The caller's score matrix is no longer
    modified (see the comment in the body).

    Returns (auc,ttp,fpi,dntp,la,ra,pp,nn,Mvx,Mlx,lv,ll,rv,rl) where Mvx/Mlx
    contain only the entries that are not NaN in either matrix and
    lv, ll, rv, rl are the arrays returned by getAUC4Protein.
    """
    if isinstance(ifile,str):
        assert freader is not None
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=freader(ifile,usePDBidx=False)
    else: #expects tuple
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=ifile

    (la,lv,ll)=getAUC4Protein(lrV)
    (ra,rv,rl)=getAUC4Protein(rrV)
    # flatten() always copies. ravel() may hand back a view, and the -inf
    # assignment below then overwrote the NaNs inside the caller's Mv.
    Mvx=Mv.flatten()
    Mlx=Ml.ravel()
    nidx=~np.isnan(Mvx) &  ~np.isnan(Mlx)
    (_,_,auc)=roc.roc(list(Mvx[nidx]),list(Mlx[nidx]))
    # invalid entries get the lowest possible score before the top search
    Mvx[~nidx]=-np.inf
    (ttp,fpi,dntp)=findNTPinTop(Mvx,Mlx,Mv.shape,top=top)
    Mvx=Mvx[nidx]
    Mlx=Mlx[nidx]

    pp=np.sum(Mlx==1) # total number of positives
    nn=len(Mlx)-pp #total number of negatives
    return (auc,ttp,fpi,dntp,la,ra,pp,nn,Mvx,Mlx,lv,ll,rv,rl)

Exemple #18

0

Afficher le fichier

Fichier : dbdscrpp3.py Projet : foxtrotmike/pairpred

def getAUC(s):
    """
    Compute per-complex AUCs and the roc.roc_VA curve for a results object.

    s: either the name of a pickle file holding (r,dkey) or that pair itself.
        r must offer getPatternID(), getDecisionFunction() and
        getGivenLabels(); dkey maps a complex id to the pattern ids that
        belong to it (or to a tuple whose first element is that collection).

    Returns (auc,(fp,tp),(A,Rx,D,L,cids,r,dkey)): auc/fp/tp come from
    roc.roc_VA over all complexes, A holds one AUC per complex, D and L the
    decision values and labels per complex, and Rx the entry of
    getRMSDDict('shandar_rmsd.txt') per complex (left as empty lists when
    that dictionary cannot be loaded).

    Raises KeyError when a pattern id listed in dkey is absent from the
    results (a debugger breakpoint used to be triggered instead).
    """
    if isinstance(s,str):
        # NOTE: unpickling executes code stored in the file; only load
        # trusted results. "with" closes the handle that used to be leaked.
        with open(s,"rb") as fh:
            (r,dkey)=cPickle.load(fh)
    else:
        (r,dkey)=s

    patid=combineList(r.getPatternID())
    vkey=dict(zip(patid,range(len(patid))))
    decfn=combineList(r.getDecisionFunction())
    lblid=combineList(r.getGivenLabels())
    cids=list(dkey.keys())
    D=[[] for _ in cids]
    L=[[] for _ in cids]
    A=[[] for _ in cids]
    try:
        R=getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # the RMSD table is optional
        R=None
    Rx=[[] for _ in cids]
    for i,cid in enumerate(cids):
        cidx=dkey[cid]
        if type(cidx) is tuple: #backward compatibility with old results objects
            cidx=cidx[0]
        for e in cidx:
            try:
                n=vkey[e]
            except KeyError:
                # Fail loudly: continuing after the old pdb.set_trace() would
                # have used an unbound or stale index n.
                raise KeyError("pattern id %r of complex %r not found in the results"%(e,cid))
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_,_,A[i])=roc.roc(D[i],L[i])
        if R is not None:
            Rx[i]=R[cid]
    (fp,tp,auc)=roc.roc_VA(list(zip(D,L)))
    return (auc,(fp,tp),(A,Rx,D,L,cids,r,dkey))

Exemple #19

0

Afficher le fichier

Fichier : dbdscrpp3.py Projet : foxtrotmike/pairpred

def getAUC(s):
    """
    Compute per-complex AUCs and the roc.roc_VA curve for a results object.

    s: either the name of a pickle file holding (r, dkey) or that pair
        itself. r must offer getPatternID(), getDecisionFunction() and
        getGivenLabels(); dkey maps a complex id to the pattern ids that
        belong to it (or to a tuple whose first element is that collection).

    Returns (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey)): auc/fp/tp come
    from roc.roc_VA over all complexes, A holds one AUC per complex, D and L
    the decision values and labels per complex, and Rx the entry of
    getRMSDDict('shandar_rmsd.txt') per complex (left as empty lists when
    that dictionary cannot be loaded).

    Raises KeyError when a pattern id listed in dkey is absent from the
    results (a debugger breakpoint used to be triggered instead).
    """
    if isinstance(s, str):
        # NOTE: unpickling executes code stored in the file; only load
        # trusted results. "with" closes the handle that used to be leaked.
        with open(s, "rb") as fh:
            (r, dkey) = cPickle.load(fh)
    else:
        (r, dkey) = s

    patid = combineList(r.getPatternID())
    vkey = dict(zip(patid, range(len(patid))))
    decfn = combineList(r.getDecisionFunction())
    lblid = combineList(r.getGivenLabels())
    cids = list(dkey.keys())
    D = [[] for _ in cids]
    L = [[] for _ in cids]
    A = [[] for _ in cids]
    try:
        R = getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # the RMSD table is optional
        R = None
    Rx = [[] for _ in cids]
    for i, cid in enumerate(cids):
        cidx = dkey[cid]
        if type(cidx) is tuple:  #backward compatibility with old results objects
            cidx = cidx[0]
        for e in cidx:
            try:
                n = vkey[e]
            except KeyError:
                # Fail loudly: continuing after the old pdb.set_trace() would
                # have used an unbound or stale index n.
                raise KeyError(
                    "pattern id %r of complex %r not found in the results"
                    % (e, cid))
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_, _, A[i]) = roc.roc(D[i], L[i])
        if R is not None:
            Rx[i] = R[cid]
    (fp, tp, auc) = roc.roc_VA(list(zip(D, L)))
    return (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))

Exemple #20

0

Afficher le fichier

Fichier : aucPSSM.py Projet : foxtrotmike/pairpred

from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Script: for every pickled structure in pkldir, compute the AUC of its
# column-wise maximum of X.pssm against a 0/1 vector marking the positions
# that take part in positive examples, and store it in A[fid].
E = getExamplesDBD.loader(
    os.path.join('../../DBD4CSPKL/PKL', 'ENS_15_35_50.lbl.pkl'))
pdbdir = '../../DBD4CSPKL/PDB_all_'
pkldir = '../../DBD4CSPKL/PKL'
# unique file ids: base name of every '*.pdb.pkl' up to its first '.'
F = list(
    set([
        getFileParts(g)[1].split('.')[0]
        for g in glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
    ]))
A = {}
for fid in F:
    # parenthesised single argument: same output on Python 2 and Python 3
    print(fid)
    X = myPDB.loader(os.path.join(pkldir, fid + '.pdb.pkl'))
    C = np.max(X.pssm, axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)
    # keys of E.Pex that contain this file id
    fcids = [k for k in E.Pex.keys() if (fid in k)]
    fPi = []
    for c in fcids:
        # element 0 of each positive pair when fid is the first entry of the
        # key, element 1 otherwise
        fPi.extend([i[int(c[0] != fid)] for i in E.Pex[c][0]])
    fPi = np.unique(np.array(fPi))
    if len(fPi):

        L = np.zeros(len(C))
        L[fPi] = 1.0
        A[fid] = roc(list(C), list(L))[-1]

Exemple #21

0

Afficher le fichier

Fichier : analyzeLOOCV_par_pwgen.py Projet : foxtrotmike/pairpred

def parseShandarFiles(ifile,
                      auconly=False,
                      **kwargs):  #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Reads shandar's output files with labels (made on the same pattern as analyzePredFile.readFile)

    ifile: path whose directory and base name (via getFileParts) locate the
        '<cid>.preds' and '<cid>.cont' files, which are read line by line in
        parallel. cid is split on '_'; parts 1 and 2 give the accepted chain
        ids for the left and right side.
    auconly: when true only the AUC is returned.
    kwargs: accepted and ignored.

    Returns (auc, Mvm, Mlm, None, None, lrV, rrV): Mvm holds the prediction
    values (NaN where no line was seen), Mlm the labels (0 where no line was
    seen), lrV/rrV map a row/column index to (max value, max label) of that
    row/column.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns = ln.split()
        lidx = lns[0] + lns[1]
        ridx = lns[5] + lns[6]
        lbl = int(lns[9])
        return (lidx, ridx, lbl)

    loopath, cid, _ = getFileParts(ifile)
    lcids = cid.split('_')[1]
    rcids = cid.split('_')[2]
    Mlidx = {}  # left residue key  -> row index
    Mridx = {}  # right residue key -> column index
    Mlv = []  # (row, column, label, prediction)
    with open(os.path.join(loopath, cid + '.preds')) as fp, open(
            os.path.join(loopath, cid + '.cont')) as fc:
        for lnp, lnc in zip(fp, fc):
            (lidx, ridx, lbl) = parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                # First occurrence of a key receives the next free index.
                # Replaces a bare "except:" used as a missing-key test,
                # which would also have hidden unrelated errors.
                lx = Mlidx.setdefault(lidx, len(Mlidx))
                rx = Mridx.setdefault(ridx, len(Mridx))
                p = float(lnp)
                Mlv.append((lx, rx, lbl, p))
    l = len(Mlidx)
    r = len(Mridx)
    Mvm = np.zeros((l, r))
    Mvm.fill(np.nan)
    Mlm = np.zeros((l, r))
    for (lx, rx, lbl, p) in Mlv:
        Mlm[lx, rx] = lbl
        Mvm[lx, rx] = p

    # NOTE(review): pairs that never occurred are still NaN in Mvm here and
    # are passed to roc.roc unfiltered -- confirm this is intended.
    (_, _, auc) = roc.roc(list(Mvm.flatten()), list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV = dict(
        zip(range(Mvm.shape[0]),
            zip(np.max(Mvm, axis=1), np.max(Mlm, axis=1))))
    rrV = dict(
        zip(range(Mvm.shape[1]),
            zip(np.max(Mvm, axis=0), np.max(Mlm, axis=0))))

    return auc, Mvm, Mlm, None, None, lrV, rrV

Exemple #22

0

Afficher le fichier

Fichier : getECstats.py Projet : foxtrotmike/pairpred

     dd = []
     ld = []
     md = []
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_, _, aa_di) = roc.roc(dd, ld)
     (_, _, aa_mi) = roc.roc(md, ld)
     Xauc.append([aa_mi, aa_di])
     print cid, ncs, lstats[0].shape[0], rstats[0].shape[0], Xauc[
         -1], Xuauc[-1], Lauc[-2:]
     #pdb.set_trace()
 if (myid != 0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:

Exemple #23

0

Afficher le fichier

Fichier : analyzeFeatures.py Projet : foxtrotmike/pairpred

    '2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX',
    '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B',
    '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T',
    '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M',
    'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD',
    '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU'
]
# Script: for every complex id, score each positive and negative example by
# the sum of the two B entries it refers to and store the resulting AUC in
# A[cid].
fs = f3 + f4
E = getExamplesDBD.loader(efile)
A = {}
for cid in fs:
    # parenthesised single argument: same output on Python 2 and Python 3
    print(cid)
    L = myPDB.loader(bdir + cid + '_l_u.pdb.pkl')
    R = myPDB.loader(bdir + cid + '_r_u.pdb.pkl')
    V = []
    Y = []
    for p in E.Pex[cid][0]:
        # alternative feature formerly tried here:
        #np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
        v = L.B[p[0]] + R.B[p[1]]
        # "not" instead of "~": bitwise inversion of a plain Python bool is
        # always truthy, logical negation is correct for both bool kinds
        if not np.isnan(v):
            V.append(v)
            Y.append(+1)
    for n in E.getNegEx(cid):
        v = L.B[n[0]] + R.B[n[1]]
        if not np.isnan(v):
            V.append(v)
            Y.append(-1)
    (_, _, auc) = roc.roc(V, Y)
    A[cid] = auc

Exemple #24

0

Afficher le fichier

Fichier : analyzeFeatures.py Projet : foxtrotmike/pairpred

# Path of the pickled examples object; bdir is defined outside this excerpt.
efile=bdir+'E_125PN_15_35_50.lbl.pkl'
#fs=glob.glob(bdir+'*_u.pdb.pkl')
# f3 and f4: two hard-coded lists of four-character ids. They are
# concatenated into fs and each id is used to build the
# '<id>_l_u.pdb.pkl' / '<id>_r_u.pdb.pkl' file names.
f3=['1SBB', '1JPS', '2HMI', '1GHQ', '1KTZ', '1K74', '1D6R', '2SIC', '1GPW', '1XD3', '1EAW', '1VFB', '7CEI', '1E4K', '1I4D', '1H1V', '2PCC', '1FQ1', '2HLE', '1FQJ', '1S1Q', '2OOB', '1UDI', '1KLU', '1WQ1', '1CGI', '1ATN', '1N2C', '1GP2', '1FAK', '1NW9', '1GLA', '1GRN', '2HRK', '1AZS', '1JMO', '1PXV', '1EWY', '1RLB', '1DQJ', '2BTF', '2I25', '1I2M', '1BUH', '1BGX', '1ML0', '1EFN', '1DFJ', '1Y64', '2UUY', '1MAH', '1BVK', '1BVN', '1EER', '1MLC', '1NSN', '1AK4', '1A2K', '1QFW', '2H7V', '1T6B', '1KAC', '1YVB', '1J2J', '1QA9', '1AHW', '2OT3', '2FD6', '2AJF', '1K4C', '1NCA', '1OPH', '1XQS', '1B6C', '1PPE', '2O8V', '1HIA', '1Z0K', '1R0R', '1WEJ', '1ACB', '1KXP', '1KXQ', '1R8S', '1IRA', '1GCQ', '1F51', '2B42', '2HQS', '1AKJ', '2JEL', '1KKL', '1FC2', '1E96', '1N8O', '2MTA', '2VIS', '1IB1', '1E6J', '1Z5Y', '1EZU', '1TMQ', '2C0L', '1E6E', '1IQD', '1ZHI', '1M10', '2NZ8', '1AY7', '1HE8', '1IJK', '1HE1', '1FSK', '1F34', '2SNI', '1BJ1', '2CFH', '1BKD', '1DE4', '1IBR', '1I9R', '1K5D', '1AVX']
# NOTE(review): 'BOYV' is the only id here that does not start with a digit
# -- verify it is not a typo.
f4=['2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU']
# Script: for every complex id, score each positive and negative example by
# the sum of the two B entries it refers to and store the resulting AUC in
# A[cid].
fs=f3+f4
E=getExamplesDBD.loader(efile)
A={}
for cid in fs:
    # parenthesised single argument: same output on Python 2 and Python 3
    print(cid)
    L=myPDB.loader(bdir+cid+'_l_u.pdb.pkl')
    R=myPDB.loader(bdir+cid+'_r_u.pdb.pkl')
    V=[]
    Y=[]
    for p in E.Pex[cid][0]:
        v=L.B[p[0]]+R.B[p[1]]#np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
        # "not" instead of "~": bitwise inversion of a plain Python bool is
        # always truthy, logical negation is correct for both bool kinds
        if not np.isnan(v):
            V.append(v)
            Y.append(+1)
    for n in E.getNegEx(cid):
        v=L.B[n[0]]+R.B[n[1]]#np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]])
        if not np.isnan(v):
            V.append(v)
            Y.append(-1)
    (_,_,auc)=roc.roc(V,Y)
    A[cid]=auc

Exemple #25

0

Afficher le fichier

Fichier : analyzeCAPRI.py Projet : foxtrotmike/pairpred

from PyML.evaluators import roc
from postProcess import postProcessAvg
#from getExamplesDBD import getPosex
from symmetryProcessing import *
# Script: evaluate the predictions of one complex against the positive pairs
# returned by getPosexFromPDB and print the pair-level AUC, the rank of the
# first positive, and the two AUCs obtained from the per-column and per-row
# maxima.
#bdir='../CAPRI/'
bdir='../../g2mers/'
cid='1MLC'
#(_,_,P,_,_)=getPosex(bdir,cid)    #get positive examples
P=getPosexFromPDB(bdir,cid,dthr=6.0)        # Handles symmetry in the complex
ppfile=bdir+cid+'.pairpred.txt'

(auc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(ppfile,usePDBidx=False)
#auc0,Mvc0,Mv,Mlc,lseq,rseq,lrV0,lrV,rrV0,rrV=postProcessAvg(cid,bdir,bdir)
#
#Mv[:10,:]=np.nan
#Mv[-10:,:]=np.nan
#Mv[:,:10]=np.nan
#Mv[:,-10:]=np.nan

# 0/1 table with a 1 at every positive pair
Mvtbl=np.zeros(Mv.shape)
for (i,j) in P:
    Mvtbl[i,j]=1.0
Mvr=Mv.ravel()
Mvtblr=Mvtbl.ravel()
# keep only the pairs that actually have a prediction
nidx=(~np.isnan(Mvr))
Mvr=Mvr[nidx]
Mvtblr=Mvtblr[nidx]
(fpv,tpv,aucv)=roc.roc(list(Mvr),list(Mvtblr))
# single formatted argument: prints the same text on Python 2 and Python 3
print("%s AUC = %s RFPP = %s" % (cid,aucv,np.argmax(Mvtblr[np.argsort(-Mvr)]==1)))
(fpl,tpl,auc)=roc.roc(list(np.nanmax(Mv,axis=0)),list(np.nanmax(Mvtbl,axis=0)))
print(auc)
(fpl,tpl,auc)=roc.roc(list(np.nanmax(Mv,axis=1)),list(np.nanmax(Mvtbl,axis=1)))
print(auc)

Exemple #26

0

Afficher le fichier

Fichier : getECstats.py Projet : foxtrotmike/pairpred

     dd=[]
     ld=[]
     md=[]
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_,_,aa_di)=roc.roc(dd,ld)
     (_,_,aa_mi)=roc.roc(md,ld)
     Xauc.append([aa_mi,aa_di])
     print cid,ncs,lstats[0].shape[0],rstats[0].shape[0],Xauc[-1],Xuauc[-1],Lauc[-2:]
     #pdb.set_trace()
 if(myid!=0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:
         MV=[]

Exemple #27

0

Afficher le fichier

Fichier : resultsObjects.py Projet : chaitanyambilgikar/B551-Fall-2012

    def getROC(self, rocN = None) :
        """
        Return the third element of
        roc_module.roc(self.decisionFunc, self.givenY, rocN,
        self.rocNormalization); the first two elements are discarded.
        """
        result = roc_module.roc(self.decisionFunc, self.givenY,
                                rocN, self.rocNormalization)
        _, _, value = result
        return value

Exemple #28

0

Afficher le fichier

Fichier : resultsObjects.py Projet : silasxue/Sentiment-Analysis

    def getROC(self, rocN = None) :
        """
        Evaluate roc_module.roc on this object's decision values and given
        labels (with rocN and self.rocNormalization) and return only the
        last of the three values it yields.
        """
        curveA, curveB, score = roc_module.roc(
            self.decisionFunc, self.givenY, rocN, self.rocNormalization)
        return score

Exemple #29

0

Afficher le fichier

Fichier : aucPSSM.py Projet : foxtrotmike/pairpred

"""
Created on Wed Nov 27 08:40:33 2013

@author: root
"""
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Script: for every pickled structure in pkldir, compute the AUC of its
# column-wise maximum of X.pssm against a 0/1 vector marking the positions
# that take part in positive examples, and store it in A[fid].
E=getExamplesDBD.loader(os.path.join('../../DBD4CSPKL/PKL','ENS_15_35_50.lbl.pkl'))
pdbdir='../../DBD4CSPKL/PDB_all_'
pkldir='../../DBD4CSPKL/PKL'
# unique file ids: base name of every '*.pdb.pkl' up to its first '.'
F=list(set([getFileParts(g)[1].split('.')[0] for g in glob.glob(os.path.join(pkldir,'*.pdb.pkl'))]))
A={}
for fid in F:
    # parenthesised single argument: same output on Python 2 and Python 3
    print(fid)
    X=myPDB.loader(os.path.join(pkldir,fid+'.pdb.pkl'))
    C=np.max(X.pssm,axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)
    # keys of E.Pex that contain this file id
    fcids=[k for k in E.Pex.keys() if (fid in k)]
    fPi=[]
    for c in fcids:
        # element 0 of each positive pair when fid is the first entry of the
        # key, element 1 otherwise
        fPi.extend([i[int(c[0]!=fid)] for i in E.Pex[c][0]])
    fPi=np.unique(np.array(fPi))
    if len(fPi):

        L=np.zeros(len(C))
        L[fPi]=1.0
        A[fid]=roc(list(C),list(L))[-1]

Exemple #30

0

Afficher le fichier

Fichier : analyzeLOOCV_par.py Projet : foxtrotmike/pairpred

def getAUC4Protein(lrV):
    """
    Separate the values of lrV into their first and second components,
    evaluate roc.roc on them and return (a,vv,ll): the third element of the
    roc.roc result and the two components as numpy arrays.
    """
    parts=[list(part) for part in zip(*lrV.values())]
    vv=parts[0]
    ll=parts[1]
    a=roc.roc(vv,ll)
    # only the last of the three values from roc.roc is kept
    (_,_,a)=a
    return (a,np.array(vv),np.array(ll))

Exemple #31

0

Afficher le fichier

        except:
            continue
    #Get our results
    Mo=np.zeros((len(L.S2Ri),len(R.S2Ri)))
    Mo.fill(np.nan)
    ifile=cdir+'/InterPRed_prediction/2X000.InterPRed.txt'

    for ln in open(ifile,'r'):
        lns=ln.split()
        r=int(lns[3])
        c=int(lns[8])
        v=float(lns[10])
        Mo[r,c]=v    
    """
    print 'MI1- auc',roc.roc(MI1,trbs)[-1]
    print 'Shandar - auc',roc.roc(np.nanmax(M,axis=0),trbs)[-1]
    print 'Our - auc',roc.roc(np.nanmax(Mo,axis=0),trbs)[-1]
    plotdv(rseqn,trbs)
    plotdv(rseqn,np.nanmax(M,axis=0))
    plotdv(rseqn,np.nanmax(Mo,axis=0))
    #plotdv(rseqn,MI1)
    plt.show()
    """
    plt.plot([0,1],[0, 1],'k:',linewidth=2.0)
    (fp,tp,auc)=roc.roc(list(M.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'r-.',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(MI1,list(np.nanmax(Mt,axis=0)));plt.plot(fp,tp,'g--',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(list(Mo.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'b-',linewidth=2.0);print auc
    plt.grid()
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(['Random : 50.0','PPiPP : 54.0','MI-1 : 59.6','PAIRPred : 63.8'],loc=0);plt.title('EF-CAM Results');plt.show()