Ejemplos de roc en Python, ejemplos de PyML.evaluators.roc.roc en Python

Ejemplo n.º 1

0

Mostrar archivo

Archivo: analyzeDists.py Proyecto: foxtrotmike/pairpred

def computeDistMeansForComplex(cid,N,pdbpklpath,pppath):
    """
    Collect AUC and distance information for one complex.

    Parameters:
        cid: complex identifier; the ligand and receptor pickles are read
            from ``<cid>_l_u.pdb.pkl`` and ``<cid>_r_u.pdb.pkl``.
        N: unused; kept so existing callers keep working.
        pdbpklpath: directory that holds the pickled structures.
        pppath: either a string prefix (``<cid>.pairpred.txt`` is appended
            and parsed with ``readFile``) or an already parsed 7-tuple
            ``(pauc, Mv, Ml, lseq, rseq, lrV, rrV)``.

    Returns:
        ``(pauc, lauc, rauc, d1, d2, d3, d4)`` where ``lauc``/``rauc`` are
        the third element of ``roc.roc`` evaluated on the two columns of the
        per-protein dictionaries (``None`` when that evaluation fails) and
        ``d1..d4`` are ``getDistMean(lD, r, top=True)``,
        ``getDistMean(lD, r, top=False)``, ``getDistMean(rD, c, top=True)``
        and ``getDistMean(rD, c, top=False)``, all with ``M=20``.
    """
    L=myPDB.loader(os.path.join(pdbpklpath,cid+'_l_u.pdb.pkl'))
    R=myPDB.loader(os.path.join(pdbpklpath,cid+'_r_u.pdb.pkl'))
    if isinstance(pppath,str):
        parsed=readFile(pppath+cid+'.pairpred.txt',usePDBidx=False)
    else:
        parsed=pppath
    # Keep the per-protein dictionaries under their own names.  The previous
    # code overwrote them with None *before* calling .values(), so the AUC
    # computation always failed inside a bare except and None was returned.
    (pauc,_Mv,Ml,_lseq,_rseq,lrV,rrV)=parsed

    aucs=[]
    for rv in (lrV,rrV):
        try:
            cols=np.array(list(rv.values()))
            (_,_,a)=roc.roc(list(cols[:,0]),list(cols[:,1]))
        except Exception:
            # best effort: a missing or malformed dictionary yields None
            a=None
        aucs.append(a)
    lauc,rauc=aucs

    # Uniform random scores; entries whose label is negative are pinned to -1.
    # NOTE(review): presumably a random baseline for the ranking -- confirm.
    Mlx=np.random.random(Ml.shape)
    Mlx[Ml<0]=-1
    (r,c,_)=sortScores(Mlx)

    lD=getDistMat(getCoords(L.R))
    rD=getDistMat(getCoords(R.R))
    M=20
    return (pauc,lauc,rauc,
            getDistMean(lD,r,top=True,M=M),
            getDistMean(lD,r,top=False,M=M),
            getDistMean(rD,c,top=True,M=M),
            getDistMean(rD,c,top=False,M=M))

Ejemplo n.º 2

0

Mostrar archivo

def computeDistMeansForComplex(cid, N, pdbpklpath, pppath):
    """
    Collect AUC and distance information for one complex.

    Parameters:
        cid: complex identifier; the ligand and receptor pickles are read
            from ``<cid>_l_u.pdb.pkl`` and ``<cid>_r_u.pdb.pkl``.
        N: unused; kept so existing callers keep working.
        pdbpklpath: directory that holds the pickled structures.
        pppath: either a string prefix (``<cid>.pairpred.txt`` is appended
            and parsed with ``readFile``) or an already parsed 7-tuple
            ``(pauc, Mv, Ml, lseq, rseq, lrV, rrV)``.

    Returns:
        ``(pauc, lauc, rauc, d1, d2, d3, d4)``.  ``lauc`` and ``rauc`` are
        the third element of ``roc.roc`` applied to the two columns of the
        per-protein dictionaries, or ``None`` if that call fails.  ``d1..d4``
        are the ``getDistMean`` results for (lD, r, top), (lD, r, bottom),
        (rD, c, top) and (rD, c, bottom) with ``M=20``.
    """

    def _protein_auc(per_residue):
        # AUC from a {key: (score, label)} mapping; None when it cannot be
        # computed (best effort, as in the original try/except).
        try:
            cols = np.array(list(per_residue.values()))
            (_, _, area) = roc.roc(list(cols[:, 0]), list(cols[:, 1]))
        except Exception:
            return None
        return area

    L = myPDB.loader(os.path.join(pdbpklpath, cid + '_l_u.pdb.pkl'))
    R = myPDB.loader(os.path.join(pdbpklpath, cid + '_r_u.pdb.pkl'))
    if isinstance(pppath, str):
        parsed = readFile(pppath + cid + '.pairpred.txt', usePDBidx=False)
    else:
        parsed = pppath
    (pauc, _Mv, Ml, _lseq, _rseq, lrV, rrV) = parsed

    # The dictionaries used to be reset to None before being read, which
    # made both AUCs unconditionally None; they are now actually evaluated.
    lauc = _protein_auc(lrV)
    rauc = _protein_auc(rrV)

    # Uniform random scores; entries whose label is negative are pinned to -1.
    # NOTE(review): presumably a random baseline for the ranking -- confirm.
    Mlx = np.random.random(Ml.shape)
    Mlx[Ml < 0] = -1
    (r, c, _) = sortScores(Mlx)

    lD = getDistMat(getCoords(L.R))
    rD = getDistMat(getCoords(R.R))
    M = 20
    return (pauc, lauc, rauc,
            getDistMean(lD, r, top=True, M=M),
            getDistMean(lD, r, top=False, M=M),
            getDistMean(rD, c, top=True, M=M),
            getDistMean(rD, c, top=False, M=M))

Ejemplo n.º 3

0

Mostrar archivo

Archivo: calcMIDI.py Proyecto: foxtrotmike/pairpred

def getAUCs(mx,dx,dd,dthr=6.0):
    """
    Score two arrays against labels derived from a distance threshold.

    Labels are +1 where ``dd < dthr`` and -1 elsewhere.  ``mx``, ``dx`` and
    the labels are flattened, every position at which any of the three
    flattened arrays is NaN is dropped, and ``roc.roc`` is evaluated for
    ``mx`` and then for ``dx`` against the same label list.

    Returns ``(aa_mi, aa_di)``: the third element of the ``roc.roc`` result
    for ``mx`` and for ``dx`` respectively.
    """
    labels=(2*(dd<dthr)-1).flatten()
    scores_m=mx.flatten()
    scores_d=dx.flatten()
    # keep only positions that are finite in all three arrays
    keep=~(np.isnan(scores_m)|np.isnan(labels)|np.isnan(scores_d))
    label_list=list(labels[keep])
    m_list=list(scores_m[keep])
    d_list=list(scores_d[keep])
    _fp,_tp,aa_mi=roc.roc(m_list,label_list)
    _fp,_tp,aa_di=roc.roc(d_list,label_list)
    return aa_mi,aa_di

Ejemplo n.º 4

0

Mostrar archivo

Archivo: calcMIDI.py Proyecto: foxtrotmike/pairpred

def getAUCs(mx, dx, dd, dthr=6.0):
    """
    Return the AUCs of ``mx`` and ``dx`` against distance-threshold labels.

    A position is labelled +1 when ``dd < dthr`` and -1 otherwise.  All
    arrays are flattened; positions where the flattened ``mx``, labels or
    ``dx`` are NaN are removed before ``roc.roc`` is called.

    Returns:
        ``(aa_mi, aa_di)`` -- third element of ``roc.roc(mx, labels)`` and
        of ``roc.roc(dx, labels)``.
    """
    flat_labels = (2 * (dd < dthr) - 1).flatten()
    flat_m = mx.flatten()
    flat_d = dx.flatten()

    has_nan = np.isnan(flat_m) | np.isnan(flat_labels) | np.isnan(flat_d)
    usable = ~has_nan

    labels = list(flat_labels[usable])
    m_scores = list(flat_m[usable])
    d_scores = list(flat_d[usable])

    (_, _, aa_mi) = roc.roc(m_scores, labels)
    (_, _, aa_di) = roc.roc(d_scores, labels)
    return aa_mi, aa_di

Ejemplo n.º 5

0

Mostrar archivo

def getAUC4Protein(lrV):
    """
    Compute the AUC for one protein from its per-entry dictionary.

    Parameters:
        lrV: mapping whose values are sequences; element 0 of each value is
            passed to ``roc.roc`` as the score and element 1 as the label.

    Returns:
        ``(a, vv, ll)`` -- ``a`` is the third element of the ``roc.roc``
        result, ``vv`` and ``ll`` are numpy arrays of the scores and labels
        in dictionary iteration order.

    Raises:
        IndexError: if ``lrV`` is empty or its values have fewer than two
            elements.
    """
    # Transpose the values into columns.  A list comprehension is used
    # because the result of map() cannot be subscripted on Python 3.
    columns = [list(col) for col in zip(*lrV.values())]
    vv = columns[0]
    ll = columns[1]
    (_, _, a) = roc.roc(vv, ll)
    return (a, np.array(vv), np.array(ll))

Ejemplo n.º 6

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def getTP_RFPP(Mv,Ml):
    """
    Return ``(ntp, rfpp)`` for a score array ``Mv`` and a label array ``Ml``.

    Positions where either array is NaN are discarded first.
    rfpp: zero-based position of the first entry with label 1 when entries
        are ordered by decreasing score (0 as well when no label equals 1).
    ntp: maximum of the second curve returned by
        ``roc.roc(scores, labels, 50, normalize=False)``; according to the
        original author this is the number of true positives in the top 50
        predictions.
    """
    valid=~(np.isnan(Mv)|np.isnan(Ml))
    scores=Mv[valid]
    labels=Ml[valid]
    ranked_labels=labels[np.argsort(-scores)]
    rfpp=np.argmax(ranked_labels==1)
    (fpr,tpr,r)=roc.roc(list(scores),list(labels),50,normalize=False)
    ntp=np.max(tpr)
    return (ntp,rfpp)

Ejemplo n.º 7

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def parseShandarFiles(ifile,auconly=False,**kwargs): #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Read a ``<cid>.preds`` / ``<cid>.cont`` file pair and build score and
    label matrices (same return pattern as analyzePredFile.readFile).

    Parameters:
        ifile: path from which the directory and complex id are taken via
            ``getFileParts``; the id must contain at least two ``_`` since
            its 2nd and 3rd ``_``-separated parts give the ligand and
            receptor chain ids.
        auconly: when true only the AUC is returned.
        **kwargs: accepted and ignored.

    Returns:
        ``auc`` when ``auconly`` is set, otherwise
        ``(auc, Mvm, Mlm, None, None, lrV, rrV)`` where ``Mvm`` holds the
        predictions (NaN where no line was read), ``Mlm`` the labels (0 where
        no line was read) and ``lrV``/``rrV`` map row/column index to
        ``(max score, max label)`` along that row/column.

    NOTE(review): ``Mvm`` is passed to ``roc.roc`` with its NaN entries, and
    the row/column maxima use ``np.max`` (NaN-propagating) -- confirm that
    this is intended.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns=ln.split()
        return (lns[0]+lns[1],lns[5]+lns[6],int(lns[9]))

    loopath,cid,_=getFileParts(ifile)
    cidparts=cid.split('_')
    lcids=cidparts[1]
    rcids=cidparts[2]
    Mlidx={}   # ligand residue key -> row index
    Mridx={}   # receptor residue key -> column index
    Mlv=[]     # (row, column, label, prediction)
    with open(os.path.join(loopath,cid+'.preds')) as fp,open(os.path.join(loopath,cid+'.cont')) as fc:
        for lnp,lnc in zip(fp,fc):
            (lidx,ridx,lbl)=parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                # first sighting of a key gets the next free index; this
                # replaces the former bare-except lookups and counters
                lx=Mlidx.setdefault(lidx,len(Mlidx))
                rx=Mridx.setdefault(ridx,len(Mridx))
                Mlv.append((lx,rx,lbl,float(lnp)))
    shape=(len(Mlidx),len(Mridx))
    Mvm=np.zeros(shape)
    Mvm.fill(np.nan)
    Mlm=np.zeros(shape)
    for (lx,rx,lbl,p) in Mlv:
        Mlm[lx,rx]=lbl
        Mvm[lx,rx]=p

    (_,_,auc)=roc.roc(list(Mvm.flatten()),list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV=dict(zip(range(Mvm.shape[0]),zip(np.max(Mvm,axis=1),np.max(Mlm,axis=1))))
    rrV=dict(zip(range(Mvm.shape[1]),zip(np.max(Mvm,axis=0),np.max(Mlm,axis=0))))

    return auc,Mvm,Mlm,None,None,lrV,rrV

Ejemplo n.º 8

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def getTP_RFPP(Mv, Ml):
    """
    Compute ``(ntp, rfpp)`` from scores ``Mv`` and labels ``Ml``.

    Entries that are NaN in either input are ignored.

    Returns:
        ntp: ``np.max`` of the second curve from
            ``roc.roc(scores, labels, 50, normalize=False)`` (described by
            the original author as the number of true positives in the top
            50 predictions).
        rfpp: zero-based rank, by decreasing score, of the first entry whose
            label equals 1; also 0 if there is no such entry.
    """
    either_nan = np.isnan(Mv) | np.isnan(Ml)
    scores = Mv[~either_nan]
    labels = Ml[~either_nan]

    by_score_desc = np.argsort(-scores)
    rfpp = np.argmax(labels[by_score_desc] == 1)

    (fpr, tpr, r) = roc.roc(list(scores), list(labels), 50, normalize=False)
    return (np.max(tpr), rfpp)

Ejemplo n.º 9

0

Mostrar archivo

def roc_score(data, targetClass, otherClass, **args):
    """
    Score every feature of ``data`` by its direction-independent ROC area.

    For feature ``i`` the value ``roc.roc(data.getFeature(i),
    data.labels.Y)[2]`` is computed and ``max(auc, 1 - auc)`` is stored, so
    a feature that ranks the classes in reverse scores as high as one that
    ranks them forwards.

    Parameters:
        data: object providing ``numFeatures``, ``getFeature(i)`` and
            ``labels.Y``.
        targetClass, otherClass: unused here; part of the scoring interface.
        **args: a ``rocN`` entry is accepted but not forwarded to
            ``roc.roc`` (the original read it without using it).

    Returns:
        numpy float64 array of length ``data.numFeatures``.
    """
    # numpy.float_ was removed in NumPy 2.0; float64 is the same type.
    s = numpy.zeros(data.numFeatures, numpy.float64)
    for i in range(data.numFeatures):
        auc = roc.roc(data.getFeature(i), data.labels.Y)[2]
        s[i] = max(auc, 1 - auc)

    return s

Ejemplo n.º 10

0

Mostrar archivo

Archivo: featsel.py Proyecto: bpartridge/PyML

def roc_score(data, targetClass, otherClass, **args) :
    """
    Return, for each feature, ``max(auc, 1 - auc)`` of that feature's values
    against ``data.labels.Y``, where ``auc`` is element 2 of the
    ``roc.roc`` result.

    Parameters:
        data: object providing ``numFeatures``, ``getFeature(i)`` and
            ``labels.Y``.
        targetClass, otherClass: not used by this scorer.
        **args: may contain ``rocN``; it is accepted for interface
            compatibility but is not passed on to ``roc.roc``.

    Returns:
        numpy float64 array with one score per feature.
    """
    numFeatures = data.numFeatures
    # numpy.float64 replaces the numpy.float_ alias removed in NumPy 2.0
    s = numpy.zeros(numFeatures, numpy.float64)
    for i in range(numFeatures) :
        featureValues = data.getFeature(i)
        auc = roc.roc(featureValues, data.labels.Y)[2]
        s[i] = max(auc, 1-auc)

    return s

Ejemplo n.º 11

0

Mostrar archivo

Archivo: resultsObjects.py Proyecto: silasxue/Sentiment-Analysis

    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results via ``roc_module.plotROC``.

        Parameters:
            filename: passed through to ``roc_module.plotROC``.
            fold: ignored when the results hold a single fold; ``None``
                requests a curve from ``roc_module.roc_VA`` over all folds;
                otherwise the curve of that fold is plotted.
            **args: ``rocN`` (optional) is forwarded to the ROC computation.

        Raises:
            ValueError: if ``fold`` is greater than ``self.numFolds``.
        """
        rocN = args.get('rocN')
        if self.numFolds == 1 :
            # if the results are for a single split
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        elif fold is None :
            # get an averaged ROC curve
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            folds = [(dvals[i], labels[i]) for i in range(len(labels))]
            rocFP, rocTP, area = roc_module.roc_VA(folds, rocN)
        else :
            # plot an ROC plot for the given fold
            # NOTE(review): fold == numFolds passes this check -- confirm
            # whether fold indices are zero-based.
            if fold > self.numFolds :
                # call form of raise: valid on Python 2 and Python 3
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        roc_module.plotROC(rocFP, rocTP, filename)

Ejemplo n.º 12

0

Mostrar archivo

Archivo: resultsObjects.py Proyecto: chaitanyambilgikar/B551-Fall-2012

    def plotROC(self, filename=None, fold = None, **args) :
        """
        Plot an ROC curve for these results via ``roc_module.plotROC``.

        Parameters:
            filename: passed through to ``roc_module.plotROC``.
            fold: ignored when the results hold a single fold; ``None``
                requests a curve from ``roc_module.roc_VA`` over all folds;
                otherwise the curve of that fold is plotted.
            **args: ``rocN`` (optional) is used for the ROC computation; all
                keyword arguments are also forwarded to
                ``roc_module.plotROC``.

        Raises:
            ValueError: if ``fold`` is greater than ``self.numFolds``.
        """
        rocN = args.get('rocN')
        if self.numFolds == 1 :
            # if the results are for a single split
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        elif fold is None :
            # get an averaged ROC curve
            labels = self.getGivenClass()
            dvals = self.getDecisionFunction()
            folds = [(dvals[i], labels[i]) for i in range(len(labels))]
            rocFP, rocTP, area = roc_module.roc_VA(folds, rocN)
        else :
            # plot an ROC plot for the given fold
            # NOTE(review): fold == numFolds passes this check -- confirm
            # whether fold indices are zero-based.
            if fold > self.numFolds :
                # call form of raise: valid on Python 2 and Python 3
                raise ValueError('foldNum too large')
            labels = self.getGivenClass(fold)
            dvals = self.getDecisionFunction(fold)
            rocFP, rocTP, area = roc_module.roc(dvals, labels, rocN)
        roc_module.plotROC(rocFP, rocTP, filename, **args)

Ejemplo n.º 13

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def parse1SVM(ifile, auconly=False, **kwargs):  #,E,Asgl
    """
    Build pairwise score and label matrices from per-protein results.

    The examples database is loaded from ``EP_6N.lbl.pkl`` and the
    per-protein results from ``result.sgl.pkl`` (both relative to the
    current working directory).  The score of a residue pair is the sum of
    element 0 of the two per-residue entries; the label is +1 when the pair
    is contained in ``E.Pex[cid][0]`` and -1 otherwise.

    Parameters:
        ifile: path whose base name starts with the 4-character complex id.
        auconly: when true only the AUC is returned.
        **kwargs: accepted and ignored.

    Returns:
        ``auc`` when ``auconly`` is set, otherwise
        ``(auc, Mv, Ml, None, None, lrV, rrV)``.
    """
    exfname = 'EP_6N.lbl.pkl'
    sglfile = 'result.sgl.pkl'
    # The former `try: E / except NameError` guards could never find an
    # existing value: E and Asgl are assigned in this function and are
    # therefore locals, so the bare reference always raised.  Both files
    # were (and still are) loaded on every call.
    E = getExamplesDBD.loader(exfname)
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(sglfile, "rb") as fh:
        Asgl = cPickle.load(fh)

    cid = getFileParts(getFileParts(ifile)[1])[1][:4]
    (la, ra, lrV, rrV) = Asgl[cid]

    Mv = np.zeros((len(lrV), len(rrV)))
    Ml = np.zeros(Mv.shape)
    for lidx, xr in enumerate(lrV.keys()):
        for ridx, xc in enumerate(rrV.keys()):
            if (xr, xc) in E.Pex[cid][0]:
                l = +1.0
            else:
                l = -1.0
            Mv[lidx, ridx] = lrV[xr][0] + rrV[xc][0]
            Ml[lidx, ridx] = l

    (_, _, auc) = roc.roc(list(Mv.flatten()), list(Ml.flatten()))
    if auconly:
        return auc

    return (auc, Mv, Ml, None, None, lrV, rrV)  #auc,Mvm,Mlm,None,None,lrV,rrV

Ejemplo n.º 14

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def parse1SVM(ifile,auconly=False,**kwargs):#,E,Asgl
    """
    Build pairwise score and label matrices from per-protein results.

    Loads the examples database (``EP_6N.lbl.pkl``) and the per-protein
    results (``result.sgl.pkl``) from the current working directory.  For
    every (ligand key, receptor key) combination the score is the sum of
    element 0 of both per-residue entries and the label is +1 if the pair is
    in ``E.Pex[cid][0]``, else -1.

    Parameters:
        ifile: path whose base name starts with the 4-character complex id.
        auconly: when true only the AUC is returned.
        **kwargs: accepted and ignored.

    Returns:
        ``auc`` when ``auconly`` is set, otherwise
        ``(auc, Mv, Ml, None, None, lrV, rrV)``.
    """
    exfname='EP_6N.lbl.pkl'
    sglfile='result.sgl.pkl'
    # E and Asgl are local names, so the old `try: E / except NameError`
    # probes always failed and the files were always reloaded; the probes
    # are removed and the loads made explicit.
    E=getExamplesDBD.loader(exfname)
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(sglfile,"rb") as fh:
        Asgl=cPickle.load(fh)

    cid=getFileParts(getFileParts(ifile)[1])[1][:4]
    (la,ra,lrV,rrV)=Asgl[cid]

    Mv=np.zeros((len(lrV),len(rrV)))
    Ml=np.zeros(Mv.shape)
    for lidx,xr in enumerate(lrV.keys()):
        for ridx,xc in enumerate(rrV.keys()):
            if (xr,xc) in E.Pex[cid][0]:
                l=+1.0
            else:
                l=-1.0
            Mv[lidx,ridx]=lrV[xr][0]+rrV[xc][0]
            Ml[lidx,ridx]=l

    (_,_,auc)=roc.roc(list(Mv.flatten()),list(Ml.flatten()))
    if auconly:
        return auc

    return (auc,Mv,Ml,None,None,lrV,rrV) #auc,Mvm,Mlm,None,None,lrV,rrV

Ejemplo n.º 15

0

Mostrar archivo

def rasaPlot(rAsa,Dxx,Lxx,Np=10):
    """
    Plot AUC and class counts as a function of binned ``rAsa``.

    ``rAsa`` is split into ``Np-1`` equal-width, half-open bins between its
    minimum and maximum.  For each bin the numbers of entries with label +1
    and with any other label are counted; when both counts exceed 10 the AUC
    of ``Dxx`` against ``Lxx`` inside the bin is computed with ``roc.roc``.
    The Pearson correlation between bin centres and the available AUCs is
    printed, and two figures are written to ``../figures/fig_A2.eps`` and
    ``../figures/fig_A3.eps``.

    Parameters:
        rAsa, Dxx, Lxx: numpy arrays indexable by the same boolean mask.
        Np: number of bin edges.

    NOTE(review): the bins are ``[edge, next_edge)``, so entries equal to
    ``max(rAsa)`` fall into no bin -- confirm this is intended.
    """
    #nb=getSamplePoints(rAsa,Np)
    nb=np.linspace(np.min(rAsa),np.max(rAsa),Np)
    nbins=len(nb)-1
    R=np.zeros(nbins)
    R.fill(np.nan)          # NaN marks bins without an AUC
    xx=np.zeros(nbins)      # bin centres
    pp=np.zeros(nbins)      # count of label == +1 per bin
    nn=np.zeros(nbins)      # count of label != +1 per bin
    for idx in range(nbins):
        vidx=np.logical_and(rAsa>=nb[idx], rAsa<nb[idx+1])
        v=Dxx[vidx]
        l=Lxx[vidx]
        xx[idx]=(nb[idx]+nb[idx+1])/2.0
        pp[idx]=np.sum(l==+1)
        nn[idx]=np.sum(l!=+1)
        if pp[idx]>10 and nn[idx]>10:
            try:
                (_,_,R[idx])=roc.roc(list(v),list(l))
            except Exception:
                # best effort: leave this bin as NaN
                continue
    naidx=~np.isnan(R)
    # single formatted argument keeps the output identical to the former
    # print statement while also being valid on Python 3
    print("Correlation Coefficient:  %s" % (pearsonr(xx[naidx],R[naidx]),))
    #PLOTTING ONLY CODE
    # raw strings: "\D" is not a valid escape sequence
    stat=r"$\Delta$rASA"
    plt.figure(0)
    plt.plot(xx,R,'-o')
    plt.xlabel(stat)
    plt.ylabel("AUC")
    plt.title(r'AUC vs $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A2.eps', format='eps', dpi=1200)

    plt.figure(1)
    plt.plot(nb[:-1],pp,'r-^',label="Interacting residues")
    plt.plot(nb[:-1],nn,'k-v',label="Not-Interacting residues")
    plt.xlabel(stat)
    plt.ylabel("Counts")
    plt.legend(loc=0)
    plt.title(r'Number of residues vs. $\Delta$rASA')
    plt.grid()
    plt.savefig('../figures/fig_A3.eps', format='eps', dpi=1200)

Ejemplo n.º 16

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def computeNTP(ifile, top=200, freader=None):
    """
    Compute ranking statistics for one result file or parsed result tuple.

    Parameters:
        ifile: either a file name (then ``freader`` must be given and is
            called as ``freader(ifile, usePDBidx=False)``) or an already
            parsed 7-tuple ``(auc, Mv, Ml, lseq, rseq, lrV, rrV)``.  The
            incoming auc is not used; it is recomputed from scores and
            labels and only kept for compatibility with the file readers.
        top: forwarded to ``findNTPinTop``.
        freader: reader function, e.g. ``parseShandarFiles``.

    Returns:
        ``(auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)``:
        auc: AUC over all pairs that are non-NaN in both score and label
        ttp, fpi, dntp: results of ``findNTPinTop`` (per the original
            author: true positives in top, index of the first true positive,
            distance to the nearest true positive for each top example)
        la, ra: AUCs from ``getAUC4Protein(lrV)`` / ``getAUC4Protein(rrV)``
        pp, nn: number of labels equal to 1 / all remaining entries
        Mvx, Mlx: flattened scores and labels with NaN positions removed
        lv, ll, rv, rl: score/label arrays from ``getAUC4Protein``

    The input matrix ``Mv`` is left untouched: the flattened scores are a
    copy, so replacing invalid positions by ``-inf`` no longer writes into
    the caller's array (``ravel`` may return a view).
    """
    if isinstance(ifile, str):
        assert freader is not None
        parsed = freader(ifile, usePDBidx=False)
    else:  #expects tuple
        parsed = ifile
    (_, Mv, Ml, lseq, rseq, lrV, rrV) = parsed

    (la, lv, ll) = getAUC4Protein(lrV)
    (ra, rv, rl) = getAUC4Protein(rrV)
    Mvx = Mv.flatten()  # always a copy, unlike ravel()
    Mlx = Ml.ravel()    # only read, so a view is fine
    nidx = ~np.isnan(Mvx) & ~np.isnan(Mlx)
    (_, _, auc) = roc.roc(list(Mvx[nidx]), list(Mlx[nidx]))
    # push invalid pairs to the bottom of the ranking
    Mvx[~nidx] = -np.inf
    (ttp, fpi, dntp) = findNTPinTop(Mvx, Mlx, Mv.shape, top=top)
    Mvx = Mvx[nidx]
    Mlx = Mlx[nidx]

    pp = np.sum(Mlx == 1)  # total number of positives
    nn = len(Mlx) - pp  #total number of negatives
    return (auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)

Ejemplo n.º 17

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def computeNTP(ifile,top=200,freader=None):
    """
    Compute ranking statistics for one result file or parsed result tuple.

    Parameters:
        ifile: either a file name (then ``freader`` must be given and is
            called as ``freader(ifile, usePDBidx=False)``) or an already
            parsed 7-tuple ``(auc, Mv, Ml, lseq, rseq, lrV, rrV)``.  The
            incoming auc is not used; it is recomputed from scores and
            labels and only kept for compatibility with the file readers.
        top: forwarded to ``findNTPinTop``.
        freader: reader function, e.g. ``parseShandarFiles``.

    Returns:
        ``(auc, ttp, fpi, dntp, la, ra, pp, nn, Mvx, Mlx, lv, ll, rv, rl)``:
        auc: AUC over all pairs that are non-NaN in both score and label
        ttp, fpi, dntp: results of ``findNTPinTop`` (per the original
            author: true positives in top, index of the first true positive,
            distance to the nearest true positive for each top example)
        la, ra: AUCs from ``getAUC4Protein(lrV)`` / ``getAUC4Protein(rrV)``
        pp, nn: number of labels equal to 1 / all remaining entries
        Mvx, Mlx: flattened scores and labels with NaN positions removed
        lv, ll, rv, rl: score/label arrays from ``getAUC4Protein``

    The caller's ``Mv`` is not modified: the flattened scores are copied
    before invalid positions are overwritten with ``-inf`` (``ravel`` may
    return a view onto the input).
    """
    if isinstance(ifile,str):
        assert freader is not None
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=freader(ifile,usePDBidx=False)
    else: #expects tuple
        (_,Mv,Ml,lseq,rseq,lrV,rrV)=ifile

    (la,lv,ll)=getAUC4Protein(lrV)
    (ra,rv,rl)=getAUC4Protein(rrV)
    Mvx=Mv.ravel().copy() # private copy, written to below
    Mlx=Ml.ravel()
    nidx=~np.isnan(Mvx) &  ~np.isnan(Mlx)
    (_,_,auc)=roc.roc(list(Mvx[nidx]),list(Mlx[nidx]))
    Mvx[~nidx]=-np.inf # invalid pairs rank last
    (ttp,fpi,dntp)=findNTPinTop(Mvx,Mlx,Mv.shape,top=top)
    Mvx=Mvx[nidx]
    Mlx=Mlx[nidx]

    pp=np.sum(Mlx==1) # total number of positives
    nn=len(Mlx)-pp #total number of negatives
    return (auc,ttp,fpi,dntp,la,ra,pp,nn,Mvx,Mlx,lv,ll,rv,rl)

Ejemplo n.º 18

0

Mostrar archivo

Archivo: dbdscrpp3.py Proyecto: foxtrotmike/pairpred

def getAUC(s):
    """
    Compute per-complex AUCs and one combined ROC curve from a results object.

    Parameters:
        s: either the name of a pickle file containing ``(r, dkey)`` or that
            tuple itself.  ``r`` is a results object providing
            ``getPatternID``, ``getDecisionFunction`` and ``getGivenLabels``;
            ``dkey`` maps each complex id to its pattern ids (or to a tuple
            whose first element is that collection, for old results).

    Returns:
        ``(auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))`` where ``fp, tp,
        auc`` come from ``roc.roc_VA`` over all complexes, ``A`` holds the
        per-complex AUCs, ``D``/``L`` the per-complex decision values and
        labels, and ``Rx`` the entries of ``getRMSDDict('shandar_rmsd.txt')``
        per complex (empty lists when that file cannot be read).

    Raises:
        KeyError: if a pattern id listed in ``dkey`` is missing from the
            results object.  The original dropped into ``pdb.set_trace()``
            here and then carried on with a stale or unbound index.
    """
    if isinstance(s,str):
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(s,"rb") as fh:
            (r,dkey)=cPickle.load(fh)
    else:
        (r,dkey)=s

    patid=combineList(r.getPatternID())
    vkey=dict(zip(patid,range(len(patid))))
    decfn=combineList(r.getDecisionFunction())
    lblid=combineList(r.getGivenLabels())
    cids=list(dkey.keys())
    try:
        R=getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # RMSD information is optional
        R=None
    D=[]
    L=[]
    A=[]
    Rx=[]
    for cid in cids:
        cidx=dkey[cid]
        if type(cidx) is tuple: #backward compatability to old results objects
            cidx=cidx[0]
        dvals=[]
        lbls=[]
        for e in cidx:
            try:
                n=vkey[e]
            except KeyError:
                raise KeyError('pattern id %r of complex %r not found in results' % (e,cid))
            dvals.append(decfn[n])
            lbls.append(lblid[n])
        (_,_,a)=roc.roc(dvals,lbls)
        D.append(dvals)
        L.append(lbls)
        A.append(a)
        Rx.append(R[cid] if R is not None else [])
    # materialise the pairs so roc_VA gets a real list on Python 3 too
    (fp,tp,auc)=roc.roc_VA(list(zip(D,L)))
    return (auc,(fp,tp),(A,Rx,D,L,cids,r,dkey))

Ejemplo n.º 19

0

Mostrar archivo

Archivo: dbdscrpp3.py Proyecto: foxtrotmike/pairpred

def getAUC(s):
    """
    Compute per-complex AUCs and one combined ROC curve from a results object.

    Parameters:
        s: either the name of a pickle file containing ``(r, dkey)`` or that
            tuple itself.  ``r`` is a results object providing
            ``getPatternID``, ``getDecisionFunction`` and ``getGivenLabels``;
            ``dkey`` maps each complex id to its pattern ids (or to a tuple
            whose first element is that collection, for old results).

    Returns:
        ``(auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))`` where ``fp, tp,
        auc`` come from ``roc.roc_VA`` over all complexes, ``A`` holds the
        per-complex AUCs, ``D``/``L`` the per-complex decision values and
        labels, and ``Rx`` the entries of ``getRMSDDict('shandar_rmsd.txt')``
        per complex (empty lists when that file cannot be read).

    Raises:
        KeyError: if a pattern id listed in ``dkey`` is missing from the
            results object (previously this started the debugger and then
            continued with a stale or unbound index).
    """
    if isinstance(s, str):
        # NOTE: pickle executes arbitrary code on load; only use trusted files.
        with open(s, "rb") as fh:
            (r, dkey) = cPickle.load(fh)
    else:
        (r, dkey) = s

    patid = combineList(r.getPatternID())
    vkey = dict(zip(patid, range(len(patid))))
    decfn = combineList(r.getDecisionFunction())
    lblid = combineList(r.getGivenLabels())
    cids = list(dkey.keys())
    D = [[] for _ in cids]
    L = [[] for _ in cids]
    A = [[] for _ in cids]
    Rx = [[] for _ in cids]
    try:
        R = getRMSDDict('shandar_rmsd.txt')
    except Exception:
        # RMSD information is optional
        R = None
    for i, cid in enumerate(cids):
        cidx = dkey[cid]
        if type(cidx) is tuple:  #backward compatability to old results objects
            cidx = cidx[0]
        for e in cidx:
            if e not in vkey:
                raise KeyError(
                    'pattern id %r of complex %r not found in results' %
                    (e, cid))
            n = vkey[e]
            D[i].append(decfn[n])
            L[i].append(lblid[n])
        (_, _, a) = roc.roc(D[i], L[i])
        A[i] = a
        if R is not None:
            Rx[i] = R[cid]
    # materialise the pairs so roc_VA gets a real list on Python 3 too
    (fp, tp, auc) = roc.roc_VA(list(zip(D, L)))
    return (auc, (fp, tp), (A, Rx, D, L, cids, r, dkey))

Ejemplo n.º 20

0

Mostrar archivo

Archivo: aucPSSM.py Proyecto: foxtrotmike/pairpred

from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Per-chain AUC of the column-wise PSSM maximum against interface labels.
E = getExamplesDBD.loader(
    os.path.join('../../DBD4CSPKL/PKL', 'ENS_15_35_50.lbl.pkl'))
pdbdir = '../../DBD4CSPKL/PDB_all_'
pkldir = '../../DBD4CSPKL/PKL'
# unique file ids: the part of each '*.pdb.pkl' base name before the first '.'
F = list(
    set([
        getFileParts(g)[1].split('.')[0]
        for g in glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
    ]))
A = {}  # file id -> AUC
for fid in F:
    print(fid)  # single-argument call form: same output on Python 2 and 3
    X = myPDB.loader(os.path.join(pkldir, fid + '.pdb.pkl'))
    C = np.max(X.pssm, axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)
    # complexes whose key contains this file id
    fcids = [k for k in E.Pex.keys() if (fid in k)]
    fPi = []
    for c in fcids:
        # element 0 of each positive pair when fid is the first entry of
        # the complex key, element 1 otherwise
        side = int(c[0] != fid)
        fPi.extend([i[side] for i in E.Pex[c][0]])
    fPi = np.unique(np.array(fPi))
    if len(fPi):
        L = np.zeros(len(C))
        L[fPi] = 1.0
        A[fid] = roc(list(C), list(L))[-1]

Ejemplo n.º 21

0

Mostrar archivo

Archivo: analyzeLOOCV_par_pwgen.py Proyecto: foxtrotmike/pairpred

def parseShandarFiles(ifile,
                      auconly=False,
                      **kwargs):  #(auc,Mv,Ml,lseq,rseq,lrV,rrV)
    """
    Read a ``<cid>.preds`` / ``<cid>.cont`` file pair and build score and
    label matrices (same return pattern as analyzePredFile.readFile).

    Parameters:
        ifile: path from which the directory and complex id are taken via
            ``getFileParts``; the id must contain at least two ``_`` since
            its 2nd and 3rd ``_``-separated parts give the ligand and
            receptor chain ids.
        auconly: when true only the AUC is returned.
        **kwargs: accepted and ignored.

    Returns:
        ``auc`` when ``auconly`` is set, otherwise
        ``(auc, Mvm, Mlm, None, None, lrV, rrV)`` where ``Mvm`` holds the
        predictions (NaN where no line was read), ``Mlm`` the labels (0 where
        no line was read) and ``lrV``/``rrV`` map row/column index to
        ``(max score, max label)`` along that row/column.

    NOTE(review): ``Mvm`` is passed to ``roc.roc`` with its NaN entries, and
    the row/column maxima use ``np.max`` (NaN-propagating) -- confirm that
    this is intended.
    """
    def parseContLine(ln):
        # ['A', '#5', 'ASN:7', 'N', '::', 'B', '#5', 'HIS:6', 'H:', '0', '53.61']
        #   0   1       2       3     4     5   6       7       8    9      10
        lns = ln.split()
        lidx = lns[0] + lns[1]
        ridx = lns[5] + lns[6]
        lbl = int(lns[9])
        return (lidx, ridx, lbl)

    def indexOf(table, key):
        # index already assigned to key, or the next free one (replaces the
        # former bare-except lookup plus manual counter)
        if key not in table:
            table[key] = len(table)
        return table[key]

    loopath, cid, _ = getFileParts(ifile)
    cidparts = cid.split('_')
    lcids = cidparts[1]
    rcids = cidparts[2]
    Mlidx = {}  # ligand residue key -> row index
    Mridx = {}  # receptor residue key -> column index
    Mlv = []  # (row, column, label, prediction)
    with open(os.path.join(loopath, cid + '.preds')) as fp, open(
            os.path.join(loopath, cid + '.cont')) as fc:
        for lnp, lnc in zip(fp, fc):
            (lidx, ridx, lbl) = parseContLine(lnc)
            if lidx[0] in lcids and ridx[0] in rcids:
                lx = indexOf(Mlidx, lidx)
                rx = indexOf(Mridx, ridx)
                p = float(lnp)
                Mlv.append((lx, rx, lbl, p))
    l = len(Mlidx)
    r = len(Mridx)
    Mvm = np.zeros((l, r))
    Mvm.fill(np.nan)
    Mlm = np.zeros((l, r))
    for (lx, rx, lbl, p) in Mlv:
        Mlm[lx, rx] = lbl
        Mvm[lx, rx] = p

    (_, _, auc) = roc.roc(list(Mvm.flatten()), list(Mlm.flatten()))
    if auconly:
        return auc
    #construct lrV,rrV
    lrV = dict(
        zip(range(Mvm.shape[0]),
            zip(np.max(Mvm, axis=1), np.max(Mlm, axis=1))))
    rrV = dict(
        zip(range(Mvm.shape[1]),
            zip(np.max(Mvm, axis=0), np.max(Mlm, axis=0))))

    return auc, Mvm, Mlm, None, None, lrV, rrV

Ejemplo n.º 22

0

Mostrar archivo

Archivo: getECstats.py Proyecto: foxtrotmike/pairpred

     dd = []
     ld = []
     md = []
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_, _, aa_di) = roc.roc(dd, ld)
     (_, _, aa_mi) = roc.roc(md, ld)
     Xauc.append([aa_mi, aa_di])
     print cid, ncs, lstats[0].shape[0], rstats[0].shape[0], Xauc[
         -1], Xuauc[-1], Lauc[-2:]
     #pdb.set_trace()
 if (myid != 0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:

Ejemplo n.º 23

0

Mostrar archivo

Archivo: analyzeFeatures.py Proyecto: foxtrotmike/pairpred

    '2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX',
    '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B',
    '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T',
    '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M',
    'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD',
    '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU'
]
# Per-complex AUC of the summed L.B / R.B values of a residue pair against
# the positive / negative example labels.
fs = f3 + f4
E = getExamplesDBD.loader(efile)
A = {}  # complex id -> AUC
for cid in fs:
    print(cid)  # single-argument call form: same output on Python 2 and 3
    L = myPDB.loader(bdir + cid + '_l_u.pdb.pkl')
    R = myPDB.loader(bdir + cid + '_r_u.pdb.pkl')
    V = []  # pair scores
    Y = []  # pair labels (+1 / -1)
    # positives first, then negatives; pairs whose score is NaN are skipped
    for (examples, label) in ((E.Pex[cid][0], +1), (E.getNegEx(cid), -1)):
        for p in examples:
            #np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
            v = L.B[p[0]] + R.B[p[1]]
            # `not` instead of `~`: bitwise inversion of a plain Python bool
            # is always truthy
            if not np.isnan(v):
                V.append(v)
                Y.append(label)
    (_, _, auc) = roc.roc(V, Y)
    A[cid] = auc

Ejemplo n.º 24

0

Mostrar archivo

Archivo: analyzeFeatures.py Proyecto: foxtrotmike/pairpred

# Per-complex AUC of the summed L.B / R.B values of a residue pair against
# the positive / negative example labels.
efile=bdir+'E_125PN_15_35_50.lbl.pkl'
#fs=glob.glob(bdir+'*_u.pdb.pkl')
# complex ids of the two sets that are processed
f3=['1SBB', '1JPS', '2HMI', '1GHQ', '1KTZ', '1K74', '1D6R', '2SIC', '1GPW', '1XD3', '1EAW', '1VFB', '7CEI', '1E4K', '1I4D', '1H1V', '2PCC', '1FQ1', '2HLE', '1FQJ', '1S1Q', '2OOB', '1UDI', '1KLU', '1WQ1', '1CGI', '1ATN', '1N2C', '1GP2', '1FAK', '1NW9', '1GLA', '1GRN', '2HRK', '1AZS', '1JMO', '1PXV', '1EWY', '1RLB', '1DQJ', '2BTF', '2I25', '1I2M', '1BUH', '1BGX', '1ML0', '1EFN', '1DFJ', '1Y64', '2UUY', '1MAH', '1BVK', '1BVN', '1EER', '1MLC', '1NSN', '1AK4', '1A2K', '1QFW', '2H7V', '1T6B', '1KAC', '1YVB', '1J2J', '1QA9', '1AHW', '2OT3', '2FD6', '2AJF', '1K4C', '1NCA', '1OPH', '1XQS', '1B6C', '1PPE', '2O8V', '1HIA', '1Z0K', '1R0R', '1WEJ', '1ACB', '1KXP', '1KXQ', '1R8S', '1IRA', '1GCQ', '1F51', '2B42', '2HQS', '1AKJ', '2JEL', '1KKL', '1FC2', '1E96', '1N8O', '2MTA', '2VIS', '1IB1', '1E6J', '1Z5Y', '1EZU', '1TMQ', '2C0L', '1E6E', '1IQD', '1ZHI', '1M10', '2NZ8', '1AY7', '1HE8', '1IJK', '1HE1', '1FSK', '1F34', '2SNI', '1BJ1', '2CFH', '1BKD', '1DE4', '1IBR', '1I9R', '1K5D', '1AVX']
f4=['2A5T', '3CPH', '1ZHH', '2ABZ', '1LFD', '2OUL', '1JIW', '2B4J', '1SYX', '1FLE', '1JTG', '2AYO', '4CPA', '1CLV', '1OC0', '1XU1', '1R6Q', '2O3B', '1US7', '3D5S', '1JZD', '1HCF', '1OYV', '2OZA', '1H9D', '2A9K', '2J0T', '2Z0E', '3BP8', '2IDO', '1WDW', '1ZLI', '2VDB', '1RV6', '1FFW', '1F6M', 'BOYV', '1JWH', '2OOR', '1MQ8', '1GL1', '1PVH', '2I9B', '1OFU', '1GXD', '3SGQ', '1JK9', '1ZM4', '1FCC', '2G77', '2J7P', '2FJU']
fs=f3+f4
E=getExamplesDBD.loader(efile)
A={} # complex id -> AUC
for cid in fs:
    print(cid) # single-argument call form: same output on Python 2 and 3
    L=myPDB.loader(bdir+cid+'_l_u.pdb.pkl')
    R=myPDB.loader(bdir+cid+'_r_u.pdb.pkl')
    V=[] # pair scores
    Y=[] # pair labels (+1 / -1)
    for p in E.Pex[cid][0]:
        v=L.B[p[0]]+R.B[p[1]]#np.abs(L.ASA[p[0]]-L.asa[p[0]])+np.abs(R.ASA[p[1]]-R.asa[p[1]])
        # `not` instead of `~`: bitwise inversion of a plain Python bool is
        # always truthy
        if not np.isnan(v):
            V.append(v)
            Y.append(+1)
    for n in E.getNegEx(cid):
        v=L.B[n[0]]+R.B[n[1]]#np.abs(L.ASA[n[0]]-L.asa[n[0]])+np.abs(R.ASA[n[1]]-R.asa[n[1]])
        if not np.isnan(v):
            V.append(v)
            Y.append(-1)
    (_,_,auc)=roc.roc(V,Y)
    A[cid]=auc

Ejemplo n.º 25

0

Mostrar archivo

Archivo: analyzeCAPRI.py Proyecto: foxtrotmike/pairpred

from PyML.evaluators import roc
from postProcess import postProcessAvg
#from getExamplesDBD import getPosex
from symmetryProcessing import *
#bdir='../CAPRI/'
# Evaluate the pairpred predictions of one complex against the positive
# pairs obtained from its PDB file.
bdir='../../g2mers/'
cid='1MLC'
#(_,_,P,_,_)=getPosex(bdir,cid)    #get positive examples
P=getPosexFromPDB(bdir,cid,dthr=6.0)        # Handles symmetry in the complex
ppfile=bdir+cid+'.pairpred.txt'

(auc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(ppfile,usePDBidx=False)
#auc0,Mvc0,Mv,Mlc,lseq,rseq,lrV0,lrV,rrV0,rrV=postProcessAvg(cid,bdir,bdir)
#
#Mv[:10,:]=np.nan
#Mv[-10:,:]=np.nan
#Mv[:,:10]=np.nan
#Mv[:,-10:]=np.nan

# label table: 1.0 at every positive pair, 0.0 elsewhere
Mvtbl=np.zeros(Mv.shape)
for (i,j) in P:
    Mvtbl[i,j]=1.0
# pairwise evaluation on the entries that have a (non-NaN) prediction
nidx=(~np.isnan(Mv.ravel()))
Mvr=Mv.ravel()[nidx]
Mvtblr=Mvtbl.ravel()[nidx]
(fpv,tpv,aucv)=roc.roc(list(Mvr),list(Mvtblr))
# zero-based rank of the first positive pair when sorted by decreasing score
rfpp=np.argmax(Mvtblr[np.argsort(-Mvr)]==1)
# formatted into one string so the output matches the former print
# statements while remaining valid on Python 3
print("%s AUC = %s RFPP = %s" % (cid,aucv,rfpp))
# AUC of the NaN-ignoring maxima over axis 0, then over axis 1
for axis in (0,1):
    (fpl,tpl,auc)=roc.roc(list(np.nanmax(Mv,axis=axis)),list(np.nanmax(Mvtbl,axis=axis)))
    print(auc)

Ejemplo n.º 26

0

Mostrar archivo

Archivo: getECstats.py Proyecto: foxtrotmike/pairpred

     dd=[]
     ld=[]
     md=[]
     for p in E.Pex[cid][0]:
         if p in mx:
             #myEdata.append([cid,p[0],p[1],1,mx[p],dx[p]])
             dd.append(dx[p])
             ld.append(+1)
             md.append(mx[p])
     for n in E.getNegEx(cid):
         if n in mx:
             #myEdata.append([cid,n[0],n[1],-1,mx[n],dx[n]])
             dd.append(dx[n])
             md.append(mx[n])
             ld.append(-1)
     (_,_,aa_di)=roc.roc(dd,ld)
     (_,_,aa_mi)=roc.roc(md,ld)
     Xauc.append([aa_mi,aa_di])
     print cid,ncs,lstats[0].shape[0],rstats[0].shape[0],Xauc[-1],Xuauc[-1],Lauc[-2:]
     #pdb.set_trace()
 if(myid!=0):
     comm.send(myEdata, dest=0)
 else:
     """
     for p in range(1,nprocs):
         myEdata.extend(comm.recv(source=p))
     output = open(ofname, 'wb')
     cPickle.dump(myEdata, output,-1)        
     output.close()     
     if evalROC:
         MV=[]

Ejemplo n.º 27

0

Mostrar archivo

Archivo: resultsObjects.py Proyecto: chaitanyambilgikar/B551-Fall-2012

    def getROC(self, rocN = None) :
        """
        Return the third element of ``roc_module.roc`` evaluated on
        ``self.decisionFunc`` and ``self.givenY`` with the given ``rocN``
        and ``self.rocNormalization``.
        """
        curve = roc_module.roc(self.decisionFunc, self.givenY,
                               rocN, self.rocNormalization)
        # exactly three values are expected; only the last one is returned
        _first, _second, rocValue = curve
        return rocValue

Ejemplo n.º 28

0

Mostrar archivo

Archivo: resultsObjects.py Proyecto: silasxue/Sentiment-Analysis

    def getROC(self, rocN = None) :
        """
        Compute the ROC value of these results.

        ``roc_module.roc`` is called with the stored decision values
        (``self.decisionFunc``), the given labels (``self.givenY``), ``rocN``
        and ``self.rocNormalization``; the last of its three results is
        returned.
        """
        (_curveA, _curveB, rocValue) = roc_module.roc(
            self.decisionFunc, self.givenY, rocN, self.rocNormalization)
        return rocValue

Ejemplo n.º 29

0

Mostrar archivo

Archivo: aucPSSM.py Proyecto: foxtrotmike/pairpred

"""
Created on Wed Nov 27 08:40:33 2013

@author: root
"""
from myPDB import *
from getExamplesDBD_breakup import *
from PyML.evaluators.roc import roc

# Per-chain AUC of the column-wise PSSM maximum against interface labels.
E=getExamplesDBD.loader(os.path.join('../../DBD4CSPKL/PKL','ENS_15_35_50.lbl.pkl'))
pdbdir='../../DBD4CSPKL/PDB_all_'
pkldir='../../DBD4CSPKL/PKL'
# unique file ids: the part of each '*.pdb.pkl' base name before the first '.'
F=list(set([getFileParts(g)[1].split('.')[0] for g in glob.glob(os.path.join(pkldir,'*.pdb.pkl'))]))
A={} # file id -> AUC
for fid in F:
    print(fid) # single-argument call form: same output on Python 2 and 3
    X=myPDB.loader(os.path.join(pkldir,fid+'.pdb.pkl'))
    C=np.max(X.pssm,axis=0)
    #C=X.rasa#np.sum(.psfm,axis=0)
    #C=JSON2ConsScore(ipdbfile, jfile)
    # complexes whose key contains this file id
    fcids=[k for k in E.Pex.keys() if (fid in k)]
    fPi=[]
    for c in fcids:
        # element 0 of each positive pair when fid is the first entry of
        # the complex key, element 1 otherwise
        fPi.extend([i[int(c[0]!=fid)] for i in E.Pex[c][0]])
    fPi=np.unique(np.array(fPi))
    if len(fPi):
        L=np.zeros(len(C))
        L[fPi]=1.0
        A[fid]=roc(list(C),list(L))[-1]

Ejemplo n.º 30

0

Mostrar archivo

Archivo: analyzeLOOCV_par.py Proyecto: foxtrotmike/pairpred

def getAUC4Protein(lrV):
    """
    Compute the AUC for one protein from its per-entry dictionary.

    Parameters:
        lrV: mapping whose values are sequences; element 0 of each value is
            used as the score and element 1 as the label.

    Returns:
        ``(a, vv, ll)`` -- ``a`` is the third element of ``roc.roc(vv, ll)``;
        ``vv`` and ``ll`` are the scores and labels as numpy arrays.

    Raises:
        IndexError: if ``lrV`` is empty or its values have fewer than two
            elements.
    """
    # list comprehension instead of map(): a map object cannot be indexed
    # on Python 3
    vl=[list(col) for col in zip(*lrV.values())]
    vv=vl[0]
    ll=vl[1]
    (_,_,a)=roc.roc(vv,ll)
    vv=np.array(vv)
    ll=np.array(ll)
    return (a,vv,ll)

Ejemplo n.º 31

0

Mostrar archivo

        except:
            continue
    #Get our results
    Mo=np.zeros((len(L.S2Ri),len(R.S2Ri)))
    Mo.fill(np.nan)
    ifile=cdir+'/InterPRed_prediction/2X000.InterPRed.txt'

    for ln in open(ifile,'r'):
        lns=ln.split()
        r=int(lns[3])
        c=int(lns[8])
        v=float(lns[10])
        Mo[r,c]=v    
    """
    print 'MI1- auc',roc.roc(MI1,trbs)[-1]
    print 'Shandar - auc',roc.roc(np.nanmax(M,axis=0),trbs)[-1]
    print 'Our - auc',roc.roc(np.nanmax(Mo,axis=0),trbs)[-1]
    plotdv(rseqn,trbs)
    plotdv(rseqn,np.nanmax(M,axis=0))
    plotdv(rseqn,np.nanmax(Mo,axis=0))
    #plotdv(rseqn,MI1)
    plt.show()
    """
    plt.plot([0,1],[0, 1],'k:',linewidth=2.0)
    (fp,tp,auc)=roc.roc(list(M.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'r-.',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(MI1,list(np.nanmax(Mt,axis=0)));plt.plot(fp,tp,'g--',linewidth=2.0);print auc
    (fp,tp,auc)=roc.roc(list(Mo.flatten()),list(Mt.flatten()));plt.plot(fp,tp,'b-',linewidth=2.0);print auc
    plt.grid()
    plt.xlabel('FPR')
    plt.ylabel('TPR')
    plt.legend(['Random : 50.0','PPiPP : 54.0','MI-1 : 59.6','PAIRPred : 63.8'],loc=0);plt.title('EF-CAM Results');plt.show()