Example #1
0
def parallelRun(N,pdbpklpath,pppath,ofname=None,comm=None,myid=0,nprocs=1):
    """
    Wrapper for running computeDistMeansForComplex in parallel.

    The complex ids in the module-level ``incids`` are cut into chunks of
    ceil(len(incids)/nprocs) ids and the calling process handles the chunk
    whose index equals ``myid``. Ids whose computation raises are reported
    and left out of the result. Ranks other than 0 send their partial
    dictionary to rank 0, which merges all of them with mergeDicts.

    Args:
        N, pdbpklpath, pppath: passed through unchanged to
            computeDistMeansForComplex(cid, N, pdbpklpath, pppath).
        ofname: when not None, rank 0 stores the merged result with
            Pickle.dump(ofname, result).
        comm: object providing send(obj, dest=...) and recv(source=...)
            (presumably an mpi4py communicator -- confirm with the caller).
            Not used in the default single-process call.
        myid: rank of the calling process; rank 0 collects the results.
        nprocs: number of cooperating processes.

    Returns:
        The value of mergeDicts(...) on rank 0, None on every other rank.
    """
    cids=incids
    # Never let the chunk size drop to 0 when there is nothing to process.
    csize=max(1,int(np.ceil(len(cids)/float(nprocs))))
    gclist=list(chunks(cids,csize))

    # Rounding the chunk size up can leave fewer chunks than processes
    # (e.g. 5 ids on 4 processes -> 3 chunks). Surplus ranks get an empty
    # work list instead of raising IndexError, so they still reach the send
    # below and rank 0 does not wait forever for them.
    mycids=gclist[myid] if myid<len(gclist) else []
    A={}
    for cid in mycids:
        print("Processing %s" % (cid,))
        try:
            A[cid]=computeDistMeansForComplex(cid,N,pdbpklpath,pppath)
        except Exception as ee:
            # Best effort: report the failing complex and keep going.
            print("Error processing %s %s %s" % (cid,ee,traceback.format_exc()))
            continue
    Ascores=None
    if myid!=0:
        comm.send(A, dest=0)
    else:
        # Rank 0 starts from its own results and gathers one dict per rank.
        gcK=[A]
        for p in range(1,nprocs):
            gcK.append(comm.recv(source=p))
        Ascores=mergeDicts(gcK)
        if ofname is not None:
            Pickle.dump(ofname,Ascores)
            print("Saved scores file: %s" % (ofname,))
    return Ascores
Example #2
0
    def onFileTriggered(self, event):
        """
        Handle a triggered action, selected by the text of `event`.

        "Save" calls Pickle.dump(self.view, "current.pkl"). "Open" clears
        self.view.scene, switches to mode "free" and calls
        Pickle.load(self.view, "current.pkl"). Any other text does nothing.
        """
        label = str(event.text())
        if label == "Save":
            Pickle.dump(self.view, "current.pkl")
        elif label == "Open":
            self.view.scene.clear()
            self.setMode("free")
            Pickle.load(self.view, "current.pkl")
Example #3
0
    def onFileTriggered(self, event):
        """
        Dispatch on the text of the triggered `event`.

        - "Save": Pickle.dump(self.view, "current.pkl")
        - "Open": clear self.view.scene, set mode "free", then
          Pickle.load(self.view, "current.pkl")

        Every other text is ignored.
        """
        action = str(event.text())

        if action == "Save":
            Pickle.dump(self.view, "current.pkl")
            return
        if action != "Open":
            return

        self.view.scene.clear()
        self.setMode("free")
        Pickle.load(self.view, "current.pkl")
Example #4
0
def batchExtract(pkldir, bdir, ofname):
    """
    Run calcRMSD_pymol for every complex found in `pkldir` and cache the results.

    Every file matching ``pkldir + '*.pdb.pkl'`` is visited. The complex key
    is the file name with its extensions removed by two getFileParts calls
    and its last two characters dropped (presumably the '_u' / '_b' suffix
    -- confirm against the naming scheme). Keys already present in the
    result dictionary are not recomputed, so an interrupted run can resume.

    Args:
        pkldir: prefix of the '<key>_u.pdb.pkl' / '<key>_b.pdb.pkl' files.
            Paths are built by plain string concatenation, so a directory
            must include its trailing separator.
        bdir: prefix of the '<key>_u.pdb' / '<key>_b.pdb' files handed to
            calcRMSD_pymol (same concatenation rule).
        ofname: result file. Loaded with myPickle.load when it exists and
            rewritten with myPickle.dump after every processed complex.

    Returns:
        dict mapping each complex key to the value of calcRMSD_pymol.
    """
    import glob

    flist = glob.glob(pkldir + '*.pdb.pkl')
    TT = len(flist) + 0.0
    # Resume from an earlier run when a result file is already there.
    if os.path.isfile(ofname):
        fdict = myPickle.load(ofname)
    else:
        fdict = {}
    for cnt, f in enumerate(flist):
        # Report a real percentage (the label says '%').
        print('%% Done = %s' % (100.0 * cnt / TT,))
        (_, k, _) = getFileParts(getFileParts(f)[1])
        k = k[:-2]
        if k in fdict:
            print('Already found %s' % (f,))
            continue
        print('Processing %s' % (f,))
        # Both pickled structures must load, otherwise the complex is
        # skipped; the loaded objects themselves are not needed afterwards.
        try:
            myPDB.loader(pkldir + k + '_u.pdb.pkl')
            myPDB.loader(pkldir + k + '_b.pdb.pkl')
        except Exception as e:
            print('Skipping %s: %s' % (k, e))
            continue
        try:
            rpymol = calcRMSD_pymol(bdir + k + '_u.pdb', bdir + k + '_b.pdb')
        except Exception:
            print('Error processing %s' % (k,))
            # Reset the cmd session and pause briefly before the next complex.
            cmd.reinitialize()
            time.sleep(0.1)
            continue
        fdict[k] = rpymol
        # Checkpoint after every complex so finished work survives a crash.
        myPickle.dump(ofname, fdict)
        print('%s %s' % (k, rpymol[0]))
    return fdict
Example #5
0
def parallelRun(N,
                pdbpklpath,
                pppath,
                ofname=None,
                comm=None,
                myid=0,
                nprocs=1):
    """
    Wrapper for running computeDistMeansForComplex in parallel.

    The complex ids in the module-level ``incids`` are cut into chunks of
    ceil(len(incids) / nprocs) ids and the calling process handles the chunk
    whose index equals ``myid``. Ids whose computation raises are reported
    and left out of the result. Ranks other than 0 send their partial
    dictionary to rank 0, which merges all of them with mergeDicts.

    Args:
        N, pdbpklpath, pppath: passed through unchanged to
            computeDistMeansForComplex(cid, N, pdbpklpath, pppath).
        ofname: when not None, rank 0 stores the merged result with
            Pickle.dump(ofname, result).
        comm: object providing send(obj, dest=...) and recv(source=...)
            (presumably an mpi4py communicator -- confirm with the caller).
            Not used in the default single-process call.
        myid: rank of the calling process; rank 0 collects the results.
        nprocs: number of cooperating processes.

    Returns:
        The value of mergeDicts(...) on rank 0, None on every other rank.
    """
    cids = incids
    # Never let the chunk size drop to 0 when there is nothing to process.
    csize = max(1, int(np.ceil(len(cids) / float(nprocs))))
    gclist = list(chunks(cids, csize))

    # Rounding the chunk size up can leave fewer chunks than processes
    # (e.g. 5 ids on 4 processes -> 3 chunks). Surplus ranks get an empty
    # work list instead of raising IndexError, so they still reach the send
    # below and rank 0 does not wait forever for them.
    mycids = gclist[myid] if myid < len(gclist) else []
    A = {}
    for cid in mycids:
        print("Processing %s" % (cid,))
        try:
            A[cid] = computeDistMeansForComplex(cid, N, pdbpklpath, pppath)
        except Exception as ee:
            # Best effort: report the failing complex and keep going.
            print("Error processing %s %s %s" %
                  (cid, ee, traceback.format_exc()))
            continue
    Ascores = None
    if myid != 0:
        comm.send(A, dest=0)
    else:
        # Rank 0 starts from its own results and gathers one dict per rank.
        gcK = [A]
        for p in range(1, nprocs):
            gcK.append(comm.recv(source=p))
        Ascores = mergeDicts(gcK)
        if ofname is not None:
            Pickle.dump(ofname, Ascores)
            print("Saved scores file: %s" % (ofname,))
    return Ascores
            # Distance matrices for both sides, each divided by its own maximum.
            lD=getDistMat(getCoords(L.R))
            rD=getDistMat(getCoords(R.R))
            lM=np.max(lD)
            rM=np.max(rD)
            lD=lD/lM
            rD=rD/rM
            # For every unordered pair of (l, r) entries in pex keep the larger
            # of the two normalised distances.
            D=[]
            for k0,(l0,r0) in enumerate(pex):
                for l1,r1 in pex[k0+1:]:
                    d=np.max((lD[l0,l1],rD[r0,r1]))
                    D.append(d)
            # Add this item's histogram counts over `bins` to the running total.
            C=C+np.histogram(D,bins)[0]
        except Exception as e:
            # Report the failure and move on to the next loop item.
            print "Error",e
            continue
    # Store the bin edges together with the accumulated counts.
    mPickle.dump(ofname,(bins,C))
else:    
    # Reuse the (bins, C) pair stored by an earlier run.
    (bins,C)=mPickle.load(ofname)
    
    # NOTE(review): the plotting code below is indented under this else, so
    # nothing is plotted in the branch that computes and saves -- confirm
    # that this is intended.
    # Bin centres; only bins whose centre is <= 1 are kept.
    bb=(bins[1:]+bins[:-1])/2
    idx=bb<=1
    bb=bb[idx]
    C=C[idx]
    # Counts per bin on the first y-axis (blue line).
    plt.plot(bb,C,'b',linewidth=2);plt.grid();
    plt.xlabel('Normalized pairwise distance (d)');
    plt.ylabel('Number of pairs of simultaneosuly interacting residue pairs',color='b');
    ax1=plt.gca()    
    ax2 = ax1.twinx()
    # Cumulative fraction of the counts on a second y-axis (red).
    ax2.plot(bb, np.cumsum(C)/np.sum(C), 'r.-',linewidth=2)
    ax2.set_ylabel('Cumulative proporion of pairs of simultaneosuly interacting residue pairs', color='r')
    plt.show()
Example #7
0
def saveMIFile(M, ofile):
    """
    Save the MI variables `M` to the file `ofile`.

    Delegates to ``cPickle.dump(ofile, M)``; the original author describes
    the result as a zipped pickle binary file.

    NOTE(review): the standard-library ``cPickle.dump`` takes
    ``(obj, file_object)``. This call passes the destination first, so
    ``cPickle`` is presumably bound to a project helper with a
    ``(filename, obj)`` signature -- confirm against the file's imports.
    """
    cPickle.dump(ofile, M)
Example #8
0
            # Distance matrices for both sides, each divided by its own maximum.
            lD = getDistMat(getCoords(L.R))
            rD = getDistMat(getCoords(R.R))
            lM = np.max(lD)
            rM = np.max(rD)
            lD = lD / lM
            rD = rD / rM
            # For every unordered pair of (l, r) entries in pex keep the larger
            # of the two normalised distances.
            D = []
            for k0, (l0, r0) in enumerate(pex):
                for l1, r1 in pex[k0 + 1:]:
                    d = np.max((lD[l0, l1], rD[r0, r1]))
                    D.append(d)
            # Add this item's histogram counts over `bins` to the running total.
            C = C + np.histogram(D, bins)[0]
        except Exception as e:
            # Report the failure and move on to the next loop item.
            print "Error", e
            continue
    # Store the bin edges together with the accumulated counts.
    mPickle.dump(ofname, (bins, C))
else:
    # Reuse the (bins, C) pair stored by an earlier run.
    (bins, C) = mPickle.load(ofname)

    # NOTE(review): the plotting code below is indented under this else, so
    # nothing is plotted in the branch that computes and saves -- confirm
    # that this is intended.
    # Bin centres; only bins whose centre is <= 1 are kept.
    bb = (bins[1:] + bins[:-1]) / 2
    idx = bb <= 1
    bb = bb[idx]
    C = C[idx]
    # Counts per bin on the first y-axis (blue line).
    plt.plot(bb, C, 'b', linewidth=2)
    plt.grid()
    plt.xlabel('Normalized pairwise distance (d)')
    plt.ylabel('Number of pairs of simultaneosuly interacting residue pairs',
               color='b')
    ax1 = plt.gca()
    ax2 = ax1.twinx()
    # Cumulative fraction of the counts on a second y-axis (red).
    ax2.plot(bb, np.cumsum(C) / np.sum(C), 'r.-', linewidth=2)
Example #9
0
        # Start from a copy of `chars` and remove every character listed in
        # NORMAL; the keys that remain form SPECIAL.
        SPECIAL = chars.copy()

        for c in NORMAL:
            if c in chars:
                SPECIAL.pop(c)
        SPECIAL = list(SPECIAL.keys())

        # '\n' is enumerated first (index 0), then the NORMAL characters.
        TOT = { c:i for i, c in enumerate(['\n'] + NORMAL) }
        # len(TOT) does not change while the comprehension is evaluated, so
        # every SPECIAL character is mapped to the same index.
        TOT.update( {c:len(TOT) for c in SPECIAL} )
    else:
        # Alternative branch: '\n' first, then every key of `chars`.
        TOT = { c:i for i, c in enumerate(['\n'] + list(chars.keys())) }
    #####################################
    
    # WRITE CHARS MAP ON FILE 
    CM_OUT = os.path.join(HOME, 'charmap.pickle')
    myPickle.dump(CM_OUT, TOT)
    #####################################
    
    
    # CONVERT PASSWORDS IN INDEX
    X = [string2index(x, MAX_LEN, TOT) for x in X]
    #####################################
    
    # CREATE WRITE TEST IN FILE
    TEST_OUT = os.path.join(HOME, 'rfX')
    Xtest = np.array(X)
    Xtest = np.squeeze(Xtest)
    # rank and F become single columns placed in front of the index columns.
    rank = np.array(rank)[:, None]
    rfX = np.concatenate((rank, F[:, None], Xtest), 1) 
    # np.save appends '.npy' when the file name does not already end with it.
    np.save(TEST_OUT, rfX)
    #####################################
 # Receive the partial results of every other rank and append them to the
 # local accumulators.
 for p in range(1,nprocs):
     (DNTP_p,dsA_p,LV_p,LVP_p,TPS_p)=comm.recv(source=p)
     dsAr.append(dsA_p)
     DNTP.extend(DNTP_p)
     LV.extend(LV_p)
     LVP.extend(LVP_p)
     TPS.extend(TPS_p)
 dsA=mergeDicts(dsAr)
 print 'Number of complexes',len(dsA)
 #print 'Complex wise AUC = ',np.mean(dA.values())
 # Transpose the per-complex values: element 0 -> pa, 1 -> p1, 2 -> p2.
 # Indexing the result of map() needs a list, i.e. Python 2 semantics.
 # ps is the same list object as p1, so extend() also lengthens p1.
 p12=map(list,zip(*dsA.values()));pa=p12[0];p1=p12[1];p2=p12[2];ps=p1;ps.extend(p2);
 print 'Complex Wise AUC =',np.mean(pa),'Protein Wise AUC =',np.mean(ps)  
 if not auconly:
     # ROC results for LV and LVP are saved together with the per-complex dict.
     (fplv,tplv,auclv)=roc.roc_VA(LV) 
     (fplvp,tplvp,auclvp)=roc.roc_VA(LVP) 
     mkl.dump(ofname,((fplv,tplv,auclv),(fplvp,tplvp,auclvp),dsA))
     print "Results file saved",ofname
     print "AUC = ",auclv
     # The bare string below is a no-op expression kept by the author as
     # scratch code; it is not executed.
     """        
         plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show()
         [np.sum(dn<2.0) for dn in DNTP]
         cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs]
         [dsA[cid] for cid in cids]
         [dAo[cid] for cid in cids]
     """ 
     #DISTRIBUTION PLOT
     dthresh=[0,1,2,3,4] # sequence distance threshold    
     # Column 1 of TPS is shifted by one before being passed to calcRFPP.
     XX=calcRFPP(np.array(TPS)[:,1]+1,DNTP,dthresh=dthresh)
     if doplot:
         # Figure 1: ROC curve; figure 2: one box per threshold in dthresh.
         plt.figure();plt.plot(fplv,tplv);plt.xlabel('FP');plt.ylabel('TP');plt.grid();plt.title('ROC Curve: AUC =  %1.2f' % (auclv*100))
         plt.figure();plt.boxplot(tuple(XX),bootstrap=1000,positions=dthresh);plt.xlabel('Sequence Distance (D) from a TP'); plt.ylabel('Minimum rank of a prediction within distance D of a TP' );plt.title('Results of soft sequence distance threshold');plt.grid();plt.yticks(range(0,201,10));
Example #11
0
            # Result for this item: a pair built from pap+rp and panp+rnp.
            r=(pap+rp,panp+rnp)#+(100*np.mean(Ml[~np.isnan(Ml)]>0),Mv.shape[0],Mv.shape[1])
            #(auc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(fname)
        except Exception as e:
            # Log the error with a traceback and store NaN for this item so
            # the loop can carry on.
            print e
            print '-'*60
            print '###PROCESSSING FAILED FOR ',cid,e
            traceback.print_exc(file=sys.stdout)
            print '-'*60            
            r=np.nan
        rfpp.append(r)
    print cid,rfpp
    R[cid]=rfpp


# (AUC, AUCL, AUCR, NTP,RFP)_post,(AUC, AUCL, AUCR, NTP,RFP)_no_post,pp, percentage of positives, |L|,|R|
# All collected results are written to a fixed file name in the working directory.
myPickle.dump('DBD4_SGD_CENTPW71.res.pkl',R)    
#V=np.array([r for r in R.values() if ~np.any(np.isnan(r))])    
#import scipy.stats
#scipy.stats.wilcoxon(V[:,0]-V[:,1])

#cc=['1KTZ',  '2OOB']

#Ro={};
#for k in R:
#    if k not in cc:
#        Ro[k]=[]
#        for m in range(len(R[k])):
#            Ro[k].extend(R[k][m][0]+R[k][m][1])
#        
#mV=np.mean(Ro.values(),axis=0)        
#print mV
                    # Build the one-letter residue pair in both orders so
                    # that (x, y) and (y, x) are counted alike.
                    k0 = (three_to_one(Lu.R[a].get_resname()),
                          three_to_one(Ru.R[b].get_resname()))
                    k1 = (three_to_one(Ru.R[b].get_resname()),
                          three_to_one(Lu.R[a].get_resname()))
                except KeyError:
                    # A KeyError while building the keys skips this pair
                    # (presumably an unknown residue name -- confirm).
                    continue
                # When both letters are equal, k0 == k1 and the same entry is
                # incremented twice.
                Ncnt[k0] = Ncnt[k0] + 1
                Ncnt[k1] = Ncnt[k1] + 1

            addASA(cid, True, lPex, APcnt, Lu, Lb, Lu2b)
            addASA(cid, False, rPex, APcnt, Ru, Rb, Ru2b)
            addASA(cid, True, lNex, ANcnt, Lu, Lb, Lu2b)
            addASA(cid, False, rNex, ANcnt, Ru, Rb, Ru2b)

            #pdb.set_trace()
        # Store all counters in one tuple so a later run can reload them.
        myPickle.dump(ofname, (Pcnt, Ncnt, APcnt, ANcnt, TAC))
    else:
        print "Using existing file", ofname
        (Pcnt, Ncnt, APcnt, ANcnt, TAC) = myPickle.load(ofname)

    Pm = getMtx(Pcnt)
    Nm = getMtx(Ncnt)

    # v: row vector holding the column sums of Nm plus those of Pm.
    v = np.atleast_2d(np.sum(Nm, axis=0) + np.sum(Pm, axis=0))
    # Ex: outer product of v with itself, normalised to sum to 1 and scaled
    # by the total of Pm (presumably the expected counts -- confirm).
    Ex = np.sum(Pm) * ((v * v.T) / np.sum((v * v.T)))
    #
    # NOTE(review): this value of pp is overwritten below before it is used.
    pp = ((Pm - Ex)**2) / (Ex)

    #pp=(Pm/np.sum(Pm))/(Nm/np.sum(Nm))
    # Log2 ratio of Pm to Ex; this is the pp that gets reported.
    pp = np.log2(Pm / Ex)
    print categ, calc_gini(pp.flatten())
Example #13
0
def saveMIFile(M,ofile):
    """
    Save the MI variables `M` to the file `ofile`.

    Delegates to ``cPickle.dump(ofile, M)``; the original author describes
    the result as a zipped pickle binary file.

    NOTE(review): the standard-library ``cPickle.dump`` takes
    ``(obj, file_object)``. This call passes the destination first, so
    ``cPickle`` is presumably bound to a project helper with a
    ``(filename, obj)`` signature -- confirm against the file's imports.
    """
    cPickle.dump(ofile, M)
     # Last statement of a loop whose header lies outside this excerpt.
     TPS.extend(TPS_p)
 dsA = mergeDicts(dsAr)
 print 'Number of complexes', len(dsA)
 #print 'Complex wise AUC = ',np.mean(dA.values())
 # Transpose the per-complex values: element 0 -> pa, 1 -> p1, 2 -> p2.
 # Indexing the result of map() needs a list, i.e. Python 2 semantics.
 p12 = map(list, zip(*dsA.values()))
 pa = p12[0]
 p1 = p12[1]
 p2 = p12[2]
 # ps is the same list object as p1, so extend() also lengthens p1.
 ps = p1
 ps.extend(p2)
 print 'Complex Wise AUC =', np.mean(pa), 'Protein Wise AUC =', np.mean(
     ps)
 if not auconly:
     # ROC results for LV and LVP are saved together with the per-complex dict.
     (fplv, tplv, auclv) = roc.roc_VA(LV)
     (fplvp, tplvp, auclvp) = roc.roc_VA(LVP)
     mkl.dump(ofname,
              ((fplv, tplv, auclv), (fplvp, tplvp, auclvp), dsA))
     print "Results file saved", ofname
     print "AUC = ", auclv
     # The bare string below is a no-op expression kept by the author as
     # scratch code; it is not executed.
     """        
         plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show()
         [np.sum(dn<2.0) for dn in DNTP]
         cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs]
         [dsA[cid] for cid in cids]
         [dAo[cid] for cid in cids]
     """
     #DISTRIBUTION PLOT
     dthresh = [0, 1, 2, 3, 4]  # sequence distance threshold
     # Column 1 of TPS is shifted by one before being passed to calcRFPP.
     XX = calcRFPP(np.array(TPS)[:, 1] + 1, DNTP, dthresh=dthresh)
     if doplot:
         # First figure: the ROC curve built from fplv / tplv.
         plt.figure()
         plt.plot(fplv, tplv)