def parallelRun(N, pdbpklpath, pppath, ofname=None, comm=None, myid=0, nprocs=1):
    """
    Wrapper for running computeDistMeansForComplex in parallel.

    The module-level id list ``incids`` is split into chunks, one per process;
    this process handles chunk number ``myid`` and rank 0 collects and merges
    the per-process result dictionaries.

    Args:
        N, pdbpklpath, pppath: forwarded unchanged to
            computeDistMeansForComplex for every complex id.
        ofname: optional output path; when given, rank 0 stores the merged
            result with ``Pickle.dump(ofname, scores)``.
        comm: communicator providing ``send(obj, dest=...)`` and
            ``recv(source=...)`` (presumably an mpi4py communicator -- confirm
            against the caller). Not touched in a single-process run.
        myid: rank of this process; rank 0 is the root.
        nprocs: total number of processes.

    Returns:
        The merged dictionary on rank 0, ``None`` on every other rank.
    """
    cids = incids
    # A chunk size of 0 (no ids at all) is not a meaningful step; use 1.
    csize = max(1, int(np.ceil(len(cids) / float(nprocs))))
    gclist = list(chunks(cids, csize))
    # Rounding the chunk size up can leave fewer chunks than processes
    # (e.g. 5 ids over 4 ranks -> 3 chunks, assuming chunks() yields
    # consecutive csize-sized pieces). A rank without a chunk processes
    # nothing but must still report to rank 0, otherwise the root would
    # wait forever in recv().
    mycids = gclist[myid] if myid < len(gclist) else []

    A = {}
    for cid in mycids:
        print("Processing %s" % (cid,))
        try:
            A[cid] = computeDistMeansForComplex(cid, N, pdbpklpath, pppath)
        except Exception as ee:
            # Best effort: report the failure and carry on with the next id.
            print("Error processing %s %s %s" % (cid, ee, traceback.format_exc()))
            continue

    if myid != 0:
        # Workers hand their partial result to the root and are done.
        comm.send(A, dest=0)
        return None

    gcK = [A]
    for p in range(1, nprocs):
        gcK.append(comm.recv(source=p))
    Ascores = mergeDicts(gcK)
    if ofname is not None:
        Pickle.dump(ofname, Ascores)
        print("Saved scores file: %s" % (ofname,))
    return Ascores
def onFileTriggered(self, event):
    """
    React to a triggered action, selected by the action's text.

    "Save" passes ``self.view`` and "current.pkl" to ``Pickle.dump``.
    "Open" clears ``self.view.scene``, switches the mode to "free" and then
    passes ``self.view`` and "current.pkl" to ``Pickle.load``.
    Any other text is ignored.
    """
    # NOTE(review): Pickle.dump/load here take (view, filename); presumably a
    # project helper that stores/restores the view contents -- confirm.
    snapshot = "current.pkl"
    label = str(event.text())

    if label == "Save":
        Pickle.dump(self.view, snapshot)
        return

    if label != "Open":
        return

    self.view.scene.clear()
    self.setMode("free")
    Pickle.load(self.view, snapshot)
def batchExtract(pkldir, bdir, ofname):
    """
    Run calcRMSD_pymol for every complex found in ``pkldir`` and cache results.

    Complex ids come from the ``*.pdb.pkl`` file names in ``pkldir`` with the
    last two characters of the base name removed. For each new id the files
    ``<id>_u.pdb.pkl`` / ``<id>_b.pdb.pkl`` must load from ``pkldir``, then
    ``calcRMSD_pymol`` is called on ``<id>_u.pdb`` / ``<id>_b.pdb`` in ``bdir``.

    Results are kept in ``ofname``: an existing file is loaded first, ids
    already present are skipped, and the file is rewritten after every newly
    processed id so an interrupted run can resume.

    Args:
        pkldir: directory with the ``.pdb.pkl`` files (a trailing path
            separator is optional).
        bdir: directory with the matching ``.pdb`` files (a trailing path
            separator is optional).
        ofname: path of the results file handled by ``myPickle``.

    Returns:
        dict mapping complex id to the value returned by calcRMSD_pymol.
    """
    import glob

    flist = glob.glob(os.path.join(pkldir, '*.pdb.pkl'))
    total = float(len(flist))
    # Resume from an earlier run when the results file already exists.
    fdict = myPickle.load(ofname) if os.path.isfile(ofname) else {}

    for cnt, f in enumerate(flist):
        # Scale to a real percentage to match the "% Done" label.
        print('%% Done = %s' % (100.0 * cnt / total,))
        (_, k, _) = getFileParts(getFileParts(f)[1])
        k = k[:-2]  # strip the two-character suffix (e.g. "_u" / "_b")
        if k in fdict:
            print("Already found %s" % (f,))
            continue

        print("Processing %s" % (f,))
        try:
            # Loaded only to confirm both pickles exist and are readable.
            myPDB.loader(os.path.join(pkldir, k + '_u.pdb.pkl'))
            myPDB.loader(os.path.join(pkldir, k + '_b.pdb.pkl'))
        except (KeyboardInterrupt, SystemExit):
            raise  # keep the batch interruptible
        except:  # noqa: E722 -- best effort: skip on any loader failure
            print("Error loading %s" % (k,))
            continue

        try:
            rpymol = calcRMSD_pymol(os.path.join(bdir, k + '_u.pdb'),
                                    os.path.join(bdir, k + '_b.pdb'))
        except (KeyboardInterrupt, SystemExit):
            raise  # keep the batch interruptible
        except:  # noqa: E722 -- the backend may raise non-Exception errors
            print("Error processing %s" % (k,))
            # Reset the backend and pause briefly before the next complex
            # (cmd is presumably pymol's cmd module -- confirm).
            cmd.reinitialize()
            time.sleep(0.1)
            continue

        fdict[k] = rpymol
        # Checkpoint after every complex so progress survives a crash.
        myPickle.dump(ofname, fdict)
        print("%s %s" % (k, rpymol[0]))

    return fdict
def parallelRun(N, pdbpklpath, pppath, ofname=None, comm=None, myid=0, nprocs=1):
    """
    Wrapper for running computeDistMeansForComplex in parallel.

    The module-level id list ``incids`` is split into chunks, one per process;
    this process handles chunk number ``myid`` and rank 0 collects and merges
    the per-process result dictionaries.

    Args:
        N, pdbpklpath, pppath: forwarded unchanged to
            computeDistMeansForComplex for every complex id.
        ofname: optional output path; when given, rank 0 stores the merged
            result with ``Pickle.dump(ofname, scores)``.
        comm: communicator providing ``send(obj, dest=...)`` and
            ``recv(source=...)`` (presumably an mpi4py communicator -- confirm
            against the caller). Not touched in a single-process run.
        myid: rank of this process; rank 0 is the root.
        nprocs: total number of processes.

    Returns:
        The merged dictionary on rank 0, ``None`` on every other rank.
    """
    cids = incids
    # A chunk size of 0 (no ids at all) is not a meaningful step; use 1.
    csize = max(1, int(np.ceil(len(cids) / float(nprocs))))
    gclist = list(chunks(cids, csize))
    # Rounding the chunk size up can leave fewer chunks than processes
    # (e.g. 5 ids over 4 ranks -> 3 chunks, assuming chunks() yields
    # consecutive csize-sized pieces). A rank without a chunk processes
    # nothing but must still report to rank 0, otherwise the root would
    # wait forever in recv().
    mycids = gclist[myid] if myid < len(gclist) else []

    A = {}
    for cid in mycids:
        print("Processing %s" % (cid,))
        try:
            A[cid] = computeDistMeansForComplex(cid, N, pdbpklpath, pppath)
        except Exception as ee:
            # Best effort: report the failure and carry on with the next id.
            print("Error processing %s %s %s" % (cid, ee, traceback.format_exc()))
            continue

    if myid != 0:
        # Workers hand their partial result to the root and are done.
        comm.send(A, dest=0)
        return None

    gcK = [A]
    for p in range(1, nprocs):
        gcK.append(comm.recv(source=p))
    Ascores = mergeDicts(gcK)
    if ofname is not None:
        Pickle.dump(ofname, Ascores)
        print("Saved scores file: %s" % (ofname,))
    return Ascores
lD=getDistMat(getCoords(L.R)) rD=getDistMat(getCoords(R.R)) lM=np.max(lD) rM=np.max(rD) lD=lD/lM rD=rD/rM D=[] for k0,(l0,r0) in enumerate(pex): for l1,r1 in pex[k0+1:]: d=np.max((lD[l0,l1],rD[r0,r1])) D.append(d) C=C+np.histogram(D,bins)[0] except Exception as e: print "Error",e continue mPickle.dump(ofname,(bins,C)) else: (bins,C)=mPickle.load(ofname) bb=(bins[1:]+bins[:-1])/2 idx=bb<=1 bb=bb[idx] C=C[idx] plt.plot(bb,C,'b',linewidth=2);plt.grid(); plt.xlabel('Normalized pairwise distance (d)'); plt.ylabel('Number of pairs of simultaneosuly interacting residue pairs',color='b'); ax1=plt.gca() ax2 = ax1.twinx() ax2.plot(bb, np.cumsum(C)/np.sum(C), 'r.-',linewidth=2) ax2.set_ylabel('Cumulative proporion of pairs of simultaneosuly interacting residue pairs', color='r') plt.show()
def saveMIFile(M, ofile):
    """
    Write the MI variables ``M`` to the file ``ofile``.

    Thin wrapper around ``cPickle.dump(ofile, M)``; the original author
    describes the output as a zipped pickle binary file.
    """
    # NOTE(review): the path is passed first and the object second, which is
    # the reverse of the standard-library cPickle.dump(obj, file). Presumably
    # ``cPickle`` is bound to a project pickle wrapper here -- confirm at the
    # import site.
    cPickle.dump(ofile, M)
lD = getDistMat(getCoords(L.R)) rD = getDistMat(getCoords(R.R)) lM = np.max(lD) rM = np.max(rD) lD = lD / lM rD = rD / rM D = [] for k0, (l0, r0) in enumerate(pex): for l1, r1 in pex[k0 + 1:]: d = np.max((lD[l0, l1], rD[r0, r1])) D.append(d) C = C + np.histogram(D, bins)[0] except Exception as e: print "Error", e continue mPickle.dump(ofname, (bins, C)) else: (bins, C) = mPickle.load(ofname) bb = (bins[1:] + bins[:-1]) / 2 idx = bb <= 1 bb = bb[idx] C = C[idx] plt.plot(bb, C, 'b', linewidth=2) plt.grid() plt.xlabel('Normalized pairwise distance (d)') plt.ylabel('Number of pairs of simultaneosuly interacting residue pairs', color='b') ax1 = plt.gca() ax2 = ax1.twinx() ax2.plot(bb, np.cumsum(C) / np.sum(C), 'r.-', linewidth=2)
SPECIAL = chars.copy() for c in NORMAL: if c in chars: SPECIAL.pop(c) SPECIAL = list(SPECIAL.keys()) TOT = { c:i for i, c in enumerate(['\n'] + NORMAL) } TOT.update( {c:len(TOT) for c in SPECIAL} ) else: TOT = { c:i for i, c in enumerate(['\n'] + list(chars.keys())) } ##################################### # WRITE CHARS MAP ON FILE CM_OUT = os.path.join(HOME, 'charmap.pickle') myPickle.dump(CM_OUT, TOT) ##################################### # CONVERT PASSWORDS IN INDEX X = [string2index(x, MAX_LEN, TOT) for x in X] ##################################### # CREATE WRITE TEST IN FILE TEST_OUT = os.path.join(HOME, 'rfX') Xtest = np.array(X) Xtest = np.squeeze(Xtest) rank = np.array(rank)[:, None] rfX = np.concatenate((rank, F[:, None], Xtest), 1) np.save(TEST_OUT, rfX) #####################################
for p in range(1,nprocs): (DNTP_p,dsA_p,LV_p,LVP_p,TPS_p)=comm.recv(source=p) dsAr.append(dsA_p) DNTP.extend(DNTP_p) LV.extend(LV_p) LVP.extend(LVP_p) TPS.extend(TPS_p) dsA=mergeDicts(dsAr) print 'Number of complexes',len(dsA) #print 'Complex wise AUC = ',np.mean(dA.values()) p12=map(list,zip(*dsA.values()));pa=p12[0];p1=p12[1];p2=p12[2];ps=p1;ps.extend(p2); print 'Complex Wise AUC =',np.mean(pa),'Protein Wise AUC =',np.mean(ps) if not auconly: (fplv,tplv,auclv)=roc.roc_VA(LV) (fplvp,tplvp,auclvp)=roc.roc_VA(LVP) mkl.dump(ofname,((fplv,tplv,auclv),(fplvp,tplvp,auclvp),dsA)) print "Results file saved",ofname print "AUC = ",auclv """ plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show() [np.sum(dn<2.0) for dn in DNTP] cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs] [dsA[cid] for cid in cids] [dAo[cid] for cid in cids] """ #DISTRIBUTION PLOT dthresh=[0,1,2,3,4] # sequence distance threshold XX=calcRFPP(np.array(TPS)[:,1]+1,DNTP,dthresh=dthresh) if doplot: plt.figure();plt.plot(fplv,tplv);plt.xlabel('FP');plt.ylabel('TP');plt.grid();plt.title('ROC Curve: AUC = %1.2f' % (auclv*100)) plt.figure();plt.boxplot(tuple(XX),bootstrap=1000,positions=dthresh);plt.xlabel('Sequence Distance (D) from a TP'); plt.ylabel('Minimum rank of a prediction within distance D of a TP' );plt.title('Results of soft sequence distance threshold');plt.grid();plt.yticks(range(0,201,10));
r=(pap+rp,panp+rnp)#+(100*np.mean(Ml[~np.isnan(Ml)]>0),Mv.shape[0],Mv.shape[1]) #(auc,Mv,Ml,lseq,rseq,lrV,rrV)=readFile(fname) except Exception as e: print e print '-'*60 print '###PROCESSSING FAILED FOR ',cid,e traceback.print_exc(file=sys.stdout) print '-'*60 r=np.nan rfpp.append(r) print cid,rfpp R[cid]=rfpp # (AUC, AUCL, AUCR, NTP,RFP)_post,(AUC, AUCL, AUCR, NTP,RFP)_no_post,pp, percentage of positives, |L|,|R| myPickle.dump('DBD4_SGD_CENTPW71.res.pkl',R) #V=np.array([r for r in R.values() if ~np.any(np.isnan(r))]) #import scipy.stats #scipy.stats.wilcoxon(V[:,0]-V[:,1]) #cc=['1KTZ', '2OOB'] #Ro={}; #for k in R: # if k not in cc: # Ro[k]=[] # for m in range(len(R[k])): # Ro[k].extend(R[k][m][0]+R[k][m][1]) # #mV=np.mean(Ro.values(),axis=0) #print mV
k0 = (three_to_one(Lu.R[a].get_resname()), three_to_one(Ru.R[b].get_resname())) k1 = (three_to_one(Ru.R[b].get_resname()), three_to_one(Lu.R[a].get_resname())) except KeyError: continue Ncnt[k0] = Ncnt[k0] + 1 Ncnt[k1] = Ncnt[k1] + 1 addASA(cid, True, lPex, APcnt, Lu, Lb, Lu2b) addASA(cid, False, rPex, APcnt, Ru, Rb, Ru2b) addASA(cid, True, lNex, ANcnt, Lu, Lb, Lu2b) addASA(cid, False, rNex, ANcnt, Ru, Rb, Ru2b) #pdb.set_trace() myPickle.dump(ofname, (Pcnt, Ncnt, APcnt, ANcnt, TAC)) else: print "Using existing file", ofname (Pcnt, Ncnt, APcnt, ANcnt, TAC) = myPickle.load(ofname) Pm = getMtx(Pcnt) Nm = getMtx(Ncnt) v = np.atleast_2d(np.sum(Nm, axis=0) + np.sum(Pm, axis=0)) Ex = np.sum(Pm) * ((v * v.T) / np.sum((v * v.T))) # pp = ((Pm - Ex)**2) / (Ex) #pp=(Pm/np.sum(Pm))/(Nm/np.sum(Nm)) pp = np.log2(Pm / Ex) print categ, calc_gini(pp.flatten())
def saveMIFile(M, ofile):
    """
    Write the MI variables ``M`` to the file ``ofile``.

    Thin wrapper around ``cPickle.dump(ofile, M)``; the original author
    describes the output as a zipped pickle binary file.
    """
    # NOTE(review): the path is passed first and the object second, which is
    # the reverse of the standard-library cPickle.dump(obj, file). Presumably
    # ``cPickle`` is bound to a project pickle wrapper here -- confirm at the
    # import site.
    cPickle.dump(ofile, M)
TPS.extend(TPS_p) dsA = mergeDicts(dsAr) print 'Number of complexes', len(dsA) #print 'Complex wise AUC = ',np.mean(dA.values()) p12 = map(list, zip(*dsA.values())) pa = p12[0] p1 = p12[1] p2 = p12[2] ps = p1 ps.extend(p2) print 'Complex Wise AUC =', np.mean(pa), 'Protein Wise AUC =', np.mean( ps) if not auconly: (fplv, tplv, auclv) = roc.roc_VA(LV) (fplvp, tplvp, auclvp) = roc.roc_VA(LVP) mkl.dump(ofname, ((fplv, tplv, auclv), (fplvp, tplvp, auclvp), dsA)) print "Results file saved", ofname print "AUC = ", auclv """ plt.hist(np.array(DNTP).flatten(),[0,1,2,3,4,5,6,1000],cumulative=True);plt.grid();plt.xlabel('sequence distance');plt.ylabel('counts');plt.title('Number of top 200 predictions vs. sequence distance from nearest true positive');plt.show() [np.sum(dn<2.0) for dn in DNTP] cids=[getFileParts(getFileParts(ifile)[1])[1] for ifile in fs] [dsA[cid] for cid in cids] [dAo[cid] for cid in cids] """ #DISTRIBUTION PLOT dthresh = [0, 1, 2, 3, 4] # sequence distance threshold XX = calcRFPP(np.array(TPS)[:, 1] + 1, DNTP, dthresh=dthresh) if doplot: plt.figure() plt.plot(fplv, tplv)