def test_dump():
    """Dump R- and G-vectors of ``fname`` to text files and check them with ``comp``.

    For every ordered residue pair ``(i1, i2)`` the vector stored at index 0
    of the leading axis is written as one row, provided its squared norm
    exceeds 1.0e-5.  R-vectors (3 components, cutoff=1.7) go to
    ``dump_01.test.dat`` and G-vectors (4 components) to ``dump_02.test.dat``
    inside ``outdir``; ``comp`` is then called with the matching paths under
    ``refdir``.
    """
    rvecs, resi = bb.dump_rvec(fname, cutoff=1.7)
    nres = len(resi)
    rows = [
        "%10s %10s %14e %14e %14e \n" % (
            resi[i1], resi[i2],
            rvecs[0, i1, i2, 0], rvecs[0, i1, i2, 1], rvecs[0, i1, i2, 2])
        for i1 in range(nres)
        for i2 in range(nres)
        if sum((rvecs[0, i1, i2])**2) > 1.0e-5
    ]
    # Open the file only once the content is ready; ``with`` guarantees the
    # handle is closed even if the write fails.
    with open("%s/dump_01.test.dat" % outdir, 'w') as fh:
        fh.write("".join(rows))

    gvecs, resi = bb.dump_gvec(fname)
    nres = len(resi)
    rows = [
        "%10s %10s %14e %14e %14e %14e \n" % (
            resi[i1], resi[i2],
            gvecs[0, i1, i2, 0], gvecs[0, i1, i2, 1],
            gvecs[0, i1, i2, 2], gvecs[0, i1, i2, 3])
        for i1 in range(nres)
        for i2 in range(nres)
        if sum((gvecs[0, i1, i2])**2) > 1.0e-5
    ]
    with open("%s/dump_02.test.dat" % outdir, 'w') as fh:
        fh.write("".join(rows))

    comp("%s/dump_01.test.dat" % refdir)
    comp("%s/dump_02.test.dat" % refdir)
def test_cluster():
    """Cluster trajectory G-vectors with DBSCAN and dump the cluster centroids.

    Steps: compute G-vectors for ``traj``/``top``, flatten each frame to one
    row and keep every fifth frame, run a 3-component PCA (only its first
    return value is printed), run DBSCAN, then save one PDB per entry of
    ``center_idx`` into ``outdir`` and call ``comp`` on the matching path
    under ``refdir``.  No plotting is performed.
    """
    # first, calculate all g-vectors
    print("# Calculating G-vectors")
    gvec, _ = bb.dump_gvec(traj, top)
    lent = gvec.shape[0]
    gvec = gvec.reshape(lent, -1)[::5]
    print("# Calculating PCA. gvec shape: ", gvec.shape)

    # calculate PCA
    v, _ = cc.pca(gvec, nevecs=3)
    print("# Cumulative explained variance of component: 1=%5.1f 2:=%5.1f 3=%5.1f"
          % (v[0]*100, v[1]*100, v[2]*100))

    print("# DBSCAN clustering...")
    # do DBSCAN clustering. eps and min_samples need to be adjusted.
    new_labels, center_idx = cc.dbscan(gvec, range(gvec.shape[0]),
                                       eps=0.6/np.sqrt(8.), min_samples=10)
    print("DONE!")

    # now dump centroids
    print("# Dump PDB centroids")
    t = md.load(traj, top=top)
    # NOTE(review): gvec was subsampled with [::5] while ``t`` holds the full
    # trajectory, so ``t[k]`` may not be the frame that produced centroid k.
    # Left unchanged because the reference files match this behaviour - confirm.
    for i, k in enumerate(center_idx):
        t[k].save_pdb("%s/cluster_%03d.test.pdb" % (outdir, i))
        comp("%s/cluster_%03d.test.pdb" % (refdir, i))
def test_dump():
    """Dump R- and G-vectors of ``fname`` to text files and check them with ``comp``.

    For every ordered residue pair ``(i1, i2)`` the vector stored at index 0
    of the leading axis is written as one row, provided its squared norm
    exceeds 1.0e-5.  R-vectors (3 components, cutoff=1.7) go to
    ``dump_01.test.dat`` and G-vectors (4 components) to ``dump_02.test.dat``
    inside ``outdir``; ``comp`` is then called with the matching paths under
    ``refdir``.
    """
    rvecs, resi = bb.dump_rvec(fname, cutoff=1.7)
    nres = len(resi)
    rows = [
        "%10s %10s %14e %14e %14e \n" % (
            resi[i1], resi[i2],
            rvecs[0, i1, i2, 0], rvecs[0, i1, i2, 1], rvecs[0, i1, i2, 2])
        for i1 in range(nres)
        for i2 in range(nres)
        if sum((rvecs[0, i1, i2])**2) > 1.0e-5
    ]
    # Open the file only once the content is ready; ``with`` guarantees the
    # handle is closed even if the write fails.
    with open("%s/dump_01.test.dat" % outdir, 'w') as fh:
        fh.write("".join(rows))

    gvecs, resi = bb.dump_gvec(fname)
    nres = len(resi)
    rows = [
        "%10s %10s %14e %14e %14e %14e \n" % (
            resi[i1], resi[i2],
            gvecs[0, i1, i2, 0], gvecs[0, i1, i2, 1],
            gvecs[0, i1, i2, 2], gvecs[0, i1, i2, 3])
        for i1 in range(nres)
        for i2 in range(nres)
        if sum((gvecs[0, i1, i2])**2) > 1.0e-5
    ]
    with open("%s/dump_02.test.dat" % outdir, 'w') as fh:
        fh.write("".join(rows))

    comp("%s/dump_01.test.dat" % refdir)
    comp("%s/dump_02.test.dat" % refdir)
def test_smm_1():
    """Construct ``smm.SMM`` (eps=0.5) from G-vectors of the UUCG test data.

    The G-vectors of ``UUCG.xtc`` (topology ``UUCG.pdb``, both under
    ``<cwd>/test/data``) are flattened to one row per frame and only every
    fifth frame is passed on.
    """
    topology_path = "%s/test/data/UUCG.pdb" % cwd
    trajectory_path = "%s/test/data/UUCG.xtc" % cwd

    gvectors, _sequence = bb.dump_gvec(trajectory_path, topology=topology_path)

    # One flat row per frame, then keep frames 0, 5, 10, ...
    n_frames = gvectors.shape[0]
    per_frame = gvectors.reshape(n_frames, -1)
    subsampled = per_frame[::5]

    model = smm.SMM(subsampled, eps=0.5)
def test_smm_1():
    """Build an ``smm.SMM`` object (eps=0.5) from subsampled UUCG G-vectors.

    Input files are ``UUCG.pdb`` (topology) and ``UUCG.xtc`` (trajectory) in
    ``<cwd>/test/data``.  Each frame's G-vectors are flattened into a single
    row and every fifth row is kept before the object is constructed.
    """
    data_files = {
        "top": "%s/test/data/UUCG.pdb" % cwd,
        "trj": "%s/test/data/UUCG.xtc" % cwd,
    }
    all_gvecs, _seq = bb.dump_gvec(data_files["trj"], topology=data_files["top"])

    # Flatten per frame and take every fifth frame in one expression.
    every_fifth = all_gvecs.reshape(all_gvecs.shape[0], -1)[::5]

    model = smm.SMM(every_fifth, eps=0.5)
def _vector_dump_text(args, dump_func, labels):
    """Return the full text report for one vector type.

    ``dump_func`` is the function producing ``(vectors, residues)`` (here
    ``bb.dump_rvec`` or ``bb.dump_gvec``); ``labels`` names the vector
    components and fixes how many of them are printed for each pair.
    """
    ncomp = len(labels)
    row_fmt = " %15s %15s" + " %11.4e" * ncomp + " \n"

    def frame_rows(vecs, resi, frame):
        # Build the permutations anew for every frame: the iterator returned
        # by its.permutations is exhausted after a single pass.
        return [
            row_fmt % ((resi[i1], resi[i2]) + tuple(vecs[frame, i1, i2, :ncomp]))
            for i1, i2 in its.permutations(range(len(resi)), 2)
            if sum(vecs[frame, i1, i2]**2) > 1.E-05
        ]

    chunks = [
        "# %s \n" % (" ".join(sys.argv[:])),
        ("#%15s %15s" + " %11s" * ncomp + " \n") % (("RES1", "RES2") + tuple(labels)),
    ]
    if args.top is None:
        # No topology: every entry of args.pdbs is a structure; index 0 is dumped.
        for pdb in args.pdbs:
            vecs, resi = dump_func(pdb, cutoff=args.cutoff)
            chunks.append("# PDB %s \n" % pdb.split("/")[-1])
            chunks.extend(frame_rows(vecs, resi, 0))
    else:
        # Trajectory + topology: one section per frame.
        vecs, resi = dump_func(args.trj, topology=args.top, cutoff=args.cutoff)
        for frame in range(len(vecs)):
            chunks.append("# Frame %d \n" % frame)
            chunks.extend(frame_rows(vecs, resi, frame))
    return "".join(chunks)


def dump(args):
    """Write pairwise R- and/or G-vectors to ``<args.name>.rvec.out`` / ``.gvec.out``.

    Reads ``args.dumpR``, ``args.dumpG``, ``args.top``, ``args.pdbs``,
    ``args.trj``, ``args.cutoff`` and ``args.name``.  Each output starts with
    the command line and a column header, followed by one section per PDB
    file (when ``args.top`` is None) or per trajectory frame.  Only ordered
    residue pairs whose vector has squared norm above 1.E-05 are listed.

    Fixes relative to the previous version: the G-vector trajectory branch
    now calls ``bb.dump_gvec`` (it called ``bb.dump_rvec`` and then read a
    fourth component), and every frame is listed (the pair iterator used to
    be exhausted after the first frame).

    Raises:
        AssertionError: if neither ``args.dumpR`` nor ``args.dumpG`` is set.
    """
    assert args.dumpR or args.dumpG, "# ERROR. choose --dumpR and/or --dumpG"

    if args.dumpR:
        stri_r = _vector_dump_text(args, bb.dump_rvec, ("x", "y", "z"))
        with open(args.name + ".rvec.out", 'w') as fh:
            fh.write(stri_r)

    if args.dumpG:
        stri_g = _vector_dump_text(args, bb.dump_gvec, ("G0", "G1", "G2", "G3"))
        with open(args.name + ".gvec.out", 'w') as fh:
            fh.write(stri_g)
def _vector_dump_text(args, dump_func, labels):
    """Return the full text report for one vector type.

    ``dump_func`` is the function producing ``(vectors, residues)`` (here
    ``bb.dump_rvec`` or ``bb.dump_gvec``); ``labels`` names the vector
    components and fixes how many of them are printed for each pair.
    """
    ncomp = len(labels)
    row_fmt = " %15s %15s" + " %11.4e" * ncomp + " \n"

    def frame_rows(vecs, resi, frame):
        # Build the permutations anew for every frame: the iterator returned
        # by its.permutations is exhausted after a single pass.
        return [
            row_fmt % ((resi[i1], resi[i2]) + tuple(vecs[frame, i1, i2, :ncomp]))
            for i1, i2 in its.permutations(range(len(resi)), 2)
            if sum(vecs[frame, i1, i2]**2) > 1.E-05
        ]

    chunks = [
        "# %s \n" % (" ".join(sys.argv[:])),
        ("#%15s %15s" + " %11s" * ncomp + " \n") % (("RES1", "RES2") + tuple(labels)),
    ]
    if args.top is None:
        # No topology: every entry of args.pdbs is a structure; index 0 is dumped.
        for pdb in args.pdbs:
            vecs, resi = dump_func(pdb, cutoff=args.cutoff)
            chunks.append("# PDB %s \n" % pdb.split("/")[-1])
            chunks.extend(frame_rows(vecs, resi, 0))
    else:
        # Trajectory + topology: one section per frame.
        vecs, resi = dump_func(args.trj, topology=args.top, cutoff=args.cutoff)
        for frame in range(len(vecs)):
            chunks.append("# Frame %d \n" % frame)
            chunks.extend(frame_rows(vecs, resi, frame))
    return "".join(chunks)


def dump(args):
    """Write pairwise R- and/or G-vectors to ``<args.name>.rvec.out`` / ``.gvec.out``.

    Reads ``args.dumpR``, ``args.dumpG``, ``args.top``, ``args.pdbs``,
    ``args.trj``, ``args.cutoff`` and ``args.name``.  Each output starts with
    the command line and a column header, followed by one section per PDB
    file (when ``args.top`` is None) or per trajectory frame.  Only ordered
    residue pairs whose vector has squared norm above 1.E-05 are listed.

    Fixes relative to the previous version: the assertion message now names
    ``--dumpG`` (it repeated ``--dumpR``), the G-vector trajectory branch
    calls ``bb.dump_gvec`` (it called ``bb.dump_rvec`` and then read a fourth
    component), and every frame is listed (the pair iterator used to be
    exhausted after the first frame).

    Raises:
        AssertionError: if neither ``args.dumpR`` nor ``args.dumpG`` is set.
    """
    assert args.dumpR or args.dumpG, "# ERROR. choose --dumpR and/or --dumpG"

    if args.dumpR:
        stri_r = _vector_dump_text(args, bb.dump_rvec, ("x", "y", "z"))
        with open(args.name + ".rvec.out", 'w') as fh:
            fh.write(stri_r)

    if args.dumpG:
        stri_g = _vector_dump_text(args, bb.dump_gvec, ("G0", "G1", "G2", "G3"))
        with open(args.name + ".gvec.out", 'w') as fh:
            fh.write(stri_g)
def test_cluster():
    """Cluster trajectory G-vectors with DBSCAN and dump the cluster centroids.

    Steps: compute G-vectors for ``traj``/``top``, flatten each frame to one
    row and keep every fifth frame, run a 3-component PCA (only its first
    return value is printed), run DBSCAN, then save one PDB per entry of
    ``center_idx`` into ``outdir`` and call ``comp`` on the matching path
    under ``refdir``.  No plotting is performed.
    """
    # first, calculate all g-vectors
    print("# Calculating G-vectors")
    gvec, _ = bb.dump_gvec(traj, top)
    lent = gvec.shape[0]
    gvec = gvec.reshape(lent, -1)[::5]
    print("# Calculating PCA. gvec shape: ", gvec.shape)

    # calculate PCA
    v, _ = cc.pca(gvec, nevecs=3)
    print(
        "# Cumulative explained variance of component: 1=%5.1f 2:=%5.1f 3=%5.1f"
        % (v[0] * 100, v[1] * 100, v[2] * 100))

    print("# DBSCAN clustering...")
    # do DBSCAN clustering. eps and min_samples need to be adjusted.
    new_labels, center_idx = cc.dbscan(gvec,
                                       range(gvec.shape[0]),
                                       eps=0.6 / np.sqrt(8.),
                                       min_samples=10)
    print("DONE!")

    # now dump centroids
    print("# Dump PDB centroids")
    t = md.load(traj, top=top)
    # NOTE(review): gvec was subsampled with [::5] while ``t`` holds the full
    # trajectory, so ``t[k]`` may not be the frame that produced centroid k.
    # Left unchanged because the reference files match this behaviour - confirm.
    for i, k in enumerate(center_idx):
        t[k].save_pdb("%s/cluster_%03d.test.pdb" % (outdir, i))
        comp("%s/cluster_%03d.test.pdb" % (refdir, i))