def _make_groups(sites, nmol, mol):
    """
    Build the group index matrix and the labels for a list of sites

    Each site is expanded into nmol groups, one per molecule, so the
    returned matrix has len(sites)*nmol columns.

    Parameters
    ----------
    sites : list of string
      each site is a comma-separated list of residue numbers
    nmol : int
      the number of groups (molecules) to create for every site
    mol : string
      the identifier of the molecule, used to load the protein template

    Returns
    -------
    MxG Numpy array of int
      the group members, M is the size of the largest site and
      G = len(sites)*nmol; unused slots are filled with -1
    list of string
      one label per site (not per group), e.g. "A12,L45"

    Raises
    ------
    ValueError
      if a residue is not an integer or is not found in the template
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = template.residues
    codes = template.codes

    # Parse every site once: template indices of its residues
    site_members = [[residues.index(int(r)) for r in site.split(",")]
                    for site in sites]
    grouplbl = [",".join("%s%d" % (codes[idx], residues[idx])
                         for idx in members)
                for members in site_members]
    maxmem = max([len(members) for members in site_members] + [0])

    # -1 marks "no member" in groups smaller than the largest one
    groups = np.full((maxmem, len(sites) * nmol), -1, dtype=int)
    offset = 0
    groupi = 0
    for members in site_members:
        ntake = len(members)
        for mi in range(nmol):
            # Member i of molecule mi sits at offset + mi + nmol*i, i.e. the
            # indexed series is presumably laid out with nmol consecutive
            # entries per residue -- TODO confirm against the caller
            groups[:ntake, groupi] = offset + mi + nmol * np.arange(ntake)
            groupi += 1
        offset += ntake * nmol
    return groups, grouplbl
def _make_helical(state,mol) :
    """
    Collapse a per-residue state matrix into a per-helix state matrix.
    A helix is flagged in a snapshot as soon as any residue inside
    the helix range is flagged in that snapshot.

    Parameters
    ----------
    state : nxR Numpy array
      the state matrix, n is the number of snapshots
      R is the number of residues
    mol : string
      the identifier of the molecule

    Returns
    -------
    nxH Numpy array
      the helical state matrix, H is the number of helices
    """
    template = gpcr_lib.load_template(mol)
    helix_ranges = template.rhelices
    nsnapshots = state.shape[0]
    hstate = np.zeros([nsnapshots, len(helix_ranges)], dtype=np.uint8)
    for column, helix in enumerate(helix_ranges) :
        # The helix limits are 1-based and inclusive, hence the -1 on the start
        first = helix[0] - 1
        last = helix[1]
        in_helix = state[:, first:last]
        hstate[:, column] = np.any(in_helix, axis=1)
    return hstate
def _make_groups(sites, nmol, mol):
    """
    Set up group indices and labels for the analysis of sites

    Every site gives rise to nmol groups (one for each molecule), all of
    them stored as columns of a single integer matrix.

    Parameters
    ----------
    sites : list of string
      the sites, each a comma-separated string of residue numbers
    nmol : int
      the number of molecules, i.e. the number of groups made per site
    mol : string
      protein identifier, passed on to gpcr_lib.load_template

    Returns
    -------
    Numpy array of int
      shape (largest site size, len(sites)*nmol), each column lists the
      indices of one group and is padded with -1
    list of string
      the label of each site, built from residue codes and numbers

    Raises
    ------
    ValueError
      if a residue number cannot be parsed or is absent from the template
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)

    # Translate each site to template indices once and reuse the result
    # for the labels, the matrix size and the group filling below
    members_per_site = []
    grouplbl = []
    for site in sites:
        sidx = [template.residues.index(int(r)) for r in site.split(",")]
        members_per_site.append(sidx)
        grouplbl.append(",".join(
            "%s%d" % (template.codes[idx], template.residues[idx])
            for idx in sidx))
    maxmem = 0
    if members_per_site:
        maxmem = max(len(sidx) for sidx in members_per_site)

    # Slots that are not used by a group stay at -1
    groups = np.full((maxmem, len(sites) * nmol), -1, dtype=int)
    offset = 0
    groupi = 0
    for sidx in members_per_site:
        ntake = len(sidx)
        for mi in range(nmol):
            # Entry of residue i for molecule mi; this presumes nmol
            # consecutive entries per residue in the indexed data
            # -- TODO confirm against the caller
            gidx = [offset + mi + nmol * i for i in range(ntake)]
            groups[:ntake, groupi] = np.asarray(gidx)
            groupi += 1
        offset += ntake * nmol
    return groups, grouplbl
def _resjointcontacts(filename,label,mat,repeats,out,mol) :
    """
    Main analysis routine

    Reads the joint probability matrix "joint"+mat from the given npz-file
    (and from the corresponding file of every repeat), and plots the
    average over the repeats, its logarithmic version and, when repeats
    are given, the standard error over the repeats.

    Parameters
    ----------
    filename : string
      file to analyse
    label : string
      label for the group
    mat : string
      matrix identifier
    repeats : list of string
      replacement pattern for multiple repeats, the first pattern is
      replaced in filename by each of the following ones;
      None if there is only a single simulation
    out : string
      output prefix
    mol : string
      protein identifier
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = np.array(template.residues)
    residues0 = np.arange(1,residues.shape[0]+1)

    # Work out which files to read; without repeats it is just the one given
    runfiles = [filename]
    if repeats is not None :
        runfiles.extend(filename.replace(repeats[0],r) for r in repeats[1:])
    nrep = len(runfiles)

    # Read the joint matrix of every repeat
    key = "joint"+mat
    pjoint = None
    for ri,runfile in enumerate(runfiles) :
        # Close the archive once the matrix is in memory, to not leak file handles
        with np.load(runfile) as npz :
            pjoint_i = npz[key]
        if pjoint is None :
            # Allocated as a square matrix, sized by the first dimension only
            pjoint = np.zeros([nrep,pjoint_i.shape[0],pjoint_i.shape[0]])
        pjoint[ri,:,:] = pjoint_i
    pjoint_mean = pjoint.mean(axis=0)

    # Draw the standard error over the repeats, meaningless for a single simulation
    if repeats is not None :
        pjoint_std = pjoint.std(axis=0)/np.sqrt(nrep)
        f2d = plt.figure(2,tight_layout=True)
        C = gpcr_lib.draw_joint2d(f2d.gca(),residues0,residues,pjoint_std)
        f2d.colorbar(C)
        f2d.savefig("%s_%s_2d_std.png"%(out,label),format="png")

    # Draw a 2D residue-residue joint probability plot
    f2d = plt.figure(1,tight_layout=True)
    C = gpcr_lib.draw_joint2d(f2d.gca(),residues0,residues,pjoint_mean)
    f2d.colorbar(C)
    f2d.gca().text(1.04,1.01,"p(A,B)",transform=f2d.gca().transAxes)
    f2d.savefig("%s_%s_2d.png"%(out,label),format="png")

    # Draw the same plot with the logit option of the drawing routine
    f2d = plt.figure(3,tight_layout=True)
    C = gpcr_lib.draw_joint2d(f2d.gca(),residues0,residues,pjoint_mean,logit=True)
    f2d.colorbar(C)
    f2d.gca().text(1.02,1.02,"ln p(A,B)",transform=f2d.gca().transAxes)
    f2d.savefig("%s_%s_2d_log.png"%(out,label),format="png")
def _rescontacts(filenames, labels, repeats, out, mol, time, every):
    """
    Main analysis routine

    For each group of files, computes pairwise contact probabilities and
    contact lifetimes, averages them over the repeats, prints lifetime
    statistics and writes plots and a text file prefixed with out.

    Parameters
    ----------
    filenames : list of strings
      the group of files to analyse
    labels : list of string
      the label for each group
    repeats : list of string
      replacement pattern for multiple repeats within each group, the
      first pattern is replaced in the filename by each of the others;
      None if each group is a single simulation
    out : string
      output prefix
    mol : string
      protein identifier
    time : float
      the total simulation time
    every : int
      the reading frequency
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = np.array(template.residues)
    residues0 = np.arange(1, residues.shape[0] + 1)
    codes = np.array(template.codes)
    names = np.array(template.names)
    nres = residues.shape[0]

    pcontacts = []
    for fi, (filename, label) in enumerate(zip(filenames, labels)):

        # The files of this group: the given one plus one for each repeat
        runfiles = [filename]
        if repeats is not None:
            runfiles.extend(
                filename.replace(repeats[0], r) for r in repeats[1:])
        nrep = len(runfiles)

        # Read the state files from disc and perform pairwise contact
        # analysis on each of them
        states = []
        pstack = None
        for ri, runfile in enumerate(runfiles):
            states.append(gpcr_lib.read_statefile(runfile, every))
            pp = pycontacts.pairwise_contacts(states[-1])
            if pstack is None:
                pstack = np.zeros([nrep, pp.shape[0], pp.shape[1]])
            pstack[ri, :, :] = pp
        # Average over the repeats and convert to percent
        pcontacts.append(pstack.mean(axis=0) * 100.0)

        # Conversion factor from snapshot lifetime to ns lifetime
        ns_per_snapshot = time / float(states[0].shape[0])

        # Calculate lifetimes; the repeats are laid side by side along
        # axis 1, so the result is folded back to (repeats, residues)
        # before averaging over the repeats
        all_states = np.concatenate(states, axis=1)
        life_av, life_max = pycontacts.lifetime(all_states)
        life_av = np.reshape(life_av,
                             [nrep, nres]).mean(axis=0) * ns_per_snapshot
        life_max = np.reshape(life_max,
                              [nrep, nres]).mean(axis=0) * ns_per_snapshot
        # Single-argument print calls behave the same on Python 2 and 3
        print("Median av-lifetime = %.3f" % np.median(life_av))
        print("Average av-lifetime = %.3f" % np.mean(life_av))
        print("Median max-lifetime = %.3f" % np.median(life_max))
        print("Average max-lifetime = %.3f" % np.mean(life_max))

        # Draw a 2D residue-residue joint probability plot
        f2d = plt.figure(10 + fi)
        _draw_2d(f2d.gca(), residues0, residues, pcontacts[-1])
        f2d.savefig("%s_%s_2d.png" % (out, label), format="png")

        # Draw a residue contact probability plot
        f1d = plt.figure(20 + fi, figsize=(6.85, 3.41), tight_layout=True)
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 pcontacts[-1].diagonal(), "Contact probability", 80)
        f1d.savefig("%s_%s_1d.png" % (out, label), format="png", dpi=300)

        # And print it out to disc
        with open("%s_%s_1d.txt" % (out, label), "w") as f:
            for (name, res, prob) in zip(names, residues,
                                         pcontacts[-1].diagonal()):
                f.write("%s%d %8.3f\n" % (name.capitalize(), res, prob))

        # Draw an average lifetime plot
        f1d = plt.figure(40 + fi, figsize=(6.85, 3.41), tight_layout=True)
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 life_av, "Average lifetime (ns)", 0.6)
        f1d.savefig("%s_%s_avlife.png" % (out, label), format="png")

        # Draw a maximum lifetime plot
        f1d = plt.figure(50 + fi, figsize=(6.85, 3.41), tight_layout=True)
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 life_max, "Maximum lifetime (ns)", 35)
        f1d.savefig("%s_%s_maxlife.png" % (out, label), format="png")

    # Plot residue-type averaged occupancies for all groups together; the
    # file name is built from the out argument like every other output
    faa = plt.figure(30)
    _draw_aa(faa.gca(), names, pcontacts, labels)
    faa.savefig(out + "_aa.png", format="png")
cholmolfile = open(prefix + "_chol.mstate.%.0f.dat" % (args.cutoff), "wb") cholresfile = open(prefix + "_chol.resstate.%.0f.dat" % (args.cutoff), "wb") cholburresfile = open( prefix + "_chol.buried.rstate.%.0f.dat" % (args.cutoff), "wb") ohmolfile = open(prefix + "_chol-oh.mstate.%.0f.dat" % (args.cutoff), "wb") ohresfile = open(prefix + "_chol-oh.resstate.%.0f.dat" % (args.cutoff), "wb") ohburresfile = open( prefix + "_chol-oh.buried.rstate.%.0f.dat" % (args.cutoff), "wb") if args.reslist is not None: with open(args.reslist, "r") as f: lines = f.readlines() template = gpcr_lib.load_template(lines[0].strip()) reslist = [ template.residues.index(int(l.strip())) for l in lines[1:] ] reslistfile = open( prefix + "_chol-oh.resliststate.%.0f.dat" % (args.cutoff), "wb") else: reslist = None reslistfile = None if do_joint and len(chols) > 0: jointcom = np.zeros([len(residues), len(residues)]) jointcomburr = np.zeros([len(residues), len(residues)]) jointoh = np.zeros([len(residues), len(residues)]) jointohburr = np.zeros([len(residues), len(residues)])
def _rescontacts(filenames,
                 labels,
                 repeats,
                 sites,
                 out,
                 mol,
                 time,
                 every,
                 block=None):
    """
    Main analysis routine

    For each group of files, computes pairwise contact probabilities and
    contact lifetimes per residue (and per site, if sites are given),
    averages them over the repeats, prints statistics and writes plots
    and a text file prefixed with out.

    Parameters
    ----------
    filenames : list of strings
      the group of files to analyse
    labels : list of string
      the label for each group
    repeats : list of string
      replacement pattern for multiple repeats within each group, the
      first pattern is replaced in the filename by each of the others;
      None if each group is a single simulation
    sites : list of string
      sites to group and analyse, each a comma-separated list of residue
      numbers; if the first element is "l" a lead-analysis is done
    out : string
      output prefix
    mol : string
      protein identifier
    time : float
      the total simulation time
    every : int
      the reading frequency
    block : tuple of int
      just do the calculation on a block of the series
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = np.array(template.residues)
    residues0 = np.arange(1, residues.shape[0] + 1)
    codes = np.array(template.codes)
    names = np.array(template.names)
    nres = residues.shape[0]

    # Setup analysis of sites/groups
    if sites:
        # If the first element is l, we will do a lead-analysis
        # i.e. the first residue in a site needs to be on if the whole
        # site is to be considered to be on
        if sites[0].lower() == "l":
            makefnc = pycontacts.make_group_series_lead
            sites = sites[1:]
        else:
            makefnc = pycontacts.make_group_series

        # Translate each site to template indices once, and use that for
        # the labels, the maximum member count and the group matrix
        site_members = [[
            template.residues.index(int(r)) for r in site.split(",")
        ] for site in sites]
        grouplbl = [
            ",".join(["%s%d" % (codes[idx], residues[idx]) for idx in sidx])
            for sidx in site_members
        ]
        maxmem = max([len(sidx) for sidx in site_members] + [0])

        # Fill the np array with group indices or -1
        groups = np.full((maxmem, len(sites)), -1, dtype=int)
        for i, sidx in enumerate(site_members):
            groups[:len(sidx), i] = np.asarray(sidx)

    pcontacts = []
    pcontacts_std = []
    for fi, (filename, label) in enumerate(zip(filenames, labels)):

        # The files of this group: the given one plus one for each repeat
        runfiles = [filename]
        if repeats is not None:
            runfiles.extend(
                filename.replace(repeats[0], r) for r in repeats[1:])
        nrep = len(runfiles)

        # Read the state files from disc and perform pairwise contact
        # analysis, on the residues and on the sites, for every repeat
        states = []
        gstates = []
        pstack = None
        gstack = None
        for ri, runfile in enumerate(runfiles):
            states.append(gpcr_lib.read_statefile(runfile, every, block))
            pp = pycontacts.pairwise_contacts(states[-1])
            if pstack is None:
                pstack = np.zeros([nrep, pp.shape[0], pp.shape[1]])
            pstack[ri, :, :] = pp
            if sites:
                gstates.append(makefnc(states[-1], groups))
                pg = pycontacts.pairwise_contacts(gstates[-1]).diagonal()
                if gstack is None:
                    gstack = np.zeros([nrep, pg.shape[0]])
                gstack[ri, :] = pg

        # Average and standard error over the repeats, in percent; with
        # a single simulation the standard error is zero
        pcontacts_std.append(pstack.std(axis=0) * 100.0 / np.sqrt(nrep))
        pcontacts.append(pstack.mean(axis=0) * 100.0)
        if sites:
            pgroupcontacts_std = gstack.std(axis=0) * 100.0 / np.sqrt(nrep)
            pgroupcontacts = gstack.mean(axis=0) * 100.0
            # Single-argument print calls behave the same on Python 2 and 3
            print("Sites occupancies:")
            print("\n".join([
                "\t%s: %.3f +- %.3f" % (lbl, occ, std) for occ, std, lbl in
                zip(pgroupcontacts, pgroupcontacts_std, grouplbl)
            ]))

        # Conversion factor from snapshot lifetime to ns lifetime
        # The time argument is left untouched, so that the block scaling
        # is applied once for every group and does not accumulate
        print(time)
        block_time = time
        if block is not None:
            block_time = time / float(block[0]) * block[1]
        print(block_time)
        ns_per_snapshot = block_time / float(states[0].shape[0])

        # Calculate lifetimes; the repeats are laid side by side along
        # axis 1, so the result is folded back to (repeats, residues)
        # before averaging over the repeats
        all_states = np.concatenate(states, axis=1)
        life_av, life_max = pycontacts.lifetime(all_states)
        life_av = np.reshape(life_av,
                             [nrep, nres]).mean(axis=0) * ns_per_snapshot
        life_max = np.reshape(life_max,
                              [nrep, nres]).mean(axis=0) * ns_per_snapshot
        print("Median av-lifetime = %.3f" % np.median(life_av))
        print("Average av-lifetime = %.3f" % np.mean(life_av))
        print("Median max-lifetime = %.3f" % np.median(life_max))
        print("Average max-lifetime = %.3f" % np.mean(life_max))

        if sites:
            # Same lifetime analysis, but on the site series
            all_gstates = np.concatenate(gstates, axis=1)
            glife_av, glife_max = pycontacts.lifetime(all_gstates)
            glife_av = np.reshape(
                glife_av,
                [nrep, groups.shape[1]]).mean(axis=0) * ns_per_snapshot
            glife_max = np.reshape(
                glife_max,
                [nrep, groups.shape[1]]).mean(axis=0) * ns_per_snapshot
            print("Sites occ. av-lifetime max-lifetime:")
            print("\n".join([
                "\t%s: %.3f\t%.3f\t%.3f" % (lbl, o, a, m) for o, a, m, lbl in
                zip(pgroupcontacts, glife_av, glife_max, grouplbl)
            ]))

        # Draw a 2D residue-residue joint probability plot
        f2d = plt.figure(10 + fi)
        _draw_2d(f2d.gca(), residues0, residues, pcontacts[-1])
        f2d.savefig("%s_%s_2d.png" % (out, label), format="png", dpi=300)

        # Draw a residue contact probability plot
        f1d = plt.figure(20 + fi, figsize=(6.85, 3.41), tight_layout=True)
        p = 80 if label == "com" else 100
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 pcontacts[-1].diagonal(), "Contact probability", p)
        f1d.savefig("%s_%s_1d.png" % (out, label), format="png", dpi=300)

        # Draw an average lifetime plot
        f1d = plt.figure(40 + fi, figsize=(6.85, 3.41), tight_layout=True)
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 life_av, "Average lifetime (ns)", 0.6)
        f1d.savefig("%s_%s_avlife.png" % (out, label), format="png", dpi=300)

        # Draw a maximum lifetime plot
        f1d = plt.figure(50 + fi, figsize=(6.85, 3.41), tight_layout=True)
        _draw_1d(f1d.gca(), residues0, residues, codes, template.rhelices,
                 life_max, "Maximum lifetime (ns)", 35)
        f1d.savefig("%s_%s_maxlife.png" % (out, label),
                    format="png",
                    dpi=300)

        # And print it out to disc
        with open("%s_%s_1d.txt" % (out, label), "w") as f:
            for (name, res, prob, prob_std, rav, rmax) in zip(
                    names, residues, pcontacts[-1].diagonal(),
                    pcontacts_std[-1].diagonal(), life_av, life_max):
                f.write("%s%d\t%8.3f\t%8.3f\t%8.3f\t%8.3f\n" %
                        (name.capitalize(), res, prob, prob_std, rav, rmax))

    # Plot residue-type averaged occupancies for all groups together; the
    # file name is built from the out argument like every other output
    faa = plt.figure(30)
    _draw_aa(faa.gca(), names, pcontacts, labels)
    faa.savefig(out + "_aa.png", format="png", dpi=300)
shortmolfile = open(prefix+"_short.mstate.%.0f.dat"%args.cutoff,"wb") shortresfile = open(prefix+"_short.resstate.%.0f.dat"%args.cutoff,"wb") longmolfile = open(prefix+"_long.mstate.%.0f.dat"%args.cutoff,"wb") longresfile = open(prefix+"_long.resstate.%.0f.dat"%args.cutoff,"wb") if do_contacts and len(chols) > 0 : cholmolfile = open(prefix+"_chol.mstate.%.0f.dat"%(args.cutoff),"wb") cholresfile = open(prefix+"_chol.resstate.%.0f.dat"%(args.cutoff),"wb") cholburresfile = open(prefix+"_chol.buried.rstate.%.0f.dat"%(args.cutoff),"wb") ohmolfile = open(prefix+"_chol-oh.mstate.%.0f.dat"%(args.cutoff),"wb") ohresfile = open(prefix+"_chol-oh.resstate.%.0f.dat"%(args.cutoff),"wb") ohburresfile = open(prefix+"_chol-oh.buried.rstate.%.0f.dat"%(args.cutoff),"wb") if args.reslist is not None: with open(args.reslist,"r") as f : lines = f.readlines() template = gpcr_lib.load_template(lines[0].strip()) reslist = [template.residues.index(int(l.strip())) for l in lines[1:]] reslistfile = open(prefix+"_chol-oh.resliststate.%.0f.dat"%(args.cutoff),"wb") else : reslist = None reslistfile = None if do_joint and len(chols) > 0 : jointcom = np.zeros([len(residues),len(residues)]) jointcomburr = np.zeros([len(residues),len(residues)]) jointoh = np.zeros([len(residues),len(residues)]) jointohburr = np.zeros([len(residues),len(residues)]) # Allocate matrices for buried analysis if do_countburied :
def _rescontacts(filenames,labels,repeats,out,mol,time,every) :
    """
    Main analysis routine

    For each group of files, computes pairwise contact probabilities and
    contact lifetimes, averages them over the repeats, prints lifetime
    statistics and writes plots and a text file prefixed with out.

    Parameters
    ----------
    filenames : list of strings
      the group of files to analyse
    labels : list of string
      the label for each group
    repeats : list of string
      replacement pattern for multiple repeats within each group, the
      first pattern is replaced in the filename by each of the others;
      None if each group is a single simulation
    out : string
      output prefix
    mol : string
      protein identifier
    time : float
      the total simulation time
    every : int
      the reading frequency
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = np.array(template.residues)
    residues0 = np.arange(1,residues.shape[0]+1)
    codes = np.array(template.codes)
    names = np.array(template.names)
    nres = residues.shape[0]

    pcontacts = []
    for fi,(filename,label) in enumerate(zip(filenames,labels)) :

        # The files of this group: the given one plus one for each repeat
        runfiles = [filename]
        if repeats is not None :
            runfiles.extend(filename.replace(repeats[0],r) for r in repeats[1:])
        nrep = len(runfiles)

        # Read the state files from disc and perform pairwise contact analysis
        states = []
        pstack = None
        for ri,runfile in enumerate(runfiles) :
            states.append(gpcr_lib.read_statefile(runfile,every))
            pp = pycontacts.pairwise_contacts(states[-1])
            if pstack is None :
                pstack = np.zeros([nrep,pp.shape[0],pp.shape[1]])
            pstack[ri,:,:] = pp
        # Average over the repeats and convert to percent
        pcontacts.append(pstack.mean(axis=0)*100.0)

        # Conversion factor from snapshot lifetime to ns lifetime
        ns_per_snapshot = time / float(states[0].shape[0])

        # Calculate lifetimes; the repeats are laid side by side along axis 1,
        # so the result is folded back to (repeats, residues) before averaging
        all_states = np.concatenate(states,axis=1)
        life_av,life_max = pycontacts.lifetime(all_states)
        life_av = np.reshape(life_av,[nrep,nres]).mean(axis=0)*ns_per_snapshot
        life_max = np.reshape(life_max,[nrep,nres]).mean(axis=0)*ns_per_snapshot
        # Single-argument print calls behave the same on Python 2 and 3
        print("Median av-lifetime = %.3f"%np.median(life_av))
        print("Average av-lifetime = %.3f"%np.mean(life_av))
        print("Median max-lifetime = %.3f"%np.median(life_max))
        print("Average max-lifetime = %.3f"%np.mean(life_max))

        # Draw a 2D residue-residue joint probability plot
        f2d = plt.figure(10+fi)
        _draw_2d(f2d.gca(),residues0,residues,pcontacts[-1])
        f2d.savefig("%s_%s_2d.png"%(out,label),format="png")

        # Draw a residue contact probability plot
        f1d = plt.figure(20+fi,figsize=(6.85,3.41),tight_layout=True)
        _draw_1d(f1d.gca(),residues0,residues,codes,template.rhelices,pcontacts[-1].diagonal(),"Contact probability",80)
        f1d.savefig("%s_%s_1d.png"%(out,label),format="png",dpi=300)

        # And print it out to disc
        with open("%s_%s_1d.txt"%(out,label),"w") as f :
            for (name,res,prob) in zip(names,residues,pcontacts[-1].diagonal()) :
                f.write("%s%d %8.3f\n"%(name.capitalize(),res,prob))

        # Draw an average lifetime plot
        f1d = plt.figure(40+fi,figsize=(6.85,3.41),tight_layout=True)
        _draw_1d(f1d.gca(),residues0,residues,codes,template.rhelices,life_av,"Average lifetime (ns)",0.6)
        f1d.savefig("%s_%s_avlife.png"%(out,label),format="png")

        # Draw a maximum lifetime plot
        f1d = plt.figure(50+fi,figsize=(6.85,3.41),tight_layout=True)
        _draw_1d(f1d.gca(),residues0,residues,codes,template.rhelices,life_max,"Maximum lifetime (ns)",35)
        f1d.savefig("%s_%s_maxlife.png"%(out,label),format="png")

    # Plot residue-type averaged occupancies for all groups together; the
    # file name is built from the out argument like every other output
    faa = plt.figure(30)
    _draw_aa(faa.gca(),names,pcontacts,labels)
    faa.savefig(out+"_aa.png",format="png")
def _resjointcontacts(filename, label, mat, repeats, out, mol):
    """
    Main analysis routine

    Reads the joint probability matrix "joint" + mat from the given
    npz-file (and from the corresponding file of every repeat), and plots
    the average over the repeats, its logarithmic version and, when
    repeats are given, the standard error over the repeats.

    Parameters
    ----------
    filename : string
      file to analyse
    label : string
      label for the group
    mat : string
      matrix identifier
    repeats : list of string
      replacement pattern for multiple repeats, the first pattern is
      replaced in filename by each of the following ones;
      None if there is only a single simulation
    out : string
      output prefix
    mol : string
      protein identifier
    """
    # Load the protein template to obtain residue information
    template = gpcr_lib.load_template(mol)
    residues = np.array(template.residues)
    residues0 = np.arange(1, residues.shape[0] + 1)

    # Work out which files to read; without repeats it is just the one given
    runfiles = [filename]
    if repeats is not None:
        runfiles.extend(filename.replace(repeats[0], r) for r in repeats[1:])
    nrep = len(runfiles)

    # Read the joint matrix of every repeat
    key = "joint" + mat
    pjoint = None
    for ri, runfile in enumerate(runfiles):
        # Close the archive once the matrix is in memory, to not leak
        # file handles
        with np.load(runfile) as npz:
            pjoint_i = npz[key]
        if pjoint is None:
            # Allocated as a square matrix, sized by the first dimension only
            pjoint = np.zeros([nrep, pjoint_i.shape[0], pjoint_i.shape[0]])
        pjoint[ri, :, :] = pjoint_i
    pjoint_mean = pjoint.mean(axis=0)

    # Draw the standard error over the repeats, meaningless for a single
    # simulation
    if repeats is not None:
        pjoint_std = pjoint.std(axis=0) / np.sqrt(nrep)
        f2d = plt.figure(2, tight_layout=True)
        C = gpcr_lib.draw_joint2d(f2d.gca(), residues0, residues, pjoint_std)
        f2d.colorbar(C)
        f2d.savefig("%s_%s_2d_std.png" % (out, label), format="png")

    # Draw a 2D residue-residue joint probability plot
    f2d = plt.figure(1, tight_layout=True)
    C = gpcr_lib.draw_joint2d(f2d.gca(), residues0, residues, pjoint_mean)
    f2d.colorbar(C)
    f2d.gca().text(1.04, 1.01, "p(A,B)", transform=f2d.gca().transAxes)
    f2d.savefig("%s_%s_2d.png" % (out, label), format="png")

    # Draw the same plot with the logit option of the drawing routine
    f2d = plt.figure(3, tight_layout=True)
    C = gpcr_lib.draw_joint2d(f2d.gca(),
                              residues0,
                              residues,
                              pjoint_mean,
                              logit=True)
    f2d.colorbar(C)
    f2d.gca().text(1.02, 1.02, "ln p(A,B)", transform=f2d.gca().transAxes)
    f2d.savefig("%s_%s_2d_log.png" % (out, label), format="png")