def DFG_dihedral_byrun(project, runs, def_DFG):
    """Compute the DFG dihedral of a project's trajectories, grouped by run.

    Parameters
    ----------
    project : int or str
        Project identifier. It is interpolated into the data path as the
        directory that holds the ``run<N>-clone<M>.h5`` files.
    runs : int
        Number of runs; runs ``0 .. runs - 1`` are processed.
    def_DFG : sequence of int
        The atom indices that define the dihedral, handed to
        ``md.compute_dihedrals`` as a single entry.

    Returns
    -------
    list
        A one-element list holding ``np.asarray([per_run])``, where
        ``per_run[r]`` is the frame-by-frame concatenation of the dihedral
        values of every trajectory found for run ``r``.

    Notes
    -----
    NOTE(review): a second function with this name but a different signature
    is visible in the reviewed source. If both live in the same module the
    later definition wins -- confirm which one callers expect.
    """
    print("Working on project %s." % project)

    dihedral = []
    for run in range(runs):
        # The project is formatted with %s (not %d) so that an identifier
        # supplied as a string, e.g. from the command line, works as well.
        trajectories = dataset.MDTrajDataset(
            "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/%s/run%d-clone*.h5"
            % (project, run))
        print("Run %s has %s trajectories." % (run, len(trajectories)))

        per_traj = [md.compute_dihedrals(traj, [def_DFG])
                    for traj in trajectories]
        # Flatten: one entry per frame, all trajectories of the run in order.
        dihedral.append([val for sublist in per_traj for val in sublist])

    dihedral = np.asarray([dihedral])
    return [dihedral]
def shukla_coords_byrun(files, KER, Aloop, SRC2):
    """Compute, run by run, the two coordinates used for the Shukla plot.

    Parameters
    ----------
    files : str
        Glob pattern of the trajectory files. Everything before its first
        ``*`` is used as the base path.
    KER : sequence
        Two entries, each handed to ``md.compute_contacts`` as a single
        contact definition (the local names suggest K-E and E-R pairs).
    Aloop : sequence
        ``Aloop[0]`` and ``Aloop[1]`` are the first and last ``resid`` of the
        activation loop selection.
    SRC2 : mdtraj.Trajectory
        Reference structure; its frame 0 is the RMSD reference.

    Returns
    -------
    list
        ``[rmsd, difference]``. Both are lists with one numpy array per run,
        each array concatenating the frames of all trajectories of that run.
        Values are multiplied by 10 because mdtraj reports lengths in nm.
    """
    path_base = files.split('*')[0]
    globfiles = glob("%s/*clone0.h5" % path_base)

    # Unique run numbers, in ascending order, parsed from the clone0 names.
    runs_list = sorted(set(
        int(re.search('run([^-]+)', filename).group(1))
        for filename in globfiles))

    # The reference activation loop does not depend on the trajectory, so it
    # is selected and sliced once instead of once per trajectory.
    aloop_selection = "backbone and (resid %s to %s)" % (Aloop[0], Aloop[1])
    SRC2_cut = SRC2.atom_slice(SRC2.top.select(aloop_selection))

    difference = []
    rmsd = []
    for run in runs_list:
        # NOTE(review): "clone*1.h5" only matches clones whose number ends in
        # 1 (and so excludes clone0 itself). Kept as found -- confirm whether
        # this sub-sampling is intended.
        trajectories = dataset.MDTrajDataset(
            "%s/run%d-clone*1.h5" % (path_base, run))
        print("Run %s has %s trajectories." % (run, len(trajectories)))

        difference_combinetrajs = []
        rmsd_combinetrajs = []
        for traj in trajectories:
            # compute_contacts returns (distances, residue_pairs); element 0
            # holds the distances.
            k295e310 = md.compute_contacts(traj, [KER[0]])
            e310r409 = md.compute_contacts(traj, [KER[1]])
            # 10x because mdtraj is naturally in nm
            difference_combinetrajs.append(
                10 * (e310r409[0] - k295e310[0]))

            # RMSD of the trajectory's activation loop to the reference loop.
            traj_cut = traj.atom_slice(traj.top.select(aloop_selection))
            # 10x because mdtraj is naturally in nm
            rmsd_combinetrajs.append(
                10 * (md.rmsd(traj_cut, SRC2_cut, frame=0)))

        # Flatten the per-trajectory arrays into one array per run.
        difference.append(np.asarray(
            [val for sublist in difference_combinetrajs for val in sublist]))
        rmsd.append(np.asarray(
            [val for sublist in rmsd_combinetrajs for val in sublist]))

    return [rmsd, difference]
def DFG_dihedral_byrun(files, def_DFG):
    """Compute the DFG dihedral for every run that has a clone0 trajectory.

    Parameters
    ----------
    files : str
        Glob pattern of the trajectory files; the text before its first
        ``*`` is the base path.
    def_DFG : sequence of int
        Atom indices of the dihedral, passed to ``md.compute_dihedrals`` as
        a single entry.

    Returns
    -------
    list
        A one-element list holding ``np.asarray([per_run])``, where
        ``per_run`` has one flattened list of dihedral values per run, in
        ascending run order.
    """
    # Runs are discovered through their clone0 file (files are later sorted
    # by where they are in the first frame of clone0), so a run without a
    # clone0 trajectory is not analysed.
    path_base = files.split('*')[0]
    run_numbers = set()
    for filename in glob("%s/*clone0.h5" % path_base):
        run_numbers.add(int(re.search('run([^-]+)', filename).group(1)))

    dihedral = []
    for run in sorted(run_numbers):
        trajectories = dataset.MDTrajDataset(
            "%s/run%d-*.h5" % (path_base, run))
        print("Run %s has %s trajectories." % (run, len(trajectories)))

        per_traj = [md.compute_dihedrals(traj, [def_DFG])
                    for traj in trajectories]
        # flatten: frames of all trajectories of this run, in order
        dihedral.append([val for sublist in per_traj for val in sublist])

    dihedral = np.asarray([dihedral])
    return [dihedral]
import numpy as np
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline
import mdtraj as md

# Lag time that identifies which saved tICA data set / model is loaded.
tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
# First frame of the first trajectory; used below to describe the features.
t0 = trajectories[0][0]

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")

d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# Rows ordered by their weight in the first tICA eigenvector. np.argsort is
# used explicitly rather than a bare argsort, which needs a star import.
order = np.argsort(tica_model.eigenvectors_[:, 0])

# .loc replaces .ix, which was removed in pandas 1.0. This assumes `d` has
# the default integer index, where both are label lookups -- TODO confirm.
# NOTE(review): these bare expressions only display a result in an
# interactive session; run as a script they produce no output.
d.loc[order[0:5]]
d.loc[order[-5:]]
# Libraries. The Agg backend is selected before pyplot is imported.
import matplotlib
matplotlib.use('Agg')
import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np
from msmbuilder import dataset
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Hydrogen bond coordinates (0-indexed)
KER_abl = [[29, 44], [44, 144]]

# Activation loop (resid)
Aloop_abl = [140, 160]

# Trajectories to analyse
Abl_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/11400/*.h5")

# 2SRC structure that the trajectories are compared to
SRC2 = md.load("ABL_2SRC_A.pdb")


def shukla_coords(trajectories, KER, Aloop, SRC2):
    # NOTE(review): only these opening statements of the function are
    # visible in the reviewed source; they are reproduced unchanged.
    difference = []
    rmsd = []
import matplotlib
matplotlib.use('Agg')  # select the backend before pyplot is imported
import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from msmbuilder import dataset

# Dihedral definitions as atom-index quadruplets. The original note
# describes the DFG dihedral as taken from the Roux umbrella sampling paper,
# with atoms AlaCbeta, AlaCalpha, AspCalpha, AspCgamma.
# NOTE(review): the name suffixes (AD, DF, FG, ...) look like consecutive
# residue pairs -- confirm the atom choice for each entry.
Abl_AD = [2257, 2255, 2265, 2270]
Abl_DF = [2267, 2265, 2277, 2282]
Abl_FG = [2279, 2277, 2297, 2300]
Abl_GL = [2300, 2297, 2304, 2309]
Abl_LS = [2306, 2304, 2323, 2330]
Abl_SR = [2325, 2323, 2334, 2339]
Abl_RL = [2336, 2334, 2358, 2363]
Abl_LM = [2360, 2358, 2377, 2382]

# Trajectories. Alternative inputs are kept below as comments.
#Abl_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/10472/run0-clone0.h5")
Src_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/10471/run0-clone0.h5")

# Test trajectories
#Abl_trajectories = dataset.MDTrajDataset("../../sim-snippets/dozen_frames_abl.xtc", topology="../../sim-snippets/abl_ref.pdb")
#Src_trajectories = dataset.MDTrajDataset("../../sim-snippets/dozen_frames_src.xtc")
def DFG_KER_byrun(files, KER, def_DFG):
    """Compute the DFG distance and the KER distance difference per file.

    Parameters
    ----------
    files : iterable of str
        Trajectory file specifications; each one is loaded as its own
        ``dataset.MDTrajDataset``.
    KER : sequence of str
        Three topology selection strings (for the K, E and R residues).
    def_DFG : sequence of str
        Two topology selection strings; the first atom matched by each
        defines the DFG distance.

    Returns
    -------
    list
        ``[DFG, difference]``; each is a list with one numpy array per entry
        of ``files``, concatenating the frames of all its trajectories.

    Notes
    -----
    ``top`` and ``convert_atom_list_to_resid`` are not defined in this
    function and must be available at module level.
    """
    DFG = []
    difference = []

    for path in files:
        print('working on %s' % path)
        trajectories = dataset.MDTrajDataset(path, topology=top)

        ker_per_traj = []
        dfg_per_traj = []
        for traj in trajectories:
            topology = traj.topology

            # Residue ids of K, E and R from the three KER selections.
            KER_K, KER_E, KER_R = [
                convert_atom_list_to_resid(topology.select(selection),
                                           topology)
                for selection in (KER[0], KER[1], KER[2])
            ]

            # note the default for compute_contacts is 'closest-heavy'
            k295e310 = md.compute_contacts(traj, [[KER_K, KER_E]])
            e310r409 = md.compute_contacts(traj, [[KER_E, KER_R]])
            # 10x because mdtraj is naturally in nm
            ker_per_traj.append(10 * (e310r409[0] - k295e310[0]))

            # DFG distance between the first atom of each selection.
            first_atom = topology.select(def_DFG[0])[0]
            second_atom = topology.select(def_DFG[1])[0]
            dfg_per_traj.append(
                md.compute_distances(traj, [[first_atom, second_atom]]))

        # Flatten the per-trajectory arrays into one array per file.
        difference.append(np.asarray(
            [val for sublist in ker_per_traj for val in sublist]))
        DFG.append(np.asarray(
            [val for sublist in dfg_per_traj for val in sublist]))

    return [DFG, difference]
return indices[0] def compute_torsion(traj, *args): """ Compute the specified torsion. """ indices = [get_atom_index(traj, selection) for selection in args] min_frame = 400 end_frame = len(traj) short_traj = traj.slice(range(min_frame, end_frame), copy=False) # Compute torsion in degrees torsions = md.compute_dihedrals(short_traj, [indices]).squeeze() * (180.0 / np.pi) return torsions if __name__ == "__main__": trajectories = dataset.MDTrajDataset( '/cbio/jclab/home/albaness/trajectories2/AURKA/%s/*/*.h5' % condition) torsion1_list = [] torsion2_list = [] for traj_in in trajectories: torsion1 = compute_torsion(traj_in, *['(resSeq %d and name CA)' % resSeq for resSeq in (282, 283, 284, 285)]) torsion1_list.extend(torsion1) torsion2 = compute_torsion(traj_in, *['(resSeq %d and name CA)' % resSeq for resSeq in (283, 284, 285, 286)]) torsion2_list.extend(torsion2) np.save('./data/dihedral/dihedral_%s-%s-%s.npy' % (condition, 282, 285), torsion1_list) np.save('./data/dihedral/dihedral_%s-%s-%s.npy' % (condition, 283, 286), torsion2_list)
import matplotlib
matplotlib.use('Agg')  # select the backend before pyplot is imported
import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np
from msmbuilder import dataset
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Model structures, one per PDB file.
WIG = md.load("../original-models/3WIG_model.pdb")
AN2 = md.load("../original-models/4AN2_model.pdb")
EQD = md.load("../original-models/3EQD_model.pdb")
EQI = md.load("../original-models/3EQI_model.pdb")
EQG = md.load("../original-models/3EQG_model.pdb")
ORN = md.load("../original-models/3ORN_model.pdb")

# Trajectories to analyse
trajectories = dataset.MDTrajDataset("../ipynbs/trajectories/*.h5")


def catkhrd(trajectories):
    # NOTE(review): only these opening statements of the function are
    # visible in the reviewed source; they are reproduced unchanged.
    # define empty lists
    D218 = []
    D222 = []
import matplotlib
matplotlib.use('Agg')  # select the backend before pyplot is imported
import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from msmbuilder import dataset

# DFG dihedral atom indices. The original note attributes the definition to
# the Roux umbrella sampling paper: AlaCbeta, AlaCalpha, AspCalpha, AspCgamma.
Abl_DFG = [2257, 2255, 2265, 2270]
Src_DFG = [2190, 2188, 2198, 2203]

# All trajectories of each project
Abl_trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11400/*.h5")
Src_trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11401/*.h5")

# Only the clone0 trajectories of each project
Abl_trajectories_0 = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11400/*clone0.h5")
Src_trajectories_0 = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11401/*clone0.h5")


def DFG_dihedral(trajectories, def_DFG):
    # NOTE(review): only this opening statement of the function is visible
    # in the reviewed source; it is reproduced unchanged.
    dihedral = []
import sys
from msmbuilder import dataset
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Hydrogen bond coordinates (0-indexed), keyed by kinase name
KER_hbond = {
    'SRC': [[28, 43], [43, 142]],
    'ABL': [[29, 44], [44, 144]],
    'DDR1': [[51, 68], [68, 185]],
}

# Activation loop (resid), keyed by kinase name
Aloop_def = {
    'SRC': [138, 158],
    'ABL': [140, 160],
    'DDR1': [181, 201],
}

# Command line: first argument is the project, second the kinase.
project = sys.argv[1]
kinase = sys.argv[2]

# Trajectories of the chosen project
trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged/no-solvent/%s/*.h5" % project)

# 2SRC structure of the chosen kinase to compare to
# NOTE(review): `md` is not imported in the visible source; it must be
# imported earlier in the module.
SRC2 = md.load("%s_2SRC_A.pdb" % kinase)
print "*** kinalysis: analyzing project %s (%s) BY RUNS ***" % ( args.project, protein) else: print "*** kinalysis: analyzing project %s (%s) ***" % (args.project, protein) else: myproject = 'no project' protein = 'SRC' files = "trajectories/*.h5" newpath = "./results/%s" % protein if not os.path.exists(newpath): os.makedirs(newpath) # Define our trajectories trajectories = dataset.MDTrajDataset(files) ### LETS FIND OUT SOME THINGS ABOUT ALL OF OUR TRAJECTORIES. print "This script is analyzing %s simulations." % len(trajectories) sim_num = len(trajectories) lens = [] max_length = 0 for i, traj in enumerate(trajectories): if len(traj) > max_length: max_length = len(traj) if len(traj) == max_length: longest_traj = traj print i lens.append(len(traj))
#### import matplotlib.pyplot as plt import seaborn as sns sns.set_style("whitegrid") sns.set_context("poster") import json import argparse # run inputs.py before running this script # DEFINE YOUR INPUTS files = "trajectories-ck2/CK2*.pdb" trajectories = dataset.MDTrajDataset(files) protein = 'CK2' project = '11406' #### END DEFINE INPUTS #### # Make shukla plot with open('KER_hbond.json', 'r') as fp: KER_hbond = json.load(fp) with open('Aloop_def.json', 'r') as fp: Aloop_def = json.load(fp) def shukla_coords(trajectories, KER, Aloop, SRC2):
# Libraries. The Agg backend is selected before pyplot is imported.
import matplotlib
matplotlib.use('Agg')
import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np
from msmbuilder import dataset
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Hydrogen bond coordinates (0-indexed)
KER_src = [[35, 50], [50, 149]]

# Activation loop (resid)
Aloop_src = [145, 165]

# Trajectories to analyse
Shukla_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/shukla-trajectories/*part.pdb")

# 2SRC structure that the trajectories are compared to
SRC2 = md.load("SRC_2SRC_A.pdb")


def shukla_coords(trajectories, KER, Aloop, SRC2):
    # NOTE(review): only these opening statements of the function are
    # visible in the reviewed source; they are reproduced unchanged.
    difference = []
    rmsd = []
import matplotlib
matplotlib.use('Agg')  # select the backend before pyplot is imported
import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from msmbuilder import dataset

# DFG dihedral atom indices. The original note attributes the definition to
# the Roux umbrella sampling paper: AlaCbeta, AlaCalpha, AspCalpha, AspCgamma.
# Indices for structures with hydrogens (kept for reference):
#Abl_DFG = [2257,2255,2265,2270]
#Src_DFG = [2190,2188,2198,2203]
# Indices for the PDBs without hydrogens:
Abl_DFG = [1117, 1116, 1121, 1123]
Src_DFG = [1074, 1073, 1078, 1080]

# Structures loaded from the DFG-in and DFG-out directories of each kinase
Abl_in = dataset.MDTrajDataset("Abl_DFG_in/ABL1*.pdb")
Abl_out = dataset.MDTrajDataset("Abl_DFG_out/ABL1*.pdb")
Src_in = dataset.MDTrajDataset("Src_DFG_in/SRC*.pdb")
Src_out = dataset.MDTrajDataset("Src_DFG_out/SRC*.pdb")


def DFG_dihedral(trajectories, def_DFG):
    # NOTE(review): only this opening statement of the function is visible
    # in the reviewed source; it is reproduced unchanged.
    dihedral = []