Beispiel #1
0
def DFG_dihedral_byrun(project, runs, def_DFG):
    """Compute the DFG dihedral for every run of a project, pooled over clones.

    For each run index in ``range(runs)`` the files matching
    ``run<N>-clone*.h5`` in the project directory are loaded and
    ``md.compute_dihedrals`` is evaluated on each trajectory. The per-frame
    results of all clones of a run are concatenated into a single list.

    Parameters
    ----------
    project : int or str
        Project number used to build the trajectory directory path. Numeric
        strings are accepted and converted with ``int``.
    runs : int
        Number of runs; runs ``0 .. runs - 1`` are processed.
    def_DFG : sequence of int
        Atom indices defining the dihedral, forwarded to
        ``md.compute_dihedrals`` as its single dihedral definition.

    Returns
    -------
    list
        A one-element list holding ``np.asarray([per_run])``, where
        ``per_run[r]`` is the list of per-frame dihedral entries of run ``r``.
    """
    # print(...) with a single pre-formatted string behaves the same on
    # Python 2 and Python 3.
    print("Working on project %s." % project)

    dihedral = []
    for run in range(runs):
        trajectories = dataset.MDTrajDataset(
            "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/%d/run%d-clone*.h5"
            % (int(project), run))
        print("Run %s has %s trajectories." % (run, len(trajectories)))

        # One entry per frame, concatenated over every clone of this run.
        dihedral.append([
            frame_value
            for traj in trajectories
            for frame_value in md.compute_dihedrals(traj, [def_DFG])
        ])

    # The extra list/array nesting is kept as-is: callers index into it.
    dihedral = np.asarray([dihedral])

    return [dihedral]
Beispiel #2
0
def shukla_coords_byrun(files,KER,Aloop,SRC2):
    """Compute the two Shukla coordinates per run, pooled over a run's clones.

    Runs are discovered from the ``*clone0.h5`` files located under the part
    of ``files`` that precedes its first ``*``. For every trajectory of a run
    two quantities are computed per frame:

    * the difference between the ``KER[1]`` and ``KER[0]`` contact distances,
    * the RMSD of the activation-loop backbone to frame 0 of ``SRC2``.

    Both are multiplied by 10 (mdtraj reports nm).

    Parameters
    ----------
    files : str
        Glob pattern of the trajectory files; only the text before the first
        ``*`` is used, as the base path.
    KER : sequence
        ``KER[0]`` and ``KER[1]`` are each passed to ``md.compute_contacts``
        as a single contact definition.
    Aloop : sequence
        ``Aloop[0]`` and ``Aloop[1]`` are the bounds of the ``resid`` range
        used in the backbone selection string.
    SRC2 : reference structure
        Object providing ``top.select`` and ``atom_slice``; used as the RMSD
        reference.

    Returns
    -------
    list
        ``[rmsd, difference]``; each is a list with one numpy array per run.

    Raises
    ------
    ValueError
        If a ``*clone0.h5`` file name contains no ``run<digits>`` token.
    """
    import os  # local import: only needed for run discovery

    path_base = files.split('*')[0]

    # Parse the run number from the file name only, so that a directory
    # component containing "run" cannot confuse the match.
    run_pattern = re.compile(r'run(\d+)')
    found_runs = set()
    for filename in glob("%s/*clone0.h5" % path_base):
        match = run_pattern.search(os.path.basename(filename))
        if match is None:
            raise ValueError(
                "Cannot determine run number from %r" % filename)
        found_runs.add(int(match.group(1)))
    runs_list = sorted(found_runs)

    rmsd = []
    difference = []

    # The reference activation loop does not depend on the trajectory; build
    # it lazily so nothing is touched when there are no trajectories.
    SRC2_cut = None

    for run in runs_list:

        # NOTE(review): this pattern only matches clone files whose name ends
        # in "1" before the extension -- confirm this is intended.
        trajectories = dataset.MDTrajDataset("%s/run%d-clone*1.h5" % (path_base,run))
        print("Run %s has %s trajectories." % (run,len(trajectories)))

        run_difference = []
        run_rmsd = []

        for traj in trajectories:

            # contact-distance difference, 10x because mdtraj is naturally in nm
            k295e310 = md.compute_contacts(traj, [KER[0]])
            e310r409 = md.compute_contacts(traj, [KER[1]])
            run_difference.append(10*(e310r409[0] - k295e310[0]))

            # activation-loop backbone RMSD to the reference
            selection = "backbone and (resid %s to %s)" %(Aloop[0],Aloop[1])
            if SRC2_cut is None:
                SRC2_cut = SRC2.atom_slice(SRC2.top.select(selection))
            traj_cut = traj.atom_slice(traj.top.select(selection))

            # 10x because mdtraj is naturally in nm
            run_rmsd.append(10*(md.rmsd(traj_cut,SRC2_cut,frame=0)))

        # flatten the per-trajectory arrays into one array per run
        difference.append(
            np.asarray([val for sublist in run_difference for val in sublist]))
        rmsd.append(
            np.asarray([val for sublist in run_rmsd for val in sublist]))

    return [rmsd, difference]
Beispiel #3
0
def DFG_dihedral_byrun(files,def_DFG):
    """Compute the DFG dihedral for every run, pooled over that run's clones.

    Runs are discovered from the ``*clone0.h5`` files located under the part
    of ``files`` that precedes its first ``*``. For each run every
    ``run<N>-*.h5`` trajectory is loaded and ``md.compute_dihedrals`` is
    evaluated on it; the per-frame results are concatenated per run.

    Parameters
    ----------
    files : str
        Glob pattern of the trajectory files; only the text before the first
        ``*`` is used, as the base path.
    def_DFG : sequence of int
        Atom indices defining the dihedral, forwarded to
        ``md.compute_dihedrals`` as its single dihedral definition.

    Returns
    -------
    list
        A one-element list holding ``np.asarray([per_run])``, where
        ``per_run[r]`` is the list of per-frame dihedral entries of the
        ``r``-th run in ascending run order.

    Raises
    ------
    ValueError
        If a ``*clone0.h5`` file name contains no ``run<digits>`` token.
    """
    import os  # local import: only needed for run discovery

    # Since we are going to sort files by where they are in first frame of
    # clone0, we can only analyze runs with a clone0 present.
    path_base = files.split('*')[0]

    # Parse the run number from the file name only, so that a directory
    # component containing "run" cannot confuse the match.
    run_pattern = re.compile(r'run(\d+)')
    found_runs = set()
    for filename in glob("%s/*clone0.h5" % path_base):
        match = run_pattern.search(os.path.basename(filename))
        if match is None:
            raise ValueError(
                "Cannot determine run number from %r" % filename)
        found_runs.add(int(match.group(1)))
    runs_list = sorted(found_runs)

    dihedral = []

    for run in runs_list:

        trajectories = dataset.MDTrajDataset("%s/run%d-*.h5" % (path_base,run))
        print("Run %s has %s trajectories." % (run,len(trajectories)))

        # One entry per frame, concatenated over every clone of this run.
        dihedral.append([
            frame_value
            for traj in trajectories
            for frame_value in md.compute_dihedrals(traj,[def_DFG])
        ])

    # The extra list/array nesting is kept as-is: callers index into it.
    dihedral = np.asarray([dihedral])

    return [dihedral]
Beispiel #4
0
import mdtraj as md
import numpy as np
import pandas as pd
from msmbuilder import example_datasets, cluster, msm, featurizer, lumping, utils, dataset, decomposition
from sklearn.pipeline import make_pipeline

# Value substituted into the tICA file names loaded below.
tica_lagtime = 1600

trajectories = dataset.MDTrajDataset("./trajectories/*.h5")
# First element of the first dataset entry; handed to describe_features below.
t0 = trajectories[0][0]

dih = dataset.NumpyDirDataset("./dihedrals/")
X = dataset.dataset("./tica/tica%d.h5" % tica_lagtime)
Xf = np.concatenate(X)

tica_model = utils.load("./tica/tica%d.pkl" % tica_lagtime)
dih_model = utils.load("./dihedrals/model.pkl")


# Tabulate the feature descriptions, one row per feature.
d = dih_model.describe_features(t0)
d = pd.DataFrame(d)

# Rows of the feature table ordered by the first column of the tICA
# eigenvector matrix: the 5 smallest entries, then the 5 largest.
# np.argsort yields positions, so positional .iloc indexing is used
# (DataFrame.ix no longer exists in current pandas).
# NOTE(review): these bare expressions only display something in an
# interactive session -- confirm that is how this script is used.
first_component_order = np.argsort(tica_model.eigenvectors_[:, 0])
d.iloc[first_component_order[0:5]]
d.iloc[first_component_order[-5:]]
Beispiel #5
0
# import libraries
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np

from msmbuilder import dataset

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Load every .h5 trajectory matching the project 11400 glob pattern.
Abl_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/11400/*.h5")

# Load the 2SRC reference structure to compare the trajectories to.
SRC2 = md.load("ABL_2SRC_A.pdb")

# Define hydrogen bond coordinates (0-indexed): two index pairs that share
# the member 44.
# NOTE(review): presumably the pairs consumed as KER by shukla_coords below --
# confirm against its body.
KER_abl = [[29,44],[44,144]]

# Define Activation loop (resid) as [first, last].
# NOTE(review): assumed to be the bounds of a resid range -- confirm in
# shukla_coords.
Aloop_abl = [140,160]

def shukla_coords(trajectories,KER,Aloop,SRC2):

    difference = []
    rmsd = []
Beispiel #6
0
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

from msmbuilder import dataset

# Load trajectories. Only the Src run0-clone0 file is active; the Abl line is
# kept commented out as an alternative input.

#Abl_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/10472/run0-clone0.h5")
Src_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/fah/fah-data/munged/no-solvent/10471/run0-clone0.h5")

# Load test trajectories (disabled alternatives using short local snippets).

#Abl_trajectories = dataset.MDTrajDataset("../../sim-snippets/dozen_frames_abl.xtc", topology="../../sim-snippets/abl_ref.pdb")
#Src_trajectories = dataset.MDTrajDataset("../../sim-snippets/dozen_frames_src.xtc")

# Define the DFG dihedral (this is from the Roux umbrella sampling paper; the
# atoms are AlaCbeta, AlaCalpha, AspCalpha, AspCgamma) and seven further
# four-index definitions. The entries are chained: the third index of each
# list is the second index of the next one.
# NOTE(review): the two-letter suffixes presumably name consecutive residue
# pairs (A-D, D-F, F-G, ...) -- confirm against the topology.

Abl_AD = [2257,2255,2265,2270]
Abl_DF = [2267,2265,2277,2282]
Abl_FG = [2279,2277,2297,2300]
Abl_GL = [2300,2297,2304,2309]
Abl_LS = [2306,2304,2323,2330]
Abl_SR = [2325,2323,2334,2339]
Abl_RL = [2336,2334,2358,2363]
Abl_LM = [2360,2358,2377,2382]
Beispiel #7
0
def DFG_KER_byrun(files, KER, def_DFG):
    """Compute the DFG distance and the KER contact difference per input file.

    Every entry of ``files`` is loaded as its own dataset (using the
    module-level ``top`` as topology). For each trajectory two per-frame
    quantities are computed and pooled over all trajectories of that entry:

    * the contact distance E-R minus the contact distance K-E, multiplied by
      10 (mdtraj reports nm),
    * the distance between the first atoms matched by ``def_DFG[0]`` and
      ``def_DFG[1]``, left in mdtraj's native units.

    Parameters
    ----------
    files : iterable of str
        Paths or glob patterns, each passed to ``dataset.MDTrajDataset``.
    KER : sequence of str
        Three selection strings (K, E and R, in that order) evaluated with
        ``topology.select``.
    def_DFG : sequence of str
        Two selection strings; the first atom matched by each one defines
        the DFG distance.

    Returns
    -------
    list
        ``[DFG, difference]``; each is a list with one numpy array per entry
        of ``files``.
    """
    difference = []
    DFG = []

    for path in files:

        # print(...) with a single pre-formatted string behaves the same on
        # Python 2 and Python 3.
        print('working on %s' % path)

        trajectories = dataset.MDTrajDataset(path, topology=top)

        path_difference = []
        path_DFG = []

        for traj in trajectories:

            topology = traj.topology

            # Resolve the K, E and R selections to what compute_contacts
            # expects.
            # NOTE(review): convert_atom_list_to_resid is defined elsewhere;
            # presumably it maps selected atoms to one residue index.
            KER_K = convert_atom_list_to_resid(topology.select(KER[0]), topology)
            KER_E = convert_atom_list_to_resid(topology.select(KER[1]), topology)
            KER_R = convert_atom_list_to_resid(topology.select(KER[2]), topology)

            # note the default for compute_contacts is 'closest-heavy'
            k295e310 = md.compute_contacts(traj, [[KER_K, KER_E]])
            e310r409 = md.compute_contacts(traj, [[KER_E, KER_R]])

            # 10x because mdtraj is naturally in nm
            path_difference.append(10 * (e310r409[0] - k295e310[0]))

            # DFG distance between the first atom of each selection
            def_DFG_atoms = [
                topology.select(def_DFG[0])[0],
                topology.select(def_DFG[1])[0],
            ]
            path_DFG.append(md.compute_distances(traj, [def_DFG_atoms]))

        # flatten the per-trajectory arrays into one array per entry
        difference.append(
            np.asarray([val for sublist in path_difference for val in sublist]))
        DFG.append(
            np.asarray([val for sublist in path_DFG for val in sublist]))

    return [DFG, difference]
Beispiel #8
0
    return indices[0]


def compute_torsion(traj, *args):
    """
    Compute the specified torsion.

    Each positional argument after ``traj`` is a selection that is resolved
    to one atom index with ``get_atom_index``; together they define the
    single dihedral that is evaluated. The first 400 frames of ``traj`` are
    discarded before the computation.

    Returns a 1-D array with one torsion value in degrees per remaining
    frame. The array is empty when ``traj`` has 400 frames or fewer.
    """
    indices = [get_atom_index(traj, selection) for selection in args]
    min_frame = 400  # frames before this index are skipped
    end_frame = len(traj)
    short_traj = traj.slice(range(min_frame, end_frame), copy=False)
    # Compute torsion in degrees. Exactly one dihedral is requested, so take
    # that single column explicitly: unlike squeeze(), this stays 1-D even
    # when only one frame remains, so callers can always iterate the result.
    torsions = md.compute_dihedrals(short_traj, [indices])[:, 0] * (180.0 / np.pi)

    return torsions


if __name__ == "__main__":
    # Collect two CA-CA-CA-CA torsions over every trajectory of the condition
    # and save each pooled series to its own .npy file.
    trajectories = dataset.MDTrajDataset(
        '/cbio/jclab/home/albaness/trajectories2/AURKA/%s/*/*.h5' % condition)
    torsion1_list, torsion2_list = [], []
    # Each residue window is paired with the list that accumulates its values.
    windows = (
        ((282, 283, 284, 285), torsion1_list),
        ((283, 284, 285, 286), torsion2_list),
    )
    for traj_in in trajectories:
        for residues, collected in windows:
            selections = ['(resSeq %d and name CA)' % resSeq for resSeq in residues]
            collected.extend(compute_torsion(traj_in, *selections))
    for residues, collected in windows:
        # The file name carries the first and last residue of the window.
        np.save('./data/dihedral/dihedral_%s-%s-%s.npy' % (condition, residues[0], residues[-1]),
                collected)
Beispiel #9
0
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np

from msmbuilder import dataset

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Load every .h5 trajectory matching the glob pattern below.

trajectories = dataset.MDTrajDataset("../ipynbs/trajectories/*.h5")

# Load the reference model structures. Each variable is named after the last
# three characters of the four-character code in its file name.
# NOTE(review): presumably used as comparison structures in catkhrd below --
# confirm against its body.
WIG = md.load("../original-models/3WIG_model.pdb")
AN2 = md.load("../original-models/4AN2_model.pdb")
EQD = md.load("../original-models/3EQD_model.pdb")
EQI = md.load("../original-models/3EQI_model.pdb")
EQG = md.load("../original-models/3EQG_model.pdb")
ORN = md.load("../original-models/3ORN_model.pdb")

def catkhrd(trajectories):

     # define empty lists

     D218 = []
     D222 = []
Beispiel #10
0
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from msmbuilder import dataset

# Load all trajectories of project 11400 (bound to the Abl names) and
# project 11401 (bound to the Src names).

Abl_trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11400/*.h5")
Src_trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11401/*.h5")

# Same projects, restricted to the files whose name ends in "clone0.h5".
Abl_trajectories_0 = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11400/*clone0.h5")
Src_trajectories_0 = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged2/no-solvent/11401/*clone0.h5")

# Define the DFG dihedral (this is from the Roux umbrella sampling paper; the
# atoms are AlaCbeta, AlaCalpha, AspCalpha, AspCgamma), one four-index list
# per kinase.

Abl_DFG = [2257, 2255, 2265, 2270]
Src_DFG = [2190, 2188, 2198, 2203]


def DFG_dihedral(trajectories, def_DFG):

    dihedral = []
Beispiel #11
0
import sys

from msmbuilder import dataset

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Define project: first command-line argument, substituted into the
# trajectory path below.
project = sys.argv[1]

# Define kinase: second command-line argument, substituted into the reference
# structure file name below.
# NOTE(review): presumably also one of the keys of KER_hbond / Aloop_def
# ('SRC', 'ABL', 'DDR1') -- confirm where it is used.
kinase = sys.argv[2]

# Load every .h5 trajectory of the chosen project.
trajectories = dataset.MDTrajDataset(
    "/cbio/jclab/projects/fah/fah-data/munged/no-solvent/%s/*.h5" % project)

# Load the 2SRC reference structure to compare to.
# NOTE(review): `md` is not imported in the visible part of this script --
# confirm that `import mdtraj as md` exists elsewhere.
SRC2 = md.load("%s_2SRC_A.pdb" % kinase)

# Define hydrogen bond coordinates (0-indexed), keyed by kinase name. Each
# value is two index pairs that share their middle member.
KER_hbond = {
    'SRC': [[28, 43], [43, 142]],
    'ABL': [[29, 44], [44, 144]],
    'DDR1': [[51, 68], [68, 185]]
}

# Define Activation loop (resid) as [first, last], keyed by kinase name.
Aloop_def = {'SRC': [138, 158], 'ABL': [140, 160], 'DDR1': [181, 201]}

Beispiel #12
0
        print "*** kinalysis: analyzing project %s (%s) BY RUNS ***" % (
            args.project, protein)
    else:
        print "*** kinalysis: analyzing project %s (%s) ***" % (args.project,
                                                                protein)
else:
    myproject = 'no project'
    protein = 'SRC'
    files = "trajectories/*.h5"
    newpath = "./results/%s" % protein
    if not os.path.exists(newpath):
        os.makedirs(newpath)

# Define our trajectories

trajectories = dataset.MDTrajDataset(files)

### LETS FIND OUT SOME THINGS ABOUT ALL OF OUR TRAJECTORIES.
# print(...) with a single pre-formatted string behaves the same on Python 2
# and Python 3.
sim_num = len(trajectories)
print("This script is analyzing %s simulations." % sim_num)

# Record every trajectory length and remember the longest trajectory; on a
# tie the later trajectory wins.
lens = []
max_length = 0
longest_traj = None  # stays None when the dataset is empty
for i, traj in enumerate(trajectories):
    n_frames = len(traj)
    if n_frames >= max_length:
        max_length = n_frames
        longest_traj = traj
    print(i)
    lens.append(n_frames)
Beispiel #13
0
####
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("whitegrid")
sns.set_context("poster")

import json
import argparse

# Run inputs.py before running this script.
# NOTE(review): presumably inputs.py writes the two JSON files read below --
# confirm.

# DEFINE YOUR INPUTS

# NOTE(review): `dataset` is not imported in the visible part of this script --
# confirm that `from msmbuilder import dataset` exists elsewhere.
files = "trajectories-ck2/CK2*.pdb"
trajectories = dataset.MDTrajDataset(files)
protein = 'CK2'
project = '11406'

#### END DEFINE INPUTS ####

# Make shukla plot

# Load the hydrogen-bond and activation-loop definitions from JSON files in
# the working directory.
with open('KER_hbond.json', 'r') as fp:
    KER_hbond = json.load(fp)
with open('Aloop_def.json', 'r') as fp:
    Aloop_def = json.load(fp)


def shukla_coords(trajectories, KER, Aloop, SRC2):
Beispiel #14
0
# import libraries
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np

from msmbuilder import dataset

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

# Load every file ending in "part.pdb" from the shukla-trajectories directory.
Shukla_trajectories = dataset.MDTrajDataset("/cbio/jclab/projects/shukla-trajectories/*part.pdb")

# Load the 2SRC reference structure to compare the trajectories to.
SRC2 = md.load("SRC_2SRC_A.pdb")

# Define hydrogen bond coordinates (0-indexed): two index pairs that share
# the member 50.
# NOTE(review): presumably the pairs consumed as KER by shukla_coords below --
# confirm against its body.
KER_src = [[35,50],[50,149]]

# Define Activation loop (resid) as [first, last].
# NOTE(review): assumed to be the bounds of a resid range -- confirm in
# shukla_coords.
Aloop_src = [145,165]

def shukla_coords(trajectories,KER,Aloop,SRC2):

    difference = []
    rmsd = []
Beispiel #15
0
import matplotlib
matplotlib.use('Agg')

import mdtraj as md
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from msmbuilder import dataset

# Load the PDB structures, one dataset per kinase (Abl, Src) and per
# directory (DFG_in, DFG_out).

Abl_in = dataset.MDTrajDataset("Abl_DFG_in/ABL1*.pdb")
Abl_out = dataset.MDTrajDataset("Abl_DFG_out/ABL1*.pdb")

Src_in = dataset.MDTrajDataset("Src_DFG_in/SRC*.pdb")
Src_out = dataset.MDTrajDataset("Src_DFG_out/SRC*.pdb")

# Define the DFG dihedral (this is from the Roux umbrella sampling paper; the
# atoms are AlaCbeta, AlaCalpha, AspCalpha, AspCgamma).
# These are the indices for structures with hydrogens (kept for reference):
#Abl_DFG = [2257,2255,2265,2270]
#Src_DFG = [2190,2188,2198,2203]
# Below are the dihedral indices for the PDBs without hydrogens.
Abl_DFG = [1117, 1116, 1121, 1123]
Src_DFG = [1074, 1073, 1078, 1080]


def DFG_dihedral(trajectories, def_DFG):

    dihedral = []