Beispiel #1
0
def shukla_coords(trajectories,KER,Aloop,SRC2):
    """Compute the activation-loop RMSD and the contact-distance difference.

    For every trajectory two quantities are evaluated per frame:

    * the contact distance of residue pair ``KER[1]`` minus that of residue
      pair ``KER[0]`` (named e310r409 and k295e310 in the original code);
    * the RMSD of the backbone atoms in ``resid Aloop[0] to Aloop[1]``
      against frame 0 of the backbone atoms ``resid 140 to 160`` of ``SRC2``.

    Both are multiplied by 10 (the original comments state this converts
    mdtraj's nm to Angstrom).

    Returns
    -------
    list
        ``[flattened_rmsd, flattened_difference]``: the per-trajectory
        results joined along the frame axis, as numpy arrays.
    """
    selection = "backbone and (resid %s to %s)"
    difference_chunks = []
    rmsd_chunks = []

    for traj in trajectories:
        # Element [0] of compute_contacts' return value holds the distances.
        first_pair = md.compute_contacts(traj, [KER[0]])
        second_pair = md.compute_contacts(traj, [KER[1]])
        difference_chunks.append(10 * (second_pair[0] - first_pair[0]))

        # NOTE(review): the reference selection is fixed at 140-160 while
        # the trajectory selection uses Aloop -- confirm this is intended.
        reference_atoms = SRC2.top.select(selection % (140, 160))
        loop_atoms = traj.top.select(selection % (Aloop[0], Aloop[1]))

        reference_loop = SRC2.atom_slice(reference_atoms)
        traj_loop = traj.atom_slice(loop_atoms)

        rmsd_chunks.append(10 * md.rmsd(traj_loop, reference_loop, frame=0))

    # Join the per-trajectory arrays frame by frame.
    flattened_difference = np.asarray(
        [frame for chunk in difference_chunks for frame in chunk])
    flattened_rmsd = np.asarray(
        [frame for chunk in rmsd_chunks for frame in chunk])

    return [flattened_rmsd, flattened_difference]
def test_trek():
    """Run the 'trek' stp and ctr post-processing steps and check outputs.

    Reads the run description from ``processed/p9761/24/7/info.json``,
    applies ``trajprocess.postprocess.stp`` then ``ctr``, verifies that
    temporary paths were removed, and checks that residue contact distances
    agree between the stp and ctr trajectories.
    """
    # setup
    with open("processed/p9761/24/7/info.json") as handle:
        info = json.load(handle)

    info = trajprocess.postprocess.stp(info, 'trek')

    # the stp working directory must have been cleaned up
    stp_workdir = '{workdir}/stp/0/'.format(**info['path'])
    assert not os.path.exists(stp_workdir)

    # check stp results
    stp_traj = mdtraj.load(info['stp']['gens'][0], top=info['stp']['outtop'])
    assert stp_traj.n_atoms == 30962
    assert len(stp_traj) == 7

    # do ctr
    info = trajprocess.postprocess.ctr(info, "trek")

    # no cpptraj temporary file may be left behind
    tmp_in_workdir = "{workdir}/cpptraj.tmp".format(**info['path'])
    assert not os.path.exists(tmp_in_workdir)
    tmp_in_ctr = "{workdir}/ctr/cpptraj.tmp".format(**info['path'])
    assert not os.path.exists(tmp_in_ctr)
    ctr_traj = mdtraj.load(info['ctr']['gens'][0], top=info['stp']['outtop'])

    # check ctr results on 20 random residue pairs
    # (Trek has 518 protein residues)
    pairs = np.random.randint(0, 518, (20, 2))
    stp_contacts = mdtraj.compute_contacts(stp_traj, pairs)
    ctr_contacts = mdtraj.compute_contacts(ctr_traj, pairs)
    stp_distances, _ = stp_contacts
    ctr_distances, _ = ctr_contacts

    np.testing.assert_array_almost_equal(stp_distances, ctr_distances,
                                         decimal=4)
Beispiel #3
0
def test_contact_0():
    """Check compute_contacts against stored references for five schemes.

    For each scheme the flattened distances must equal the matching
    ``cc_*.dat`` file and the returned residue pairs must equal the pairs
    that were requested.
    """
    cases = (
        ('ca', 'cc_ca.dat'),
        ('closest', 'cc_closest.dat'),
        ('closest-heavy', 'cc_closest-heavy.dat'),
        ('sidechain', 'cc_sidechain.dat'),
        ('sidechain-heavy', 'cc_sidechain-heavy.dat'),
    )

    pdb = md.load(get_fn('bpti.pdb'))
    contacts = np.loadtxt(get_fn('contacts.dat')).astype(int)

    # Compute everything first, then load the references, then compare:
    # the same ordering as the hand-unrolled original.
    computed = []
    for scheme, _ in cases:
        distances, pairs = md.compute_contacts(pdb, contacts, scheme=scheme)
        computed.append((distances, pairs))

    references = [np.loadtxt(get_fn(ref_name)) for _, ref_name in cases]

    for reference, (distances, _) in zip(references, computed):
        eq(reference, distances.flatten())
    for _, pairs in computed:
        eq(contacts, pairs)
Beispiel #4
0
def test_contact_0(get_fn):
    """Compare compute_contacts output with reference data per scheme.

    ``get_fn`` resolves a test-data file name to a path. The distances of
    every scheme are compared with the corresponding ``cc_*.dat`` file, and
    the pairs returned by mdtraj are compared with the requested pairs.
    """
    scheme_to_reference = [
        ('ca', 'cc_ca.dat'),
        ('closest', 'cc_closest.dat'),
        ('closest-heavy', 'cc_closest-heavy.dat'),
        ('sidechain', 'cc_sidechain.dat'),
        ('sidechain-heavy', 'cc_sidechain-heavy.dat'),
    ]

    pdb = md.load(get_fn('bpti.pdb'))
    contacts = np.loadtxt(get_fn('contacts.dat')).astype(int)

    # Step 1: run every scheme.
    all_distances = []
    all_pairs = []
    for scheme, _ in scheme_to_reference:
        distances, pairs = md.compute_contacts(pdb, contacts, scheme=scheme)
        all_distances.append(distances)
        all_pairs.append(pairs)

    # Step 2: load the expected values.
    expected = []
    for _, reference_name in scheme_to_reference:
        expected.append(np.loadtxt(get_fn(reference_name)))

    # Step 3: distances first, then the echoed residue pairs.
    for reference, distances in zip(expected, all_distances):
        eq(reference, distances.flatten())
    for pairs in all_pairs:
        eq(contacts, pairs)
Beispiel #5
0
def shukla_coords(trajectories, KER, Aloop, SRC2):
    """Compute per-trajectory activation-loop RMSD and distance difference.

    For each trajectory:

    * the contact distance of residue pair ``KER[1]`` minus that of residue
      pair ``KER[0]``;
    * the RMSD of the backbone atoms ``resid Aloop[0] to Aloop[1]`` against
      frame 0 of the same selection taken from ``SRC2``.

    Both are multiplied by 10 (per the original comments: mdtraj works in
    nm, the result is meant to be in Angstrom).

    Returns
    -------
    list
        ``[rmsd, difference]``, each a list with one array per trajectory.
    """
    rmsd, difference = [], []

    for traj in trajectories:
        # Element [0] of compute_contacts' return value holds the distances.
        first_pair = md.compute_contacts(traj, [KER[0]])
        second_pair = md.compute_contacts(traj, [KER[1]])
        distance_gap = second_pair[0] - first_pair[0]
        difference.append(10 * distance_gap)

        # Same residue range is selected in the reference and the trajectory.
        reference_atoms = SRC2.top.select(
            "backbone and (resid %s to %s)" % (Aloop[0], Aloop[1]))
        loop_atoms = traj.top.select(
            "backbone and (resid %s to %s)" % (Aloop[0], Aloop[1]))

        reference_loop = SRC2.atom_slice(reference_atoms)
        traj_loop = traj.atom_slice(loop_atoms)

        loop_rmsd = md.rmsd(traj_loop, reference_loop, frame=0)
        rmsd.append(10 * loop_rmsd)

    return [rmsd, difference]
Beispiel #6
0
 def test_Residue_Mindist_Ca_array_periodic(self):
     """Check that the featurizer honours ``periodic=False`` for 'ca' contacts.

     A bogus unit cell is attached to the trajectory so that the periodic
     and non-periodic mdtraj results differ; the featurizer output must
     match the non-periodic reference.
     """
     traj = mdtraj.load(pdbfile)
     # Atoms farthest apart along Z (last column of the per-axis arg-extrema)
     lowest_atom = traj.xyz.argmin(1).squeeze()[-1]
     highest_atom = traj.xyz.argmax(1).squeeze()[-1]
     # Residues those two atoms belong to
     lowest_residue = traj.topology.atom(lowest_atom).residue.index
     highest_residue = traj.topology.atom(highest_atom).residue.index
     contacts = np.array([[lowest_residue, highest_residue]])
     # Attach a (bogus) PBC; per the original note, traj._have_unitcell
     # would otherwise be False
     traj.unitcell_angles = [90, 90, 90]
     traj.unitcell_lengths = [1, 1, 1]
     self.feat.add_residue_mindist(scheme='ca',
                                   residue_pairs=contacts,
                                   periodic=False)
     D = self.feat.transform(traj)
     reference_periodic = mdtraj.compute_contacts(
         traj, scheme='ca', contacts=contacts, periodic=True)
     reference_periodic = reference_periodic[0]
     reference_plain = mdtraj.compute_contacts(
         traj, scheme='ca', contacts=contacts, periodic=False)
     reference_plain = reference_plain[0]
     # The periodic option must have an effect at all ...
     assert not np.allclose(reference_plain, reference_periodic)
     # ... and the featurizer must follow the non-periodic result
     assert np.allclose(D, reference_plain)
     assert len(self.feat.describe()) == self.feat.dimension()
Beispiel #7
0
def catkhrd(trajectories):
    """Collect two C-alpha residue-pair distances over all trajectories.

    The residue-index pairs are ``[120, 151]`` (named h188s218 in the
    original code) and ``[29, 155]`` (named k97s222). Distances are taken
    from ``md.compute_contacts`` with ``scheme='ca'`` and are not rescaled.

    Returns
    -------
    list
        ``[flattened_h188s218, flattened_k97s222]``: the per-trajectory
        distance arrays joined along the frame axis.
    """
    h188s218_chunks = []
    k97s222_chunks = []

    for traj in trajectories:
        # Element [0] of the returned tuple is the distance array.
        h188s218_chunks.append(
            md.compute_contacts(traj, [[120,151]],scheme='ca')[0])
        k97s222_chunks.append(
            md.compute_contacts(traj, [[29,155]],scheme='ca')[0])

    # Join the per-trajectory arrays frame by frame.
    flattened_h188s218 = np.asarray(
        [frame for chunk in h188s218_chunks for frame in chunk])
    flattened_k97s222 = np.asarray(
        [frame for chunk in k97s222_chunks for frame in chunk])

    return [flattened_h188s218, flattened_k97s222]
Beispiel #8
0
def shukla_coords_byrun(files,KER,Aloop,SRC2):
    """Compute activation-loop RMSD and distance difference grouped by run.

    Run numbers are discovered from the ``*clone0.h5`` files found under the
    part of ``files`` that precedes its first ``*``. For every run (in
    ascending order) the matching trajectories are loaded through
    ``dataset.MDTrajDataset`` and, for each trajectory, the following are
    computed:

    * contact distance of residue pair ``KER[1]`` minus that of ``KER[0]``;
    * RMSD of backbone atoms ``resid Aloop[0] to Aloop[1]`` against frame 0
      of the same selection in ``SRC2``.

    Both are multiplied by 10 (per the original comments: nm to Angstrom).

    Parameters
    ----------
    files : str
        Glob-style pattern; only the text before the first ``*`` is used.
    KER : sequence
        Two residue pairs; ``KER[0]`` and ``KER[1]`` are each passed to
        ``md.compute_contacts``.
    Aloop : sequence
        First and last residue id of the loop selection.
    SRC2 : trajectory-like
        Reference structure providing ``top.select`` and ``atom_slice``.

    Returns
    -------
    list
        ``[rmsd, difference]``; each is a list with one flattened numpy
        array per run.

    Raises
    ------
    AttributeError
        If a discovered file name does not contain ``run<number>``.
    """
    path_base = files.split('*')[0]
    clone0_files = "%s/*clone0.h5" % path_base

    # Distinct run numbers; a set avoids the repeated membership test and
    # the list is sorted once instead of once per file.
    runs = set()
    for filename in glob(clone0_files):
        run_string = re.search(r'run([^-]+)', filename).group(1)
        runs.add(int(run_string))
    runs_list = sorted(runs)

    selection = "backbone and (resid %s to %s)"
    difference = []
    rmsd = []

    for run in runs_list:

        # NOTE(review): the pattern "clone*1.h5" only matches clone files
        # whose name ends in "1.h5" -- confirm this is intended.
        trajectories = dataset.MDTrajDataset("%s/run%d-clone*1.h5" % (path_base,run))
        # print() with one argument behaves the same on Python 2 and 3.
        print("Run %s has %s trajectories." % (run,len(trajectories)))

        run_difference = []
        run_rmsd = []

        for traj in trajectories:

            # Element [0] of compute_contacts' return value is the distances.
            k295e310 = md.compute_contacts(traj, [KER[0]])
            e310r409 = md.compute_contacts(traj, [KER[1]])
            run_difference.append(10*(e310r409[0] - k295e310[0]))

            reference_atoms = SRC2.top.select(selection % (Aloop[0],Aloop[1]))
            loop_atoms = traj.top.select(selection % (Aloop[0],Aloop[1]))

            SRC2_cut = SRC2.atom_slice(reference_atoms)
            traj_cut = traj.atom_slice(loop_atoms)

            run_rmsd.append(10*(md.rmsd(traj_cut,SRC2_cut,frame=0)))

        # Join this run's per-trajectory arrays frame by frame.
        difference.append(
            np.asarray([val for sublist in run_difference for val in sublist]))
        rmsd.append(
            np.asarray([val for sublist in run_rmsd for val in sublist]))

    return [rmsd, difference]
def read_and_featurize(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, resSeq_pairs = None, iterative = True):
	"""Compute dihedral and residue-contact features for one trajectory file.

	Dihedral part (when ``dihedral_residues`` is non-empty): index tuples
	are built per requested dihedral type and the sine and cosine of the
	resulting angles are concatenated into ``manual_features``.

	Contact part (when ``resSeq_pairs`` is non-empty): the resSeq pairs are
	converted to residue indices and 'closest-heavy' contact distances are
	computed, either chunk-wise via ``md.iterload`` (``iterative=True``) or
	on the fully loaded trajectory.

	Returns ``None`` after printing the error if the contact computation
	fails. No other return is visible in this excerpt.

	NOTE(review): this block is Python 2 only (``except Exception,e`` and
	``print`` statements).
	NOTE(review): ``dihedral_residues`` and ``resSeq_pairs`` default to
	``None`` but are passed to ``len()`` unconditionally.
	NOTE(review): ``features_dir``, ``condition``, ``a`` and
	``residue_order`` are not used in the visible code.
	"""

	a = time.time()
	dihedral_indices = []
	residue_order = []
	if len(dihedral_residues) > 0:
		# NOTE(review): ``top`` is only assigned further down, inside the
		# contact branch; unless a global ``top`` exists this raises.
		for dihedral_type in dihedral_types:
			if dihedral_type == "phi": dihedral_indices.append(phi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "psi": dihedral_indices.append(psi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi1": dihedral_indices.append(chi1_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi2": dihedral_indices.append(chi2_indices(fix_topology(top), dihedral_residues))

		#print("new features has dim %d" %(2*len(phi_tuples) + 2*len(psi_tuples) + 2*len(chi2_tuples)))

		#print("feauturizing manually:")
		dihedral_angles = []

		# NOTE(review): ``traj`` is only assigned in the non-iterative
		# contact branch below, i.e. after this use -- confirm its source.
		for dihedral_type in dihedral_indices:
			angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=dihedral_type))
			dihedral_angles.append(np.sin(angles))
			dihedral_angles.append(np.cos(angles))

		manual_features = np.transpose(np.concatenate(dihedral_angles))

	if len(resSeq_pairs) > 0:
		# Only the first frame is loaded, to obtain the topology.
		top = md.load_frame(traj_file, index=0).topology
		resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
		contact_features = []
		if iterative:
			try:
				# Process the file in chunks of 1000 and join the results.
				for chunk in md.iterload(traj_file, chunk = 1000):
				#	chunk = fix_traj(chunk)
				#chunk = md.load(traj_file,stride=1000)
				#print(resIndex_pairs[0:10])
					chunk_features = md.compute_contacts(chunk, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
					print(np.shape(chunk_features))
					contact_features.append(chunk_features)
				contact_features = np.concatenate(contact_features)
			except Exception,e:
				print str(e)
				print("Failed")
				return
				#traj = md.load(traj_file)
				#contact_features = md.compute_contacts(chunk, contacts = contact_residue_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
		else:
			try:
				# Load the whole trajectory and compute all contacts at once.
				traj = md.load(traj_file)
				contact_features =  md.compute_contacts(traj, contacts = resIndex_pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
			except Exception,e:
				print str(e)
				print("Failed for traj")
				return
Beispiel #10
0
    def __init__(self,
                 native,
                 group1,
                 group2,
                 ca_cutoff_angstroms=10.,
                 verbose=True):
        """Identify the interface between two atom groups in ``native``.

        Parameters
        ----------
        native : trajectory-like
            Reference structure; only its first frame is kept.
        group1, group2 : iterable of int
            Atom indices; each group is reduced to the residues it touches.
        ca_cutoff_angstroms : float
            Residue pairs (one residue from each group) whose 'ca' contact
            distance, multiplied by 10, is below this value are contacts.
        verbose : bool
            If true, print the number of interface residues found.

        Sets ``self.native`` (first frame) and
        ``self.interface_atom_indices`` (indices of all atoms that belong to
        a residue taking part in at least one contact).
        """
        self.native = native[0]  # ensure only a single frame is passed

        res_group1, res_group2 = [
            np.array(
                sorted(set(native.topology.atom(i).residue.index
                           for i in g))) for g in (group1, group2)
        ]

        contact_pairs = np.array([(i, j) for i in res_group1
                                  for j in res_group2])
        # Only frame 0 is evaluated below, so compute contacts on the
        # single-frame copy instead of every frame of ``native``.
        ca_distances = 10. * md.compute_contacts(
            self.native, scheme='ca', contacts=contact_pairs)[0]
        is_contact = (ca_distances < ca_cutoff_angstroms)[0]
        contacts = contact_pairs[is_contact]

        side1 = set(contacts[:, 0])
        side2 = set(contacts[:, 1])
        interface_residues = side1.union(side2)
        if verbose:
            # print() with a single argument works on Python 2 and 3.
            print('%i interface residues (%i,%i)' % (len(interface_residues),
                                                     len(side1),
                                                     len(side2)))
        # Set membership keeps this filter linear in the number of atoms.
        self.interface_atom_indices = np.array([
            a.index for a in native.topology.atoms
            if a.residue.index in interface_residues
        ])
Beispiel #11
0
        def get_interface_contacts(frame, ca_cutoff_ang=10.):
            """
            Find the residues at the receptor/ligand interface of ``frame``.

            Every receptor residue is paired with every ligand residue; a
            pair counts as a contact when its 'ca' contact distance times 10
            is below ``ca_cutoff_ang`` (default 10). The sorted unique
            residue indices involved are stored on
            ``self.interface_residues`` and returned.
            The original docstring notes that this feeds into
            contacts_iterator.
            """
            topology = frame.topology

            # Residue indices of the receptor and ligand chains
            r_residues = [residue.index
                          for chain in self.receptor_chains
                          for residue in topology.chain(chain).residues]
            l_residues = [residue.index
                          for chain in self.ligand_chains
                          for residue in topology.chain(chain).residues]

            # All receptor x ligand candidate pairs
            contact_pairs = np.array([(i,j) for i in r_residues for j in l_residues])

            # Boolean mask (first frame) of pairs below the cutoff
            scaled_distances = 10.*md.compute_contacts(frame, scheme='ca', contacts=contact_pairs)[0]
            is_contact = (scaled_distances < ca_cutoff_ang)[0]

            # Keep the pairs that are in contact
            contacts = contact_pairs[is_contact]

            # Collapse the pairs into a sorted list of unique residues
            receptor_side = set(contacts[:,0])
            ligand_side = set(contacts[:,1])
            self.interface_residues = sorted(receptor_side.union(ligand_side))

            return self.interface_residues
Beispiel #12
0
 def __init__(self, struct):
     """Store the structure, its residue contacts and its sequence.

     ``struct`` is the loaded structure (the original note asks for a
     single protein chain without hetero atoms or water).
     """
     self.struct = struct
     # Contacts with mdtraj's default arguments; ``dd`` receives the
     # distances and ``rp`` the residue pairs. Per the original note, a
     # cutoff is applied later.
     contact_result = mdtraj.compute_contacts(self.struct)
     self.dd, self.rp = contact_result
     # Sequence of the residues present in the structure (per the original
     # note this may be shorter than the database FASTA sequence).
     residue_codes = [residue.code for residue in self.struct.topology.residues]
     self.seq = ''.join(residue_codes)
Beispiel #13
0
def maker_w_strings(data):
    """Build a 64x64x2 contact-map image for one protein-ligand state.

    Parameters
    ----------
    data : tuple
        ``(i, sdf_state, num_residues, pdb)``. ``i`` is not used here;
        ``sdf_state`` and ``pdb`` are forwarded to ``create_pdb``;
        ``num_residues`` is the size of the trailing block of residues that
        is labelled as protein in the mask.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(64, 64, 2)``: channel 0 is the resized
        'closest-heavy' residue contact matrix, channel 1 the resized mask
        (-1 for the leading block, +1 for the trailing ``num_residues``
        block, 0 elsewhere).
    """
    _, sdf_state, num_residues, pdb = data
    tmp_file = str(id(multiprocessing.current_process()))
    tmp_path = "tmp{0}.pdb".format(tmp_file)

    try:
        create_pdb(sdf_state, pdb, tmp_file)  # create the pdb for one state

        # Load the file into mdtraj
        t = md.load(tmp_path)
        pl = len(list(t.topology.residues))
        resi = np.arange(pl)
        pairs = list(itertools.product(resi, resi))

        # compute contacts for every ordered residue pair
        matrix, _ = md.compute_contacts(t, contacts=pairs,
                                        scheme="closest-heavy")
    finally:
        # Remove the temporary PDB even when loading or featurizing fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    matrix = np.array(matrix).reshape((pl, pl))

    # Protein-ligand mask. Clamping keeps the result identical to the
    # original nested loops, including when num_residues exceeds pl.
    n_ligand = max(pl - num_residues, 0)
    mask = np.zeros((pl, pl))
    mask[:n_ligand, :n_ligand] = -1  # ligand
    mask[n_ligand:, n_ligand:] = 1  # protein

    p_img = cv2.resize(matrix, dsize=(64, 64),
                       interpolation=cv2.INTER_CUBIC).reshape(64, 64, 1)
    m_img = cv2.resize(mask, dsize=(64, 64),
                       interpolation=cv2.INTER_CUBIC).reshape(64, 64, 1)

    return np.concatenate([p_img, m_img], axis=-1)
def get_dists(filename):
    """Return labelled peptide/binding-site residue distances for a structure.

    The peptide is taken as the last residues of the structure, as many as
    there are CA atoms with ``chainid == 2``; the binding site is the first
    180 residues. Every peptide/binding-site pair is evaluated with
    ``md.compute_contacts`` on the first frame.

    Returns
    -------
    tuple
        ``(interactions, resres_dists)``: labels of the form ``"AAA-BBB"``
        built from the first three characters of ``str(residue)``, and the
        matching distances multiplied by 10.
    """
    conf = md.load(filename)
    topology = conf.top

    num_peptide_residues = len(topology.select("name == CA and chainid == 2"))
    num_binding_site_residues = 180
    num_total_residues = len(topology.select("name == CA"))

    # NOTE(review): with zero peptide residues the slice [-0:] selects
    # every residue -- confirm that case cannot occur.
    peptide_indices = range(num_total_residues)[-num_peptide_residues:]
    binding_site_indices = range(num_binding_site_residues)

    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    contact_result = md.compute_contacts(conf, contacts_all)
    first_frame = contact_result[0][0]  # only a single frame
    pairs = contact_result[1]

    residues = list(conf.top.residues)
    interactions = []
    resres_dists = []
    for position, pair in enumerate(pairs):
        first_label = str(residues[pair[0]])[:3]
        second_label = str(residues[pair[1]])[:3]
        interactions.append(first_label + "-" + second_label)
        resres_dists.append(first_frame[position] * 10)

    return interactions, resres_dists
def featurize_sig(filename):
    """Build a sigmoid-weighted interaction-count feature vector.

    Peptide residues (the last residues, as many as there are CA atoms with
    ``chainid == 2``) are paired with the first 180 residues. For each pair
    the first-frame contact distance ``d`` (multiplied by 10) contributes
    ``1 / (1 + exp(d - 5))`` to the entry of the feature vector selected by
    ``new_interaction_to_index["AAA-BBB"]``, where the labels are the first
    three characters of ``str(residue)``.

    Returns
    -------
    numpy.ndarray
        Vector of length ``num_dim``.
    """
    conf = md.load(filename)

    num_peptide_residues = len(conf.top.select("name == CA and chainid == 2"))
    num_binding_site_residues = 180
    num_total_residues = len(conf.top.select("name == CA"))

    # NOTE(review): with zero peptide residues the slice [-0:] selects
    # every residue -- confirm that case cannot occur.
    peptide_indices = range(num_total_residues)[-num_peptide_residues:]
    binding_site_indices = range(num_binding_site_residues)

    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    distances, pairs = md.compute_contacts(conf, contacts_all)
    first_frame = distances[0]  # only a single frame

    feature_vec = np.zeros((num_dim, ))
    alpha = 5  # midpoint of the sigmoid; original note: np.log(99)+4

    residues = list(conf.top.residues)
    for position, pair in enumerate(pairs):
        r1 = str(residues[pair[0]])[:3]
        r2 = str(residues[pair[1]])[:3]
        raw_distance = first_frame[position]
        resres_dist = raw_distance * 10
        slot = new_interaction_to_index[r1 + "-" + r2]
        feature_vec[slot] += 1. / (1 + np.exp(resres_dist - alpha))
        # Report (near-)zero distances.
        if abs(raw_distance) < 0.001:
            print(r1, r2, raw_distance, pair)

    return feature_vec
def get_distances(filename):
    """Return residue-name pairs and their distances for peptide contacts.

    The peptide is taken as the last residues of the structure, as many as
    there are CA atoms with ``chainid == 2``; the binding site is the first
    180 residues. Every peptide/binding-site pair is evaluated with
    ``md.compute_contacts`` on the first frame.

    Parameters
    ----------
    filename : str
        Structure file readable by ``md.load``.

    Returns
    -------
    tuple
        ``(resres_names, pep_mhc_distances)``: a list of
        ``[str(residue1), str(residue2)]`` and the matching list of
        distances multiplied by 10.
    """
    conf = md.load(filename)

    num_peptide_residues = len(conf.top.select("name == CA and chainid == 2"))
    num_binding_site_residues = 180
    num_total_residues = len(conf.top.select("name == CA"))

    # NOTE(review): with zero peptide residues the slice [-0:] selects
    # every residue -- confirm that case cannot occur.
    peptide_indices = range(num_total_residues)[-num_peptide_residues:]
    binding_site_indices = range(num_binding_site_residues)

    contacts_all = list(
        itertools.product(peptide_indices, binding_site_indices))

    distances, pairs = md.compute_contacts(conf, contacts_all)
    distances = distances[0]  # only a single frame

    # The unused feature vector of the original was dropped: it was never
    # read and made this function depend on the global ``num_dim``.
    residues = list(conf.top.residues)
    resres_names = []
    pep_mhc_distances = []
    for i, p in enumerate(pairs):
        resres_names.append([str(residues[p[0]]), str(residues[p[1]])])
        pep_mhc_distances.append(distances[i] * 10)

    return resres_names, pep_mhc_distances
def compute_contacts_below_cutoff(traj_file_frame, cutoff = 100000.0, contact_residues = [], anton = False):
    """Find residue pairs closer than ``cutoff`` in the first frame of a file.

    Parameters
    ----------
    traj_file_frame : sequence
        Only element 0, the trajectory file path, is used.
    cutoff : float
        Pairs whose 'closest-heavy' contact distance is below this value
        are kept; the value is compared directly with the distances
        returned by ``md.compute_contacts``.
    contact_residues : sequence of tuple
        Residues given as ``(chain, resSeq)`` tuples. Not modified.
    anton : bool
        Unused in this function.

    Returns
    -------
    tuple
        ``(final_resSeq_pairs, residue_full_infos)``: the kept pairs in the
        caller's ``(chain, resSeq)`` form, and for each kept pair a tuple of
        two single-element lists ``[(resSeq, name, chain index)]``.
    """
    traj_file = traj_file_frame[0]
    frame = md.load_frame(traj_file, index = 0)
    top = frame.topology

    distance_residues = []
    resSeq_to_resIndex = {}

    for residue in contact_residues:
        # NOTE(review): ``r.chainid`` is used here while the info tuples
        # below use ``r.chain.index`` -- confirm both attributes exist.
        indices = [r.index for r in top.residues if r.resSeq == residue[1] and r.chainid == residue[0] and not r.is_water]
        if len(indices) == 0:
            # ``residue`` is a tuple, so it must be wrapped for formatting;
            # the original "%d" % residue raised TypeError on this path.
            print("No residues in trajectory for residue %s" % (residue,))
            continue

        ind = indices[0]
        for j in indices:
            if j != ind:
                # Clauses reordered so that ``r`` is bound before ``r.atoms``
                # is evaluated (the original order relied on a leaked loop
                # variable on Python 2 and fails on Python 3).
                # NOTE(review): this compares str(atom) with "CB"; confirm
                # str(atom) yields the bare atom name, otherwise it never
                # matches.
                if "CB" in [str(a) for r in top.residues if r.index == ind for a in r.atoms]:
                    ind = j
        distance_residues.append(residue)
        resSeq_to_resIndex[residue] = ind

    # Every unordered pair of the residues that were found, in both forms.
    resSeq_pairs = list(itertools.combinations(distance_residues, 2))
    res_index_combinations = [
        (resSeq_to_resIndex[res0], resSeq_to_resIndex[res1])
        for res0, res1 in resSeq_pairs
    ]

    distances = md.compute_contacts(frame, contacts = res_index_combinations, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
    print(np.shape(distances))

    final_resSeq_pairs = []
    final_resIndex_pairs = []
    # Row 0 holds the distances of the single loaded frame.
    for i, distance in enumerate(distances[0]):
        if distance < cutoff:
            final_resIndex_pairs.append(res_index_combinations[i])
            final_resSeq_pairs.append(resSeq_pairs[i])

    # One lookup table instead of rescanning the topology for every pair.
    info_by_index = {}
    for r in top.residues:
        info_by_index.setdefault(r.index, []).append(
            (r.resSeq, r.name, r.chain.index))

    residue_full_infos = [
        (list(info_by_index.get(pair[0], [])),
         list(info_by_index.get(pair[1], [])))
        for pair in final_resIndex_pairs
    ]

    print(len(final_resSeq_pairs))
    print(len(final_resIndex_pairs))
    return((final_resSeq_pairs, residue_full_infos))
Beispiel #18
0
    def partial_transform(self, traj):
        """Featurize one trajectory from its residue-residue contact distances.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            The trajectory to featurize.

        Returns
        -------
        features
            The result of ``self._transform`` applied to the contact
            distances. The original documentation describes it as a 2D
            float array of shape ``(n_samples, n_features)`` with one row
            per frame.

        See Also
        --------
        transform : featurize a collection of trajectories at once
        """
        # compute_contacts returns (distances, residue_pairs); only the
        # distances are needed here.
        contact_result = md.compute_contacts(traj, self.contacts,
                                             self.scheme,
                                             self.ignore_nonprotein)
        distances, _residue_pairs = contact_result
        return self._transform(distances)
Beispiel #19
0
def igeom2mindist_COMdist_truncation(igeom,
                                     res_COM_cutoff_Ang=25,
                                     ):
    """Minimum residue-residue contact distances, pre-filtered by COM distance.

    Residue pairs are first screened with ``COM_n_from_COM_dist_triu`` on
    the pairwise distances of the per-residue points returned by
    ``geom2COMxyz``; only pairs flagged in at least one frame are passed to
    ``mdtraj.compute_contacts``.

    Parameters
    ----------
    igeom : trajectory-like
        Input geometry; must provide ``n_residues`` and ``n_frames``.
    res_COM_cutoff_Ang : number
        Screening cutoff; it is divided by 10 and passed on as
        ``cutoff_nm``.

    Returns
    -------
    tuple
        ``(ctcs.min(0), pairs, COMs_under_cutoff_pair_idxs)``: the minimum
        over frames of every retained pair's contact distance, the retained
        residue pairs, and their positions in the upper-triangle ordering.

    Raises
    ------
    MemoryError
        Re-raised after printing a message when the contacts do not fit in
        memory.
    """
    COMs_xyz = geom2COMxyz(igeom)

    COMs_dist_triu = _np.array([pdist(ixyz) for ixyz in COMs_xyz])

    # Divide by a float so that an integer cutoff (e.g. 25) is not
    # truncated to 2 under Python 2 integer division.
    COMs_under_cutoff = COM_n_from_COM_dist_triu(COMs_dist_triu,
                                                 cutoff_nm=res_COM_cutoff_Ang / 10.)

    # flatnonzero always yields a 1-D index array; argwhere(...).squeeze()
    # collapsed to 0-d when exactly one pair survived, which turned
    # ``pairs`` into a single row instead of a (1, 2) array.
    # Assumes COMs_under_cutoff.sum(0) has one entry per pair -- TODO confirm.
    COMs_under_cutoff_pair_idxs = _np.flatnonzero(COMs_under_cutoff.sum(0) >= 1)
    pairs = _np.vstack(_np.triu_indices(igeom.n_residues, 1)).T[COMs_under_cutoff_pair_idxs]
    try:
        ctcs, ctc_idxs_dummy = _md.compute_contacts(igeom, pairs)
    except MemoryError:
        print("\nCould not fit %u contacts for %u frames into memory"%(len(pairs), igeom.n_frames))
        raise

    # mdtraj must hand back the pairs in the order they were requested.
    assert _np.allclose(pairs, ctc_idxs_dummy)

    return ctcs.min(0), pairs, COMs_under_cutoff_pair_idxs
Beispiel #20
0
    def describe_features(self, traj):
        """Return one descriptive dictionary per contact feature.

        Each dictionary holds the residue names, ``"N/A"`` as atom index,
        the resSeq values and residue indices of the pair, and the
        featurizer settings (``contacts`` as ``bigclass``, ``scheme`` as
        ``smallclass``, ``ignore_nonprotein`` as ``otherInfo``).
        """
        # Only the residue pairs of the contact computation are needed.
        _, residue_indices = md.compute_contacts(traj, self.contacts, self.scheme, self.ignore_nonprotein)

        contacts_setting = self.contacts
        scheme_setting = self.scheme
        nonprotein_setting = self.ignore_nonprotein

        descriptions = []
        for pair in residue_indices:
            pair_resseq = np.array([traj.top.residue(j).resSeq for j in pair])
            pair_resid = np.array([traj.top.residue(j).index for j in pair])
            pair_names = [traj.topology.residue(j).name for j in pair_resid]
            descriptions.append({
                'resname': pair_names,
                'atomind': "N/A",
                'resSeq': pair_resseq,
                'resid': pair_resid,
                'otherInfo': nonprotein_setting,
                'bigclass': contacts_setting,
                'smallclass': scheme_setting,
            })

        return descriptions
Beispiel #21
0
    def _Compute_Contacts_Between_Residues(self,
                                           Pairs=None,
                                           Cutoff=0.5,
                                           Trajectory=None):
        """List residue pairs that come closer than ``Cutoff`` in any frame.

        Returns a tuple ``(residues_in_contact, contacts)``:
        ``residues_in_contact`` is a list of unique ``[residue1, residue2]``
        lists taken from ``self.top``; ``contacts`` has one integer per
        residue pair.

        NOTE(review): the per-pair counter is incremented for every
        distance, not only for those below ``Cutoff``, so each entry of
        ``contacts`` equals the number of distances seen for that pair --
        confirm whether a count of contacts was intended.
        """
        output = mdtraj.compute_contacts(Trajectory, Pairs)

        # Transposed so that each row holds all distances of one pair.
        per_pair_distances = output[0].T
        residue_pairs = output[1]

        residues_in_contact = []
        contacts = []
        for pair_idx, pair_distances in enumerate(per_pair_distances):
            seen = 0
            for d in pair_distances:
                seen += 1
                if not (d < Cutoff):
                    continue

                pair = residue_pairs[pair_idx]
                candidate = [self.top.residue(pair[0]),
                             self.top.residue(pair[1])]
                if candidate not in residues_in_contact:
                    residues_in_contact.append(candidate)

            contacts.append(seen)

        return (residues_in_contact, contacts)
Beispiel #22
0
    def partial_transform(self, traj):
        """Featurize an MD trajectory into a vector space derived from
        residue-residue distances

        A warning is issued when the topology of ``traj`` does not compare
        equal to that of ``self.reference_frame`` (or cannot be compared).

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features
            The result of ``self._transform`` applied to the contact
            distances. The original documentation describes it as a 2D
            float array of shape ``(n_samples, n_features)`` with one row
            per frame.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """

        # Explicit comparison instead of ``assert`` inside a bare
        # ``except``: asserts are stripped under ``python -O`` and a bare
        # except also swallows KeyboardInterrupt/SystemExit. A failing
        # comparison still only warns, as before.
        try:
            same_topology = bool(traj.top == self.reference_frame.top)
        except Exception:
            same_topology = False
        if not same_topology:
            # Spaces added between the fragments; the original message ran
            # the words together ("notthe", "frame,which").
            warnings.warn("The topology of the trajectory is not "
                          "the same as that of the reference frame, "
                          "which might give meaningless results.")

        distances, _ = md.compute_contacts(traj, self.contacts,
                                           self.scheme,
                                           ignore_nonprotein=False)
        return self._transform(distances)
Beispiel #23
0
def test_contact_4(get_fn):
    """Smoke test: 'sidechain-heavy' contacts on a structure with glycine.

    Per the original note, the input is protonated and contains at least
    one glycine (no heavy side-chain atoms). The test passes as long as the
    call does not raise.
    """
    structure_path = get_fn('1am7_protein.pdb')
    pdb = md.load(structure_path)
    # The result itself is not inspected.
    md.compute_contacts(pdb, contacts='all', scheme='sidechain-heavy')
Beispiel #24
0
 def test_Residue_Mindist_Ca_all(self):
     """'ca' residue mindist with default pairs must match mdtraj."""
     n_ca = self.feat.topology.n_atoms  # read but not used further
     self.feat.add_residue_mindist(scheme='ca')
     featurized = self.feat.transform(self.traj)
     # Reference: distances (element 0) straight from mdtraj
     reference = mdtraj.compute_contacts(self.traj, scheme='ca')
     reference = reference[0]
     assert np.allclose(featurized, reference)
     # One description entry per feature dimension
     n_descriptions = len(self.feat.describe())
     assert n_descriptions == self.feat.dimension()
Beispiel #25
0
 def test_Residue_Mindist_Ca_array(self):
     """'ca' residue mindist for explicit residue pairs must match mdtraj."""
     residue_pairs = np.array([[20, 10], [10, 0]])
     self.feat.add_residue_mindist(scheme='ca', residue_pairs=residue_pairs)
     featurized = self.feat.transform(self.traj)
     # Reference: distances (element 0) straight from mdtraj
     reference = mdtraj.compute_contacts(self.traj, scheme='ca',
                                         contacts=residue_pairs)
     reference = reference[0]
     assert np.allclose(featurized, reference)
     # One description entry per feature dimension
     n_descriptions = len(self.feat.describe())
     assert n_descriptions == self.feat.dimension()
Beispiel #26
0
    def partial_transform(self, traj):
        """Featurize an MD trajectory into a vector space derived from
        residue-residue distances

        A warning is issued when the topology of ``traj`` does not compare
        equal to that of ``self.reference_frame`` (or cannot be compared).
        Contacts are computed with ``periodic=self.periodic``.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features
            The result of ``self._transform`` applied to the contact
            distances. The original documentation describes it as a 2D
            float array of shape ``(n_samples, n_features)`` with one row
            per frame.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """

        # Explicit comparison instead of ``assert`` inside a bare
        # ``except``: asserts are stripped under ``python -O`` and a bare
        # except also swallows KeyboardInterrupt/SystemExit. A failing
        # comparison still only warns, as before.
        try:
            same_topology = bool(traj.top == self.reference_frame.top)
        except Exception:
            same_topology = False
        if not same_topology:
            # Spaces added between the fragments; the original message ran
            # the words together ("notthe", "frame,which").
            warnings.warn("The topology of the trajectory is not "
                          "the same as that of the reference frame, "
                          "which might give meaningless results.")

        distances, _ = md.compute_contacts(traj, self.contacts,
                                           self.scheme,
                                           ignore_nonprotein=False,
                                           periodic=self.periodic)
        return self._transform(distances)
Beispiel #27
0
    def describe_features(self, traj):
        """Describe every contact feature with a dictionary.

        The keys are ``resname``, ``atomind`` (always ``"N/A"``),
        ``resSeq``, ``resid``, ``otherInfo`` (``self.ignore_nonprotein``),
        ``bigclass`` (``self.contacts``) and ``smallclass``
        (``self.scheme``). One dictionary is produced per residue pair
        returned by ``md.compute_contacts``.
        """
        contact_result = md.compute_contacts(
            traj, self.contacts, self.scheme, self.ignore_nonprotein)
        residue_indices = contact_result[1]

        def _pair_entry(pair, bigclass, smallclass, other_info):
            # Build the description of a single residue pair.
            res_seq = np.array([traj.top.residue(j).resSeq for j in pair])
            res_idx = np.array([traj.top.residue(j).index for j in pair])
            names = [traj.topology.residue(j).name for j in res_idx]
            return dict(resname=names,
                        atomind="N/A",
                        resSeq=res_seq,
                        resid=res_idx,
                        otherInfo=other_info,
                        bigclass=bigclass,
                        smallclass=smallclass)

        bigclass = self.contacts
        smallclass = self.scheme
        other_info = self.ignore_nonprotein

        return [
            _pair_entry(pair, bigclass, smallclass, other_info)
            for pair in residue_indices
        ]
Beispiel #28
0
def compute_mdtraj_order_parmeters(trajectory_file, rmsd_reference_structure=None):
    """Compute a set of order parameters for a trajectory with mdtraj.

    Parameters
    ----------
    trajectory_file : str
        Path handed to ``md.load``.
    rmsd_reference_structure : str, optional
        Path of a reference structure handed to ``md.load``. When given, the
        RMSD of the trajectory to this reference is included in the result.

    Returns
    -------
    dict
        Maps the names ``"RMSD"`` (only with a reference), ``"HBondEnergy"``,
        ``"SecondaryStructure"``, ``"Rg"`` and ``"Contacts"`` to the
        corresponding arrays.
    """
    # documentation: http://mdtraj.org/1.8.0/analysis.html#
    trajectory = md.load(trajectory_file)

    results = {}

    if rmsd_reference_structure is not None:
        reference = md.load(rmsd_reference_structure)
        results["RMSD"] = md.rmsd(trajectory, reference)

    # one summed Kabsch-Sander value per element returned by md.kabsch_sander
    results["HBondEnergy"] = np.array(
        [np.sum(x) for x in md.kabsch_sander(trajectory)])

    # Encode the DSSP codes as integers. The codes are numbered in sorted
    # order so that the encoding is identical from run to run (numbering them
    # in set-iteration order depends on string hash randomisation).
    ss = md.compute_dssp(trajectory)
    _, encoded = np.unique(ss, return_inverse=True)
    results["SecondaryStructure"] = encoded.reshape(ss.shape).T

    results["Rg"] = md.compute_rg(trajectory)

    distances, residue_pairs = md.compute_contacts(trajectory, scheme='ca')
    results["Contacts"] = md.geometry.squareform(distances, residue_pairs)

    return results
Beispiel #29
0
    def partial_transform(self, traj):
        """Featurize one MD trajectory using residue-residue distances.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            The trajectory to featurize.

        Returns
        -------
        features : np.ndarray
            The result of ``self._transform`` applied to the distances that
            ``md.compute_contacts`` returns for ``self.contacts``,
            ``self.scheme`` and ``self.ignore_nonprotein``.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """
        contact_result = md.compute_contacts(
            traj,
            contacts=self.contacts,
            scheme=self.scheme,
            ignore_nonprotein=self.ignore_nonprotein)
        # first element holds the distances, second the residue pairs
        return self._transform(contact_result[0])
Beispiel #30
0
def contacts_bonds(traj, peptide_chain ):
    """Compute closest-heavy-atom contacts between a peptide chain and chains 0-2.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory to analyse.
    peptide_chain : int
        Index of the chain whose residues form the first group.

    Returns
    -------
    tuple
        The ``(distances, residue_pairs)`` result of
        ``mdtraj.compute_contacts`` for every (peptide residue, residue of
        chain 0, 1 or 2) pair.
    """
    topology = traj.topology
    group_1 = [residue.index for residue in topology.chain(peptide_chain).residues]
    # Collect the residues of chains 0, 1 and 2 explicitly: joining the
    # per-chain residue iterators with ``or`` only ever yields chain 0,
    # because an iterator object is always truthy.
    group_2 = [residue.index
               for chain_index in range(min(3, topology.n_chains))
               for residue in topology.chain(chain_index).residues]
    pairs = list(product(group_1, group_2))

    return mdtraj.compute_contacts(traj, pairs, scheme='closest-heavy', ignore_nonprotein=True, periodic=True, soft_min=False, soft_min_beta=20)
def plot_native_state_contact_map(title):
    """Plot the contact-probability map and save it as a PDF.

    Contact pairs and probabilities are read from ``contact_pairs.dat`` and
    ``contact_probabilities.dat`` when both exist. Otherwise they are computed
    from ``BeadBead.dat``, ``traj.xtc`` and ``Native.pdb`` and written to those
    two files. The map size is taken from ``Qref_cryst.dat`` and the figure is
    saved as ``native_state_contact_map.pdf``.

    Parameters
    ----------
    title : str
        Title placed on the figure.
    """
    colors = [('white')] + [(cm.jet(i)) for i in range(1,256)]
    new_map = matplotlib.colors.LinearSegmentedColormap.from_list('new_map', colors, N=256)

    if os.path.exists("contact_pairs.dat") and os.path.exists("contact_probabilities.dat"):
        # np.savetxt wrote the pairs as floats; cast them back to integers so
        # that they can be used as array indices below.
        pairs = np.loadtxt("contact_pairs.dat", ndmin=2).astype(int)
        probability = np.atleast_1d(np.loadtxt("contact_probabilities.dat"))
    else:
        print("  Loading BeadBead.dat")
        beadbead = np.loadtxt("BeadBead.dat",dtype=str)
        sigij = beadbead[:,5].astype(float)
        # the first two columns hold 1-based indices; shift to 0-based
        pairs = beadbead[:,:2].astype(int) - 1
        np.savetxt("contact_pairs.dat",pairs)

        print("  Computing distances with mdtraj...")
        traj = md.load("traj.xtc",top="Native.pdb")
        distances = md.compute_contacts(traj,pairs)
        contacts = (distances[0] <= 1.2*sigij).astype(int)
        print("  Computing contact probability...")
        # fraction of frames in which each pair is in contact
        probability = contacts.sum(axis=0)/float(contacts.shape[0])
        np.savetxt("contact_probabilities.dat",probability)

    Qref = np.loadtxt("Qref_cryst.dat")
    C = np.zeros(Qref.shape,float)

    for pair, prob in zip(pairs, probability):
        C[pair[0],pair[1]] = prob

    print("  Plotting...")
    plt.figure()
    plt.subplot(1,1,1,aspect=1)
    plt.pcolor(C,cmap=new_map)
    for pair, prob in zip(pairs, probability):
        if prob > 0.01:
            plt.plot(pair[1],pair[0],marker='s',ms=3.0,markeredgecolor=new_map(prob),color=new_map(prob))
    plt.xlim(0,len(Qref))
    plt.ylim(0,len(Qref))
    ax = plt.gca()
    cbar = plt.colorbar()
    cbar.set_clim(0,1)
    cbar.set_label("Contact probability",fontsize=20)
    cbar.ax.tick_params(labelsize=20)
    plt.xlabel("Residue i",fontsize=20)
    plt.ylabel("Residue j",fontsize=20)
    plt.title(title)
    for label in ax.get_xticklabels() + ax.get_yticklabels():
        label.set_fontsize(15)
    print("  Saving...")
    plt.savefig("native_state_contact_map.pdf")
def ca_contact_pca(traj, n_pc, cutoff_angstroms=8., variance_scaled=True):
    """Project mean-centred binary 'ca' contact maps onto truncated-SVD components.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory handed to ``md.compute_contacts`` with ``scheme='ca'``.
    n_pc : int
        Number of components passed to ``TruncatedSVD``.
    cutoff_angstroms : float
        A pair counts as a contact when ten times its distance is below
        this value.
    variance_scaled : bool
        When true, each component is multiplied by its explained variance
        ratio.

    Returns
    -------
    np.ndarray
        The transformed (and optionally scaled) contact matrix.
    """
    from sklearn.decomposition import TruncatedSVD

    ca_distances = md.compute_contacts(traj, scheme='ca')[0]
    in_contact = 10. * ca_distances < cutoff_angstroms
    centered = in_contact - in_contact.mean(axis=0)

    svd = TruncatedSVD(n_pc).fit(centered)
    projection = svd.transform(centered)
    if variance_scaled:
        scale = svd.explained_variance_ratio_
    else:
        scale = 1.
    return projection * scale
    def compute_contacts(self, residue_pairs):
        """
        Run ``md.compute_contacts`` on ``self.traj`` for the given residue pairs.

        :param residue_pairs:  An array containing pairs of indices (0-indexed) of residues to compute the contacts between
        :return: distances:  np.ndarray, shape=(n_frames, n_pairs); residues_pairs: np.ndarray, shape=(n_pairs, 2)

        """
        contact_result = md.compute_contacts(self.traj, contacts=residue_pairs)
        return contact_result
Beispiel #34
0
def test_contact_3(get_fn):
    """Check soft-min contact distances against their squareform maps.

    For every residue pair and frame, the soft-min expression evaluated on the
    squareform entry must be close to the corresponding contact distance.
    """
    soft_min_beta = 20
    structure = md.load(get_fn('bpti.pdb'))
    dists, pairs = md.compute_contacts(structure, soft_min=True,
                                       soft_min_beta=soft_min_beta)
    maps = md.geometry.squareform(dists, pairs)

    for pair_index, (res_a, res_b) in enumerate(pairs):
        for frame in range(structure.n_frames):
            entry = maps[frame, res_a, res_b]
            expected = soft_min_beta / np.log(np.sum(np.exp(soft_min_beta / entry)))
            assert np.allclose(expected, dists[frame, pair_index])
Beispiel #35
0
def _distances(traj: md.Trajectory, scheme: str, transform: str,
               centre: Union[float, None],  steepness: Union[float, None]):
    """Return all residue-residue contact distances, optionally logistic-transformed.

    Parameters
    ----------
    traj : md.Trajectory
        Trajectory to compute contacts for.
    scheme : str
        Contact scheme forwarded to ``md.compute_contacts``.
    transform : str
        When equal to ``'logistic'`` the distances are passed through
        ``1 / (1 + exp(-steepness * (d - centre)))``; any other value returns
        the raw distances.
    centre, steepness : float or None
        Parameters of the logistic transform; both must be set when it is used.

    Returns
    -------
    np.ndarray
        The (possibly transformed) distance array.
    """
    feat, _ = md.compute_contacts(traj, contacts='all', scheme=scheme)
    if transform != 'logistic':
        return feat

    assert (centre is not None) and (steepness is not None)
    squashed = 1.0 / (1. + np.exp(-steepness * (feat - centre)))
    assert np.allclose(squashed.shape, feat.shape)
    return squashed
Beispiel #36
0
    def _get_contact_pairs(self, contacts):
        """Determine, store and return the residue pairs to compute contacts for.

        Parameters
        ----------
        contacts : str or array-like of shape (n_pairs, 2)
            Either the string ``'all'`` (every protein-chain residue paired
            with every ligand-chain residue, shifted by the residue offsets)
            or an explicit list of residue index pairs.

        Returns
        -------
        np.ndarray
            The selected residue pairs, also stored in ``self.residue_pairs``.
            Unless ``self.binding_pocket`` is ``'all'``, only pairs closer
            than ``self.binding_pocket`` in the reference frame are kept.

        Raises
        ------
        ValueError
            If the 'ca' scheme is requested but the ligand has no alpha
            carbons, if ``contacts`` is an unsupported string, if an index is
            out of range, or if no pair remains.
        """
        top = self.reference_frame.top

        if self.scheme == 'ca':
            if not any(a for a in top.chain(self.ligand_chain).atoms
                       if a.name.lower() == 'ca'):
                raise ValueError("Bad scheme: the ligand has no alpha carbons")

        # this is really similar to mdtraj/contact.py, but ensures that
        # md.compute_contacts  is always seeing an array of exactly the
        # contacts we want to specify
        if isinstance(contacts, string_types):
            if contacts.lower() != 'all':
                raise ValueError(
                    '({}) is not a valid contacts specifier'.format(
                        contacts.lower()))

            n_protein = top.chain(self.protein_chain).n_residues
            n_ligand = top.chain(self.ligand_chain).n_residues
            self.residue_pairs = np.array(
                [(i + self.p_residue_offset, j + self.l_residue_offset)
                 for i in range(n_protein)
                 for j in range(n_ligand)])

            if len(self.residue_pairs) == 0:
                raise ValueError('No acceptable residue pairs found')

        else:
            # the builtin ``int`` is what the removed ``np.int`` alias meant
            self.residue_pairs = ensure_type(np.asarray(contacts),
                                             dtype=int,
                                             ndim=2,
                                             name='contacts',
                                             shape=(None, 2),
                                             warn_on_cast=False)
            in_range = ((self.residue_pairs >= 0) &
                        (self.residue_pairs < self.reference_frame.n_residues))
            if not np.all(in_range):
                raise ValueError('contacts requests a residue that is not '
                                 'in the permitted range')

        # compare by value: identity comparison with a string literal is
        # unreliable
        if self.binding_pocket != 'all':
            ref_distances, _ = md.compute_contacts(self.reference_frame,
                                                   self.residue_pairs,
                                                   self.scheme,
                                                   ignore_nonprotein=False)
            self.residue_pairs = self.residue_pairs[np.where(
                ref_distances < self.binding_pocket)[1]]
            if len(self.residue_pairs) == 0:
                raise ValueError('No residue pairs within binding pocket')

        return self.residue_pairs
Beispiel #37
0
 def test_Residue_Mindist_Ca_all_threshold(self):
     """Thresholded 'ca' residue mindist features must equal the binarised
     reference distances from mdtraj.compute_contacts."""
     cutoff = .7
     self.feat.add_residue_mindist(scheme='ca', threshold=cutoff)
     featurized = self.feat.transform(self.traj)

     reference = mdtraj.compute_contacts(self.traj, scheme='ca')[0]
     expected = np.zeros_like(reference)
     # mark every (frame, pair) entry at or below the cutoff
     expected[reference <= cutoff] = 1

     assert np.allclose(featurized, expected)
     assert len(self.feat.describe()) == self.feat.dimension()
Beispiel #38
0
def get_pocket_residues(traj):
    """Build a ``resid ... or resid ...`` string for residues near the last residue.

    The trajectory is reduced to the atoms matching ``protein or resn UNL``.
    Every other residue is then paired with the last residue, and residues
    whose contact distance in the first frame is at most 0.5 are reported,
    with their index shifted by one.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory to analyse.

    Returns
    -------
    str
        Terms of the form ``resid N`` joined by `` or ``.
    """
    kept_atoms = traj.topology.select("protein or resn UNL")
    traj = traj.atom_slice(kept_atoms)

    last_residue = len(list(traj.topology.residues)) - 1
    pairs = [(index, last_residue) for index in range(last_residue)]

    res, pairs = md.compute_contacts(traj, pairs)
    nearby = np.where(res[0] <= 0.5)[0] + 1
    return " or ".join("resid {}".format(residue) for residue in nearby)
def shukla_coords(trajectories,KER,Aloop,SRC2):
    """Compute per-trajectory activation-loop RMSD and contact-distance difference.

    Parameters
    ----------
    trajectories : iterable of mdtraj.Trajectory
        Trajectories to analyse.
    KER : sequence
        Two residue pairs; the reported difference is the contact distance of
        ``KER[1]`` minus that of ``KER[0]``.
    Aloop : sequence
        First and last ``resid`` of the backbone selection used for the RMSD,
        applied to both the trajectory and the reference.
    SRC2 : mdtraj.Trajectory
        Reference structure for the RMSD.

    Returns
    -------
    list
        ``[rmsd, difference]``, each a list holding one array per trajectory,
        with values multiplied by 10.
    """
    scale = 10  # 10x because mdtraj is naturally in nm
    rmsd, difference = [], []

    for traj in trajectories:
        first_pair = md.compute_contacts(traj, [KER[0]])[0]
        second_pair = md.compute_contacts(traj, [KER[1]])[0]
        difference.append(scale * (second_pair - first_pair))

        loop_query = "backbone and (resid %s to %s)" % (Aloop[0], Aloop[1])
        reference_loop = SRC2.atom_slice(SRC2.top.select(loop_query))
        traj_loop = traj.atom_slice(traj.top.select(loop_query))
        rmsd.append(scale * md.rmsd(traj_loop, reference_loop, frame=0))

    return [rmsd, difference]
Beispiel #40
0
def test_contact_1():
    """Default contacts on bpti.pdb: valid residue indices, sequence
    separation of at least 3, and agreement with the squareform maps."""
    pdb = md.load(get_fn('bpti.pdb'))
    dists, pairs = md.compute_contacts(pdb)
    topology = pdb.topology

    for first, second in pairs:
        # looking the residues up fails if the indices are not valid
        topology.residue(first)
        topology.residue(second)

        assert abs(first - second) >= 3

    maps = md.geometry.squareform(dists, pairs)
    for pair_index, (first, second) in enumerate(pairs):
        for frame in range(pdb.n_frames):
            eq(maps[frame, first, second], dists[frame, pair_index])
def test_ContactFeaturizer_describe_features():
    """Spot-check 25 random ContactFeaturizer columns against contacts
    recomputed from the residue ids in describe_features."""
    scheme = np.random.choice(['ca','closest','closest-heavy'])
    feat = ContactFeaturizer(scheme=scheme, ignore_nonprotein=True)
    rnd_traj = np.random.randint(len(trajectories))
    chosen = trajectories[rnd_traj]
    features = feat.transform([chosen])
    df = pd.DataFrame(feat.describe_features(chosen))

    for _ in range(25):
        f_index = np.random.choice(len(df))
        residue_ind = df.iloc[f_index].resids

        recomputed = md.compute_contacts(chosen,
                                         contacts=[residue_ind],
                                         scheme=scheme)[0]
        assert (features[0][:, f_index] == recomputed.flatten()).all()
Beispiel #42
0
    def describe_features(self, traj):
        """Return a list of dictionaries describing the contacts features.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            The trajectory to describe

        Returns
        -------
        feature_descs : list of dict
            One entry per residue pair, built by ``dict_maker`` from the
            featurizer label ``"Ligand Contact"``, the contact scheme, and
            for each pair its atom indices (always ``"N/A"``), residue
            sequence ids, residue indices and residue names.
        """
        # the residue pairs are obtained from the first frame only
        _, residue_indices = md.compute_contacts(traj[0],
                                        self.contacts, self.scheme,
                                        ignore_nonprotein=False,
                                        periodic=self.periodic)
        top = traj.topology

        aind = ["N/A" for _ in residue_indices]
        resseqs = [[top.residue(ri).resSeq for ri in pair]
                   for pair in residue_indices]
        resnames = [[top.residue(ri).name for ri in pair]
                    for pair in residue_indices]

        per_pair = zip(aind, resseqs, residue_indices, resnames)
        zippy = itertools.product(["Ligand Contact"], [self.scheme],
                                  ["N/A"], per_pair)

        feature_descs = []
        feature_descs.extend(dict_maker(zippy))
        return feature_descs
Beispiel #43
0
    def _get_contact_pairs(self, contacts):
        """Determine, store and return the residue pairs to compute contacts for.

        Parameters
        ----------
        contacts : str or array-like of shape (n_pairs, 2)
            Either the string ``'all'`` (every protein-chain residue paired
            with every ligand-chain residue, shifted by the residue offsets)
            or an explicit list of residue index pairs.

        Returns
        -------
        np.ndarray
            The selected residue pairs, also stored in ``self.residue_pairs``.
            Unless ``self.binding_pocket`` is ``'all'``, only pairs closer
            than ``self.binding_pocket`` in the reference frame are kept.

        Raises
        ------
        ValueError
            If the 'ca' scheme is requested but the ligand has no alpha
            carbons, if ``contacts`` is an unsupported string, if an index is
            out of range, or if no pair remains.
        """
        top = self.reference_frame.top

        if self.scheme=='ca':
            if not any(a for a in top.chain(self.ligand_chain).atoms
                       if a.name.lower() == 'ca'):
                raise ValueError("Bad scheme: the ligand has no alpha carbons")

        # this is really similar to mdtraj/contact.py, but ensures that
        # md.compute_contacts  is always seeing an array of exactly the
        # contacts we want to specify
        if isinstance(contacts, string_types):
            if contacts.lower() != 'all':
                raise ValueError('({}) is not a valid contacts specifier'.format(contacts.lower()))

            n_protein = top.chain(self.protein_chain).n_residues
            n_ligand = top.chain(self.ligand_chain).n_residues
            self.residue_pairs = np.array(
                [(i+self.p_residue_offset, j+self.l_residue_offset)
                 for i in range(n_protein)
                 for j in range(n_ligand)])

            if len(self.residue_pairs) == 0:
                raise ValueError('No acceptable residue pairs found')

        else:
            # the builtin ``int`` is what the removed ``np.int`` alias meant
            self.residue_pairs = ensure_type(np.asarray(contacts),
                                        dtype=int, ndim=2, name='contacts',
                                        shape=(None, 2), warn_on_cast=False)
            in_range = ((self.residue_pairs >= 0) &
                        (self.residue_pairs < self.reference_frame.n_residues))
            if not np.all(in_range):
                raise ValueError('contacts requests a residue that is not '
                                 'in the permitted range')

        # compare by value: identity comparison with a string literal is
        # unreliable
        if self.binding_pocket != 'all':
            ref_distances, _ = md.compute_contacts(self.reference_frame,
                                     self.residue_pairs, self.scheme,
                                     ignore_nonprotein=False, periodic=self.periodic)
            self.residue_pairs = self.residue_pairs[np.where(ref_distances<
                                     self.binding_pocket)[1]]
            if len(self.residue_pairs) == 0:
                raise ValueError('No residue pairs within binding pocket')

        return self.residue_pairs
Beispiel #44
0
def test_contact_2():
    """'closest' contacts on a solvated structure: no pair involves HOH, one
    pair matches the minimum atom-atom distance, and the squareform maps
    agree with the distances."""
    pdb = md.load(get_fn('1vii_sustiva_water.pdb'))
    dists, pairs = md.compute_contacts(pdb, scheme='closest')
    topology = pdb.topology

    for first, second in pairs:
        assert topology.residue(first).name != 'HOH'
        assert topology.residue(second).name != 'HOH'

    # spot check one of the pairs
    spot = 10
    first, second = pairs[spot]
    atoms_first = [a.index for a in topology.residue(first).atoms]
    atoms_second = [a.index for a in topology.residue(second).atoms]
    atom_pairs = list(itertools.product(atoms_first, atoms_second))

    atomdist = md.compute_distances(pdb, atom_pairs)
    np.testing.assert_array_equal(dists[:, spot], atomdist.min(axis=1))

    maps = md.geometry.squareform(dists, pairs)
    for pair_index, (first, second) in enumerate(pairs):
        for frame in range(pdb.n_frames):
            eq(maps[frame, first, second], dists[frame, pair_index])
def find_respairs_that_changed(fnames,
                               scheme = 'ca',    # or 'closest' or 'closest-heavy'
                               threshold = 0.4,
                               stride = 100,
                               max_respairs = 1000):
    '''
    Find the residue pairs whose contact state changes across trajectories.

    A pair counts as changed when its distance is above the threshold in at
    least one frame and below it in at least one frame.

    Parameters
    ----------
    fnames : list of paths to trajectories

    scheme : 'ca' or 'closest' or 'closest-heavy'

    threshold : float
        contact threshold (nm)

    stride : int
        only every `stride`th frame of each trajectory is loaded

    max_respairs : int
        upper bound on the number of residue pairs returned; when more pairs
        changed, those that spent the most frames on their rarer side of the
        threshold are kept

    Returns
    -------
    respairs_that_changed : array of residue index pairs

    Raises
    ------
    ValueError
        if `fnames` is empty
    '''
    if len(fnames) == 0:
        raise ValueError("need at least one trajectory file")

    distances = []
    for fname in fnames:
        traj = md.load(fname,stride=stride)
        pairwise_distances,residue_pairs = md.compute_contacts(traj,scheme=scheme)
        distances.append(pairwise_distances)
    distances = np.vstack(distances)

    # identify contacts that change by counting how many times the distances were
    # greater than and less than the threshold
    num_times_greater_than = (distances>threshold).sum(0)
    num_times_less_than = (distances<threshold).sum(0)
    changed = (num_times_greater_than > 0) & (num_times_less_than > 0)
    print("Number of contacts that changed: {0}".format(changed.sum()))
    print("Total number of possible contacts: {0}".format(len(residue_pairs)))

    # Cap on the number of pairs that actually changed, not on the total
    # number of pairs; otherwise unchanged pairs could be returned whenever
    # fewer than max_respairs pairs changed.
    if changed.sum() > max_respairs:
        n_diff = np.minimum(num_times_less_than, num_times_greater_than)
        # stable sort keeps the original pair order among ties
        changed = np.argsort(-n_diff, kind='mergesort')[:max_respairs]

    # now turn this selection into a list of relevant residue pairs
    respairs_that_changed = residue_pairs[changed]

    return respairs_that_changed
Beispiel #46
0
    def prepare_trajectory(self, trajectory):
        """Convert a trajectory into residue-residue contact distances.

        The distances are computed by ``md.compute_contacts`` for
        ``self.contacts`` with ``self.scheme``.

        Parameters
        ----------
        trajectory : mdtraj.Trajectory
            The trajectory to prepare

        Returns
        -------
        pairwise_distances : ndarray
            The distance element of the ``md.compute_contacts`` result.
        """
        # md.compute_contacts returns (distances, residue pairs); only the
        # distances are needed here
        pairwise_distances, _ = md.compute_contacts(
            trajectory, contacts=self.contacts, scheme=self.scheme)
        return pairwise_distances
def get_distances(fname, scheme, stride):
    '''
    Load a strided trajectory and compute its residue-residue contacts.

    Kept as a plain module-level function so that it is callable by a
    multiprocessing Pool.

    Parameters
    ----------
    fname : string
        filename of trajectory
    scheme : string
        'ca' or 'closest' or 'closest-heavy'
    stride : int
        thinning factor: only look at every `stride`th frame

    Returns
    -------
    pairwise_distances : numpy array

    residue_pairs : residue index pairs as returned by md.compute_contacts
    '''
    thinned = md.load(fname, stride=stride)
    contact_result = md.compute_contacts(thinned, scheme=scheme)
    return contact_result[0], contact_result[1]
def read_and_featurize_iter(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, contact_residues = None):
	"""Featurize a trajectory file and dump the features to an .h5 file.

	Features are the sines and cosines of the requested dihedral angles and/or
	closest-heavy-atom contact distances between all pairs of the requested
	residues. Contacts are computed chunk by chunk via md.iterload.

	traj_file         -- trajectory path passed to md.iterload
	features_dir      -- directory part of the output path "%s/%s.h5"
	condition         -- file-name part of the output path; when None it is
	                     obtained from get_condition(traj_file)
	dihedral_types    -- any of "phi", "psi", "chi1", "chi2"
	dihedral_residues -- residues handed to the *_indices helpers
	contact_residues  -- resSeq values of the residues to compute contacts for

	NOTE(review): len() is taken of dihedral_residues and contact_residues, so
	the None defaults raise TypeError; callers must pass sequences.
	NOTE(review): `top` and `traj` are used in the dihedral branch but are not
	assigned anywhere in this function -- presumably module-level names;
	confirm, otherwise that branch raises NameError.
	NOTE(review): manual_features is only assigned when at least one of the
	two residue lists is non-empty.
	"""

	a = time.time()
	dihedral_indices = []
	residue_order = []
	if len(dihedral_residues) > 0:
		# collect one set of atom indices per requested dihedral type
		for dihedral_type in dihedral_types:
			if dihedral_type == "phi": dihedral_indices.append(phi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "psi": dihedral_indices.append(psi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi1": dihedral_indices.append(chi1_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi2": dihedral_indices.append(chi2_indices(fix_topology(top), dihedral_residues))

		#print("new features has dim %d" %(2*len(phi_tuples) + 2*len(psi_tuples) + 2*len(chi2_tuples)))

		#print("feauturizing manually:")
		dihedral_angles = []

		# each angle contributes its sine and its cosine as separate features
		for dihedral_type in dihedral_indices:
			angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=dihedral_type))
			dihedral_angles.append(np.sin(angles))
			dihedral_angles.append(np.cos(angles))

		manual_features = np.transpose(np.concatenate(dihedral_angles))

	if len(contact_residues) > 0:
		contact_features = []
		# process the trajectory in chunks of 10000 frames
		for chunk in md.iterload(traj_file, chunk = 10000):
			
			fixed_traj = fix_traj(chunk)
			fixed_top = fixed_traj.topology
			distance_residues = []
			res_objects = [r for r in fixed_top.residues]
			# map each requested resSeq to the index of every residue with
			# that resSeq and more than 5 atoms
			for r in contact_residues:
				for res in res_objects:
					if res.resSeq == r and len(res._atoms) > 5:
						#print res._atoms
						distance_residues.append(res.index)
			# a count mismatch is only reported; processing continues
			if len(contact_residues) != len(distance_residues):
				print "Residues are missing"
				print len(contact_residues)
				print len(distance_residues)
				#sys.exit()
				#return None
			
			# all unordered pairs of the matched residues
			combinations = itertools.combinations(distance_residues, 2)
			pairs = [c for c in combinations]
			#print pairs
			
			contact_features.append(md.compute_contacts(fixed_traj, contacts = pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0])
		
		# stack the per-chunk distance arrays along the first axis
		contact_features = np.concatenate(contact_features)

		if len(dihedral_residues) > 0: 
			manual_features = np.column_stack((manual_features, contact_features))
		else:
			manual_features = contact_features


	b = time.time()

	print("new features %s has shape: " %traj_file)
	print(np.shape(manual_features))

	if condition is None:
		condition = get_condition(traj_file)

	verbosedump(manual_features, "%s/%s.h5" %(features_dir, condition))
Beispiel #49
0
def featurize(path_to_files,model_name):
    """Select changing 'ca' contacts, featurize all trajectories, and run tICA.

    Step 1 scans a strided subset of the files for residue pairs whose
    distance is above the threshold in some frame and below it in another,
    and saves them to ``<model_name>_respairs_that_changed.npy``. Step 2
    computes those contacts for every frame of every file. The tICA output
    is saved to ``<model_name>_tica.npz``.

    Parameters
    ----------
    path_to_files : str
        Glob pattern matching the trajectory files.
    model_name : str
        Prefix of the two output files.
    """
    files = glob(path_to_files)
    print("Number of files: {0}".format(len(files)))

    # timestep between frames: 250 picoseconds

    #### A. FEATURE EXTRACTION ####

    ### Step 1: identifying interresidue contacts that change

    stride = 100      # stride within trajectory
    traj_thin = 5     # only look at 1 in traj_thin trajectories
    scheme = 'ca'
    threshold = 0.8   # contact threshold, compared directly with the computed distances

    # compute full contact maps for a strided subset of the simulation frames
    strided_distances = []
    for fname in files[::traj_thin]:
        thinned = md.load(fname, stride=stride)
        distances, residue_pairs = md.compute_contacts(thinned, scheme=scheme)
        strided_distances.append(distances)
    strided_distances = np.vstack(strided_distances)

    # a contact changed if it was seen on both sides of the threshold
    num_times_greater_than = np.sum(strided_distances > threshold, axis=0)
    num_times_less_than = np.sum(strided_distances < threshold, axis=0)
    changed = np.logical_and(num_times_greater_than > 0,
                             num_times_less_than > 0)
    print("Number of contacts that changed: {0}".format(changed.sum()))
    print("Total number of possible contacts: {0}".format(len(residue_pairs)))

    # turn the bitmask into a list of relevant residue pairs and save it
    respairs_that_changed = residue_pairs[changed]
    np.save('{0}_respairs_that_changed.npy'.format(model_name),respairs_that_changed)

    ### Step 2: extract these selected features from the full dataset

    traj_thin = 1     # only look at 1 in traj_thin trajectories
    files_of_interest = files[::traj_thin]
    n_files = len(files_of_interest)

    X = []
    for i, fname in enumerate(files_of_interest):
        print('{0}/{1}'.format(i,n_files))
        full = md.load(fname)
        selected = md.compute_contacts(full, contacts=respairs_that_changed,
                                       scheme=scheme)[0]
        X.append(selected)

    print("Initial dimensionality: {0}".format(X[0].shape[1]))
    print("# frames: {0}".format(np.vstack(X).shape[0]))

    ##### B. KINETIC DISTANCE LEARNING #####
    tica = pyemma.coordinates.tica(X)
    Y = tica.get_output()

    # save tica output
    np.savez_compressed('{0}_tica.npz'.format(model_name),*Y)
    print("Dimensionality after tICA, retaining enough eigenvectors to explain 0.95 of kinetic variation: {0}".format(np.vstack(Y).shape[1]))
Beispiel #50
0
from __future__ import print_function
import mdtraj as md
import numpy as np
import itertools

t = md.load('1yrc_added.pdb')                         # reference

# Extract interface index: every residue of group1 paired with every residue
# of group2
group1 = range(0, 85)
group2 = range(85, 98)
pairs = list(itertools.product(group1, group2))
A = md.compute_contacts(t, pairs, scheme='closest-heavy')

# A[0] has one row of distances per frame. Take the per-pair minimum over the
# frames: for a single-frame structure this is simply that frame's distances,
# and it stays well defined if more than one frame is loaded (the builtin
# min() cannot compare rows of a multi-frame array).
A1 = A[0].min(axis=0)   # distance array
A2 = A[1]               # residue pairs

# condition 1: indices of the pairs whose distance is at most 1.0
H = [x for x, distance in enumerate(A1) if distance <= 1.0]

# Extract the residue pairs from A2 which satisfy condition 1
A3 = A2[H]

# Extract the index of residues which belong to protein (first pair member)
A4 = [pair[0] for pair in A3]

A5 = list(set(A4)) # the protein residues' index which consist of the interface
    print "  Loading BeadBead.dat"
    beadbead = np.loadtxt("BeadBead.dat",dtype=str) 
    sigij = beadbead[:,5].astype(float)
    epsij = beadbead[:,6].astype(float)
    deltaij = beadbead[:,7].astype(float)
    interaction_numbers = beadbead[:,4].astype(int)
    pairs = beadbead[:,:2].astype(int) 
    pairs -= np.ones(pairs.shape,int)

    pairs = pairs[ interaction_numbers != 0 ]
    sigij = sigij[ interaction_numbers != 0 ]

    print "  Computing distances with mdtraj..."
    traj = md.load("traj.xtc",top="Native.pdb")
    distances = md.compute_contacts(traj,pairs)
    contacts = (distances[0][:] <= 1.2*sigij).astype(int)
    
    keep_frames = (((x > bounds[0]).astype(int)*(x < bounds[1]).astype(int)) == 1)
    contacts = contacts[keep_frames,:]
     
    print "  Computing contact probability..."
    probability = sum(contacts.astype(float))/contacts.shape[0]

    C = np.zeros(Qref.shape,float)
    for k in range(len(pairs)):
        C[pairs[k][0],pairs[k][1]] = probability[k]

    print "  Plotting..."
    plt.figure()
    plt.subplot(1,1,1,aspect=1)
def read_and_featurize_custom(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, contact_residues = None):
	"""Featurize a trajectory file and dump the features to an .h5 file.

	Features are the sines and cosines of the requested dihedral angles and/or
	closest-heavy-atom contact distances between all pairs of the requested
	residues. The whole trajectory is loaded at once with md.load.

	traj_file         -- trajectory path passed to md.load_frame / md.load
	features_dir      -- directory part of the output path "%s/%s.h5"
	condition         -- file-name part of the output path; when None it is
	                     obtained from get_condition(traj_file)
	dihedral_types    -- any of "phi", "psi", "chi1", "chi2"
	dihedral_residues -- residues handed to the *_indices helpers
	contact_residues  -- resSeq values of the residues to compute contacts for

	NOTE(review): len() is taken of dihedral_residues and contact_residues, so
	the None defaults raise TypeError; callers must pass sequences.
	NOTE(review): residue indices are looked up in the topology of
	fix_traj(traj), but md.compute_contacts is called on `traj` itself --
	confirm that fix_traj keeps the residue indexing unchanged.
	NOTE(review): manual_features is only assigned when at least one of the
	two residue lists is non-empty.
	"""
	#if "23" not in traj_file and "24" not in traj_file: return
	# topology is read from the first frame only
	top = md.load_frame(traj_file,index = 0).topology
	#atom_indices = [a.index for a in top.atoms if a.residue.resSeq != 130]
	atom_indices = [a.index for a in top.atoms]
	traj = md.load(traj_file, atom_indices=atom_indices)
	print traj_file
	#print traj
	#print("loaded trajectory")

	'''
	a = time.time()
	featurizer = DihedralFeaturizer(types = ['phi', 'psi', 'chi2'])
	features = featurizer.transform(traj)
	b = time.time()
	#print(b-a)
	print("original features has dim")
	print(np.shape(features))
	'''
	a = time.time()
	dihedral_indices = []
	residue_order = []
	if len(dihedral_residues) > 0:
		# collect one set of atom indices per requested dihedral type
		for dihedral_type in dihedral_types:
			if dihedral_type == "phi": dihedral_indices.append(phi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "psi": dihedral_indices.append(psi_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi1": dihedral_indices.append(chi1_indices(fix_topology(top), dihedral_residues))
			if dihedral_type == "chi2": dihedral_indices.append(chi2_indices(fix_topology(top), dihedral_residues))

		#print("new features has dim %d" %(2*len(phi_tuples) + 2*len(psi_tuples) + 2*len(chi2_tuples)))

		#print("feauturizing manually:")
		dihedral_angles = []

		# each angle contributes its sine and its cosine as separate features
		for dihedral_type in dihedral_indices:
			angles = np.transpose(ManualDihedral.compute_dihedrals(traj=traj,indices=dihedral_type))
			dihedral_angles.append(np.sin(angles))
			dihedral_angles.append(np.cos(angles))

		manual_features = np.transpose(np.concatenate(dihedral_angles))

	if len(contact_residues) > 0:
		fixed_traj = fix_traj(traj)
		fixed_top = fixed_traj.topology
		distance_residues = []
		res_objects = [r for r in fixed_top.residues]
		# map each requested resSeq to the index of every residue with that
		# resSeq and more than 5 atoms
		for r in contact_residues:
			for res in res_objects:
				if res.resSeq == r and len(res._atoms) > 5:
					#print res._atoms
					distance_residues.append(res.index)
		# a count mismatch is only reported; processing continues
		if len(contact_residues) != len(distance_residues):
			print "Residues are missing"
			print len(contact_residues)
			print len(distance_residues)
			#sys.exit()
			#return None
		
		# all unordered pairs of the matched residues
		combinations = itertools.combinations(distance_residues, 2)
		pairs = [c for c in combinations]
		#print pairs
		contact_features = md.compute_contacts(traj, contacts = pairs, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
		#print contact_features
		#print(np.shape(contact_features))
		if len(dihedral_residues) > 0: 
			manual_features = np.column_stack((manual_features, contact_features))
		else:
			manual_features = contact_features


	b = time.time()

	print("new features %s has shape: " %traj_file)
	print(np.shape(manual_features))

	if condition is None:
		condition = get_condition(traj_file)

	verbosedump(manual_features, "%s/%s.h5" %(features_dir, condition))
def create_features(ref, prot, lig, d):
    """Return the residue pairs whose contact distance in `ref` is below `d`.

    Parameters
    ----------
    ref : mdtraj.Trajectory
        Structure used both for the atom-to-residue lookup and the contacts.
    prot, lig : iterable of int
        Atom indices; each is mapped to the index of its residue.
    d : float
        Distance cutoff applied to the computed contact distances.

    Returns
    -------
    np.ndarray
        Rows of residue index pairs (one row per frame/pair hit below `d`).
    """
    protein_residues = [ref.topology.atom(i).residue.index for i in prot]
    ligand_residues = [ref.topology.atom(i).residue.index for i in lig]
    candidate_pairs = list(itertools.product(protein_residues, ligand_residues))

    distances, residue_pairs = md.compute_contacts(ref, contacts=candidate_pairs)
    # column indices of np.where identify the pairs below the cutoff
    _, close_columns = np.where(distances < d)
    return residue_pairs[close_columns, :]
Beispiel #54
0
        eps[i][0] = a-33
    elif 264<a<342:
        eps[i][0] = a-67

    if 29<b<176:
        eps[i][1] = b-30
    elif 178<b<231:
        eps[i][1] = b-33
    elif 264<b<342:
        eps[i][1] = b-67

# Load the two structures to compare
t1 = md.load('3SN6-R.pdb')
t2 = md.load('2RH1.pdb')

# Residue pairs are the first two entries of every row of eps
eps1 = [[row[0], row[1]] for row in eps]
dist1 = md.compute_contacts(t1, contacts=eps1, scheme='closest')
dist2 = md.compute_contacts(t2, contacts=eps1, scheme='closest')

# Per-pair distance difference between the first frames of both structures,
# plotted against the third entry of every eps row
deltaDist = [d1 - d2 for d1, d2 in zip(dist1[0][0], dist2[0][0])]
x = [row[2] for row in eps]
plt.scatter(x, np.absolute(deltaDist))
plt.savefig('fig1.png')
plt.show()
####################################################################################################################
# calculating dihedrals
####################################################################################################################
top1 = md.load('3SN6-R.pdb').topology
top2 = md.load('2RH1.pdb').topology

dhdrls101 = []
dhdrls102 = []
def calculate_metrics(traj, features, d):
    """Count, per frame, how many of the given residue pairs are closer than 0.5.

    Parameters
    ----------
    traj : mdtraj.Trajectory
        Trajectory to analyse.
    features : array-like
        Residue pairs passed to ``md.compute_contacts`` as ``contacts``.
    d : object
        Not used by this function; the cutoff is fixed at 0.5.

    Returns
    -------
    np.ndarray
        One count per frame.
    """
    distances, _ = md.compute_contacts(traj, contacts=features)
    within_cutoff = distances < .5
    return np.sum(within_cutoff, axis=1)
import mdtraj as md
import matplotlib.pyplot as plt
import numpy as np

from msmbuilder import dataset

import seaborn as sns
sns.set_style("whitegrid")
sns.set_context("poster")

#Load trajectory with ensembler models
t_models = md.load("../ensembler-models/traj-refine_implicit_md.xtc", top = "../ensembler-models/topol-renumbered-implicit.pdb")

#define 'difference' as hydrogen bond distance

k295e310 = md.compute_contacts(t_models, [[28,43]])
e310r409 = md.compute_contacts(t_models, [[43,142]])
difference = e310r409[0] - k295e310[0]

#define 'rmsd' as RMSD of activation loop from 2SRC structure

SRC2 = md.load("../reference-structures/SRC_2SRC_A.pdb")

Activation_Loop_SRC2 = [atom.index for atom in SRC2.topology.atoms if (138 <= atom.residue.index <= 158)]
Activation_Loop_Src = [atom.index for atom in t_models.topology.atoms if (138 <= atom.residue.index <= 158)]

SRC2.atom_slice(Activation_Loop_SRC2)
t_models.atom_slice(Activation_Loop_Src)

difference = difference[:,0]