Example #1
0
    def __init__(self, topology_file, trajectory, start_frame=0,
                 num_frames=0, supporting_file=None, rho_bulk=None):
        """Set up a water analysis for one trajectory/topology pair.

        Parameters
        ----------
        topology_file : string
            Filename of the system topology file.
        trajectory : string
            Filename of the molecular dynamics trajectory.
        start_frame : int, optional
            Index of the frame at which calculations begin. Default: 0
        num_frames : int, optional
            Number of frames over which calculations are performed. Values
            below 100 (including the default of 0) fail the assertion below.
        supporting_file : string, optional
            Filename of an additional file containing non-bonded parameters
            for every particle in the system. Default: None
        rho_bulk : float, optional
            Reference bulk water density. When None, 0.0334 is used.
        """

        self.topology_file = topology_file
        self.trajectory = trajectory
        self.supporting_file = supporting_file
        self.start_frame = start_frame
        assert num_frames >= 100, "A minimum of 100 frames are required for analysis."
        self.num_frames = num_frames
        self.check_topology_requiremnts(self.topology_file, self.supporting_file)

        # The starting frame supplies the topology and the unit cell check.
        initial = md.load_frame(self.trajectory, self.start_frame, top=self.topology_file)
        assert initial.unitcell_lengths is not None, "Could not detect unit cell information."
        self.topology = initial.topology

        # The box is classified from the angles of frame 0 (not start_frame).
        # The warning is printed only when that check itself raises.
        self.box_type = "Unspecified"
        try:
            frame_zero = md.load_frame(self.trajectory, 0, top=self.topology_file)
            if np.allclose(frame_zero.unitcell_angles, 90):
                self.box_type = "Orthorhombic"
        except Exception:
            print("WARNING: Only orthorhombic periodic boxes are currently supported.")

        self.rho_bulk = 0.0334 if rho_bulk is None else rho_bulk

        # Fallback selection string: "resname A or resname B or ...".
        resname_selection = " or ".join("resname %s" % name for name in _WATER_RESNAMES)

        select = self.topology.select
        self.all_atom_ids = select("all")
        self.wat_atom_ids = select("water")
        self.prot_atom_ids = select("protein")
        if self.wat_atom_ids.shape[0] == 0:
            # The generic "water" keyword found nothing; try the known residue names.
            self.wat_atom_ids = select(resname_selection)
        assert (self.wat_atom_ids.shape[0] != 0), "Unable to recognize waters in the system!"
        assert (self.topology.atom(self.wat_atom_ids[0]).name == "O"), "Failed while constructing water oxygen atom indices!"
        self.wat_oxygen_atom_ids = np.asarray(
            [idx for idx in self.wat_atom_ids if self.topology.atom(idx).name == "O"])
        self.non_water_atom_ids = np.setdiff1d(self.all_atom_ids, self.wat_atom_ids)
        assert (self.wat_atom_ids.shape[0] + self.non_water_atom_ids.shape[0] == self.all_atom_ids.shape[0]), "Failed to partition atom indices in the system correctly!"
Example #2
0
    def onJoinTrajectories(self):
        """Join the two configured trajectories chunk by chunk into a new H5 file.

        The output filename is requested through a save dialog. Depending on
        ``self.join_mode`` the coordinates of both inputs are concatenated
        along the frame axis (``'time'``) or along the atom axis (``'atoms'``).
        When ``self.reverse_traj_1`` / ``self.reverse_traj_2`` are set, the
        frame order is reversed within each chunk of the respective input.

        Raises
        ------
        ValueError
            If ``self.join_mode`` is neither ``'time'`` nor ``'atoms'``.
        """
        selection = QtWidgets.QFileDialog.getSaveFileName(None, 'Save H5-Model file', '', 'H5-files (*.h5)')
        # Qt5 bindings return (filename, selected_filter). Index the result
        # first and convert afterwards; str(result)[0] would only yield the
        # first character of the stringified tuple.
        if isinstance(selection, (tuple, list)):
            selection = selection[0]
        target_filename = str(selection)
        if not target_filename:
            # Dialog was cancelled: nothing to write.
            return

        fn1 = self.trajectory_filename_1
        fn2 = self.trajectory_filename_2

        r1 = self.reverse_traj_1
        r2 = self.reverse_traj_2

        traj_1 = md.load_frame(fn1, index=0)
        traj_2 = md.load_frame(fn2, index=0)

        # Combine the first frames only to obtain the joined topology.
        if self.join_mode == 'time':
            traj_join = traj_1.join(traj_2)
            axis = 0
        elif self.join_mode == 'atoms':
            traj_join = traj_1.stack(traj_2)
            axis = 1
        else:
            raise ValueError("Unknown join mode: %r" % (self.join_mode,))

        # Write an empty trajectory so the file has the right layout, then
        # append the coordinates chunk-wise through PyTables.
        target_traj = md.Trajectory(xyz=np.empty((0, traj_join.n_atoms, 3)), topology=traj_join.topology)
        target_traj.save(target_filename)

        chunk_size = self.chunk_size
        table = tables.open_file(target_filename, 'a')
        try:
            for i, (c1, c2) in enumerate(izip(md.iterload(fn1, chunk=chunk_size), md.iterload(fn2, chunk=chunk_size))):
                xyz_1 = c1.xyz[::-1] if r1 else c1.xyz
                xyz_2 = c2.xyz[::-1] if r2 else c2.xyz
                xyz = np.concatenate((xyz_1, xyz_2), axis=axis)

                table.root.coordinates.append(xyz)
                table.root.time.append(np.arange(i * chunk_size, i * chunk_size + xyz.shape[0], dtype=np.float32))
        finally:
            # Close the file even when a chunk fails to load or append.
            table.close()
Example #3
0
def test_load_frame():
    """Yield one check per file format that ``md.load_frame`` matches indexing.

    Every file is loaded completely and one random frame of it is loaded
    with ``md.load_frame``; coordinates, unit cell vectors and time of both
    must agree. A final direct check does the same for a PDB file.
    """
    files = [
        "frame0.nc",
        "frame0.h5",
        "frame0.xtc",
        "frame0.trr",
        "frame0.dcd",
        "frame0.mdcrd",
        "frame0.binpos",
        "frame0.xyz",
        "frame0.lammpstrj",
    ]
    if not (on_win and on_py3):
        files.append("legacy_msmbuilder_trj0.lh5")

    trajectories = [md.load(get_fn(f), top=get_fn("native.pdb")) for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [md.load_frame(get_fn(f), index=r, top=get_fn("native.pdb")) for f, r in zip(files, rand)]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):

        # Bind the loop variables as defaults so each yielded test keeps its
        # own data even if the runner calls it after the loop has advanced
        # or after ``r`` has been rebound further down.
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time, err_msg="%d, %d: %s" % (traj[r].time[0], frame.time[0], f))

        test.description = "test_load_frame: %s" % f
        yield test

    t1 = md.load(get_fn("2EQQ.pdb"))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn("2EQQ.pdb"), r)
    eq(t1[r].xyz, t2.xyz)
def sample_clusters(clusterer_dir, features_dir, traj_dir, save_dir, n_samples):
	"""Save the first ``n_samples`` entries of every cluster as PDB files.

	``dist_to_means`` provides, per cluster, a sequence of entries whose
	first two items are a trajectory index and a frame index. Each frame is
	written to ``save_dir`` restricted to atoms whose residue string does
	not start with "SOD" or "CLA" and whose ``resSeq`` is below 341. The
	directory is then passed through ``remove_ter``, ``reorder`` and
	``reimage``.
	"""
	clusters_map = dist_to_means(clusterer_dir, features_dir)
	if not os.path.exists(save_dir):
		os.makedirs(save_dir)

	trajectories = get_trajectory_files(traj_dir)

	n_clusters = len(clusters_map.keys())
	for cluster in range(n_clusters):
		for s in range(n_samples):
			entry = clusters_map[cluster][s]
			traj_file = trajectories[entry[0]]
			frame = entry[1]

			# First load: only to obtain the topology for the atom filter.
			topology = md.load_frame(traj_file, index=frame).topology
			indices = []
			for atom in topology.atoms:
				prefix = str(atom.residue)[0:3]
				if prefix not in ("SOD", "CLA") and atom.residue.resSeq < 341:
					indices.append(atom.index)

			# Second load: the same frame, restricted to the kept atoms.
			conformation = md.load_frame(traj_file, index=frame, atom_indices=indices)
			conformation.save_pdb("%s/cluster%d_sample%d.pdb" %(save_dir, cluster, s))

	remove_ter(save_dir)
	reorder(save_dir)
	reimage(save_dir)
Example #5
0
def test_load_frame():
    """Yield one check per file format that ``md.load_frame`` matches indexing.

    Each file is loaded in full and a random frame of it is loaded with
    ``md.load_frame``; both must agree in coordinates, unit cell vectors
    and time. The PDB file at the end is checked directly.
    """
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr', 'frame0.dcd',
        'frame0.mdcrd', 'frame0.binpos', 'frame0.xyz', 'frame0.lammpstrj'
    ]
    if not (on_win and on_py3):
        files.append('legacy_msmbuilder_trj0.lh5')

    trajectories = [
        md.load(get_fn(f), top=get_fn('native.pdb')) for f in files
    ]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [
        md.load_frame(get_fn(f), index=r, top=get_fn('native.pdb'))
        for f, r in zip(files, rand)
    ]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):

        # Default arguments freeze the current loop values; a plain closure
        # would see whatever traj/frame/r/f hold when the runner finally
        # calls the test (``r`` is even rebound after the loop).
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time,
               frame.time,
               err_msg='%d, %d: %s' % (traj[r].time[0], frame.time[0], f))

        test.description = 'test_load_frame: %s' % f
        yield test

    t1 = md.load(get_fn('2EQQ.pdb'))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn('2EQQ.pdb'), r)
    eq(t1[r].xyz, t2.xyz)
Example #6
0
def map_drawn_samples(selected_pairs_by_state, trajectories, top=None):
    """Collect the frames addressed by (trajectory, frame) pairs, per state.

    Parameters
    ----------
    selected_pairs_by_state : np.ndarray, dtype=int, shape=(n_states, n_samples, 2)
        ``selected_pairs_by_state[state, sample]`` is the (trajectory, frame)
        index pair of one sample drawn from that state.
    trajectories : list(md.Trajectory) or list(np.ndarray) or list(filenames)
        The data the pairs index into.  When the first entry is a string,
        entries are treated as filenames and single frames are read with
        ``mdtraj.load_frame``; otherwise entries are indexed directly.
    top : md.Topology, optional, default=None
        Forwarded as ``top`` to ``mdtraj.load_frame`` when filenames are used.

    Returns
    -------
    frames_by_state : list
        ``frames_by_state[state]`` holds the samples of ``state``: a joined
        trajectory when the frames offer a ``join`` method, otherwise a
        numpy array built from the frames.

    Examples
    --------
    >>> selected_pairs_by_state = hmm.draw_samples(sequences, 3)
    >>> samples = map_drawn_samples(selected_pairs_by_state, trajectories)

    Notes
    -----
    YOU are responsible for ensuring that selected_pairs_by_state and
    trajectories correspond to the same dataset!

    See Also
    --------
    utils.map_drawn_samples : Extract conformations from MD trajectories by index.
    ghmm.GaussianFusionHMM.draw_samples : Draw samples from GHMM
    ghmm.GaussianFusionHMM.draw_centroids : Draw centroids from GHMM
    """

    frames_by_state = []

    for pairs in selected_pairs_by_state:
        if isinstance(trajectories[0], str):
            import mdtraj as md
            extra = {"top": top} if top else {}

            def fetch(source, frame_index):
                return md.load_frame(source, frame_index, **extra)
        else:
            def fetch(source, frame_index):
                return source[frame_index]

        frames = [fetch(trajectories[trj], frame) for trj, frame in pairs]
        try:
            # Trajectory frames: start from an empty slice of the first frame
            # and let it join every frame of this state.
            empty = frames[0][0:0]
            state_trj = empty.join(frames)
        except AttributeError:
            # Frames without a join method are stacked into an array.
            state_trj = np.array(frames)
        frames_by_state.append(state_trj)

    return frames_by_state
Example #7
0
def export_frame_coordinates(topology, trajectory, nframe, output=None):
    """
    Write a single frame of a trajectory to a coordinate file.

    When ``output`` is None the target name is derived from the trajectory
    name and the frame number. Box vectors found in the frame take
    precedence over those read from an Amber topology.
    """
    if output is None:
        basename = os.path.splitext(trajectory)[0]
        output = '{}.frame{}.inpcrd'.format(basename, nframe)

    topology_ext = os.path.splitext(topology)[1]
    if topology_ext in ('.top', '.prmtop'):
        # ParmEd sometimes struggles with certain PRMTOP files, so these
        # are read through AmberPrmtopFile and converted.
        prmtop = AmberPrmtopFile(topology)
        converted = mdtraj.Topology.from_openmm(prmtop.topology)
        traj = mdtraj.load_frame(trajectory, int(nframe), top=converted)
        system = prmtop.createSystem()
        structure = parmed.openmm.load_topology(prmtop.topology, system=system)
        structure.box_vectors = prmtop.topology.getPeriodicBoxVectors()
    else:
        # Standard protocol; the topology file ends up being read twice.
        traj = mdtraj.load_frame(trajectory, int(nframe), top=topology)
        structure = parmed.load_file(topology)

    structure.positions = traj.openmm_positions(0)

    # Box vectors stored in the frame override whatever was set above.
    frame_has_box = traj.unitcell_vectors is not None
    if frame_has_box:
        structure.box_vectors = traj.openmm_boxes(0)

    structure.save(output, overwrite=True)
Example #8
0
def sample_clusters(clusterer_dir, features_dir, traj_dir, save_dir,
                    n_samples):
    """Export ``n_samples`` frames of each cluster as PDB files and tidy them.

    For each cluster index and sample index, the entry from
    ``dist_to_means`` names a trajectory (first item) and a frame (second
    item). The frame is saved without atoms whose residue string begins
    with "SOD" or "CLA" and without atoms of residues numbered 341 or
    higher. ``remove_ter``, ``reorder`` and ``reimage`` then run on the
    output directory.
    """

    def keep(atom):
        # True for atoms that survive the ion / residue-number filter.
        label = str(atom.residue)[0:3]
        return label != "SOD" and label != "CLA" and atom.residue.resSeq < 341

    clusters_map = dist_to_means(clusterer_dir, features_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    trajectories = get_trajectory_files(traj_dir)

    cluster_count = len(list(clusters_map.keys()))
    for cluster in range(cluster_count):
        for s in range(n_samples):
            sample = clusters_map[cluster][s]
            traj_id, frame = sample[0], sample[1]
            traj = trajectories[traj_id]

            # The full frame is loaded once just for its topology ...
            top = md.load_frame(traj, index=frame).topology
            indices = [a.index for a in top.atoms if keep(a)]

            # ... and once more with only the selected atoms.
            conformation = md.load_frame(traj, index=frame,
                                         atom_indices=indices)
            pdb_name = "%s/cluster%d_sample%d.pdb" % (save_dir, cluster, s)
            conformation.save_pdb(pdb_name)

    remove_ter(save_dir)
    reorder(save_dir)
    reimage(save_dir)
Example #9
0
def test_load_frame():
    """Check that ``md.load_frame`` returns the same frame as slicing.

    For several file formats a random frame is loaded on its own and
    compared (coordinates, unit cell vectors, time) with the same frame of
    the fully loaded trajectory. A PDB file is checked for coordinates only.
    """
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr', 'frame0.dcd',
        'frame0.mdcrd', 'frame0.binpos', 'legacy_msmbuilder_trj0.lh5'
    ]

    trajectories = []
    for name in files:
        trajectories.append(md.load(get_fn(name), top=get_fn('native.pdb')))

    rand = [np.random.randint(len(full)) for full in trajectories]

    frames = []
    for name, pick in zip(files, rand):
        frames.append(
            md.load_frame(get_fn(name), index=pick, top=get_fn('native.pdb')))

    for full, single, pick, name in zip(trajectories, frames, rand, files):
        expected = full[pick]
        eq(expected.xyz, single.xyz)
        eq(expected.unitcell_vectors, single.unitcell_vectors)
        message = '%d, %d: %s' % (expected.time[0], single.time[0], name)
        eq(expected.time, single.time, err_msg=message)

    pdb_path = get_fn('2EQQ.pdb')
    t1 = md.load(pdb_path)
    r = np.random.randint(len(t1))
    t2 = md.load_frame(pdb_path, r)
    eq(t1[r].xyz, t2.xyz)
Example #10
0
def save_traj_w_md_load_frame(reader, sets):
    """Build one trajectory from the frames listed in ``sets`` via md.load_frame.

    Parameters
    ----------
    reader : object
        Must provide ``filenames`` (indexable by file index) and ``topfile``.
    sets : sequence of arrays
        Arrays of (file index, frame index) rows; they are stacked with
        ``vstack`` and processed in order.

    Returns
    -------
    traj : trajectory or None
        The loaded frames joined in order, or None when ``sets`` stacks to
        zero rows.
    """
    frames = [
        md.load_frame(reader.filenames[file_idx], frame_idx, reader.topfile)
        for file_idx, frame_idx in vstack(sets)
    ]
    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # One join over all frames; joining inside the loop re-copied the
    # growing trajectory for every added frame.
    return frames[0].join(frames[1:])
Example #11
0
def test_residues_map(traj_file_1, traj_file_2, residues, residues_map):
	"""Print each residue next to its mapped counterpart.

	For every number in ``residues`` the first protein residue with that
	``resSeq`` in the first topology is printed, followed by the first
	protein residue in the second topology whose ``resSeq`` equals
	``residues_map[number]``.
	"""
	top1 = md.load_frame(traj_file_1, index = 0).topology
	top2 = md.load_frame(traj_file_2, index = 0).topology
	for residue in residues:
		new_residue = residues_map[residue]
		print("Original residues:")
		originals = [r for r in top1.residues if r.is_protein and r.resSeq == residue]
		print(originals[0])
		print("New residues:")
		mapped = [r for r in top2.residues if r.is_protein and r.resSeq == new_residue]
		print(mapped[0])
Example #12
0
def test_residues_map_num_atoms(traj_file_1, traj_file_2, residues, residues_map):
	"""Report residues whose protein atom count differs between two topologies.

	For every number in ``residues`` the protein atoms with that ``resSeq``
	in the first topology are counted and compared with the protein atoms
	of ``residues_map[number]`` in the second topology. A line is printed
	only for residues whose counts differ.
	"""
	top1 = md.load_frame(traj_file_1, index = 0).topology
	top2 = md.load_frame(traj_file_2, index = 0).topology
	for residue in residues:
		new_residue = residues_map[residue]
		len1 = sum(1 for a in top1.atoms if a.residue.resSeq == residue and a.residue.is_protein)
		len2 = sum(1 for a in top2.atoms if a.residue.resSeq == new_residue and a.residue.is_protein)
		# The previous condition also accepted len1 == len2, so the
		# mismatch message was printed for every residue.
		if len1 != len2:
			print("Atom number %d %d doesn't match for residue %d" %(len1, len2, residue))
	return
Example #13
0
def generate_traj_from_stateinds(inds, meta, atom_selection='all'):
    """
    Concatenate several frames from different trajectories to create a new one.

    Parameters
    ----------
    inds: list of tuples, Each element of the list has to be a 2D tuple of ints
        (traj_index, frame_index)

    meta: a metadata object
        Indexed with ``meta.loc[traj_index]``; each row must provide the
        keys 'top_fn' and 'traj_fn'.
    atom_selection: str, Which atoms to load

    Returns
    -------
    traj: mdtraj.Trajectory
        The selected frames joined in the order of ``inds``, centered and
        superposed on the first frame.

    Raises
    ------
    ValueError
        If ``inds`` contains no entries.
    """
    # Atom selections are cached per topology file so that a topology
    # shared by many frames is parsed for the selection only once.
    atoms_by_top = {}
    frame_list = []
    for traj_i, frame_i in inds:
        row = meta.loc[traj_i]
        top_fn = row['top_fn']
        if top_fn not in atoms_by_top:
            atoms_by_top[top_fn] = mdtraj.load_prmtop(top_fn).select(atom_selection)

        frame_list.append(
            mdtraj.load_frame(row['traj_fn'], atom_indices=atoms_by_top[top_fn],
                              index=frame_i, top=top_fn)
        )
    if not frame_list:
        raise ValueError("inds is empty: there are no frames to join")
    traj = mdtraj.join(frame_list, check_topology=False)
    traj.center_coordinates()
    traj.superpose(traj, 0)
    return traj
Example #14
0
def read_and_featurize(filename, dihedrals=['chi2'], stride=10):
	"""Compute dihedral features for a subset of atoms and dump them to disk.

	The atom subset is taken from the topology of the first frame. The
	features are written below a fixed scratch directory, in a
	sub-directory named after the parent directory of ``filename``, and
	are also returned.

	NOTE(review): ``stride`` only appears in the output file name; the
	trajectory itself is always loaded with a stride of 1000 -- confirm
	which of the two is intended.
	"""
	topology = md.load_frame(filename, 0).topology

	# NOTE(review): ``a.residue != "POPC"`` compares a residue object with
	# a string; confirm that this filters anything.
	atom_indices = []
	for a in topology.atoms:
		if a.residue.resSeq == 93 and a.residue != "POPC" and str(a.residue)[0] == "H":
			atom_indices.append(a.index)
	print(len(atom_indices))

	traj = md.load(filename, stride=1000, atom_indices=atom_indices)
	features = DihedralFeaturizer(types = dihedrals).transform(traj_list = traj)

	# Second-to-last and last path components: condition directory and file.
	parts = filename.split("/")
	condition = parts[len(parts) - 2]
	dcd_file = parts[len(parts) - 1]
	stem = dcd_file.rsplit(".", 1)[0]
	new_file = "%s_features_stride%d.h5" %(stem, stride)
	new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
	new_file_full = "%s/%s/%s" %(new_root_dir, condition, new_file)

	verbosedump(features, new_file_full)
	return features
def loadFrames(confs_by_state):
    """
    Load the full-system frame behind every (trajectory file, frame) entry.

    input is array of arrays: one inner sequence per state, whose elements
    carry a trajectory filename at position 0 and a frame index at
    position 1. The project name and trajectory number are parsed from the
    file's base name and used to locate the full trajectory and reference
    PDB under ``../<project>/analysis/full/``.

    Returns a list with one list of loaded frames per state.

    Raises ValueError when a base name does not match the expected
    ``<project>_traj..._<number>.xtc`` pattern.
    """
    # Raw string: ``\d`` is meant as a regex class, not a string escape.
    # Compiled once because it is used for every entry.
    pattern = re.compile(r"(.*)_traj.*_(\d*).xtc")

    frames = []
    for elem in confs_by_state:
        trajFrames = []
        for trajFrame in elem:
            trajName = os.path.basename(trajFrame[0])
            frame = trajFrame[1]

            m = pattern.match(trajName)
            if m is None:
                raise ValueError(
                    "Cannot parse project name and trajectory number from %r" % trajName)
            projectName = m.group(1)
            trajNum = m.group(2)

            #now find the actual trajectory
            #TODO also get the regular traj
            originalTraj = "../%s/analysis/full/traj_full_%s.xtc"%(projectName,trajNum)

            #load the ref
            ref = "../%s/analysis/full/ref.pdb"%projectName
            print ("loading %s frame %s"%(originalTraj,frame))
            loadedFrame = md.load_frame(originalTraj,frame,top=ref)

            trajFrames.append(loadedFrame)

        frames.append(trajFrames)

    return frames
Example #16
0
def get_feature_list(feature_residues_csv, structure_file):
    """Translate residue-number feature pairs into residue-name pairs.

    Parameters
    ----------
    feature_residues_csv : passed unchanged to ``generate_features``.
    structure_file : file whose first frame supplies the topology.

    Returns
    -------
    list of (name_i, name_j) tuples, one per feature, where each name is
    the title-cased string of the first protein residue carrying the
    corresponding ``resSeq``.

    Raises
    ------
    ValueError
        If a residue number does not occur among the protein residues.
    """
    feature_names = generate_features(feature_residues_csv)
    structure = md.load_frame(structure_file, index=0)
    protein_residues = [r for r in structure.topology.residues if r.is_protein]
    all_resSeq = [r.resSeq for r in protein_residues]
    all_res = [str(r).title() for r in protein_residues]

    feature_list = []
    for feature_name in feature_names:
        try:
            res_i = int(feature_name[0])
            res_j = int(feature_name[1])
        except (TypeError, ValueError):
            # Entries that are not plain numbers: fall back to the item at
            # position 1 of each entry. Only conversion failures are
            # caught, so KeyboardInterrupt and the like still propagate.
            res_i = int(feature_name[0][1])
            res_j = int(feature_name[1][1])

        res_i_name = all_res[all_resSeq.index(res_i)]
        res_j_name = all_res[all_resSeq.index(res_j)]

        feature_list.append((res_i_name, res_j_name))

    return feature_list
Example #17
0
def compute_contacts_below_cutoff(traj_file_frame, cutoff = 100000.0, contact_residues = [], anton = False):
	"""Find residue pairs whose closest-heavy-atom distance is below ``cutoff``.

	Parameters
	----------
	traj_file_frame : sequence whose first item is the trajectory file; only
		frame 0 of that file is examined.
	cutoff : distance threshold compared against ``md.compute_contacts`` output.
	contact_residues : residue keys; ``key[0]`` is compared with ``chainid``
		and ``key[1]`` with ``resSeq`` of each non-water residue. Keys are
		used as dictionary keys, so they must be hashable.
	anton : accepted for interface compatibility; not used here.

	Returns
	-------
	(final_resSeq_pairs, residue_full_infos) : the pairs of residue keys
		below the cutoff, and for each such pair two lists of
		``(resSeq, name, chain index)`` tuples.
	"""
	traj_file = traj_file_frame[0]
	frame = md.load_frame(traj_file, index = 0)
	top = frame.topology

	distance_residues = []
	resSeq_to_resIndex = {}

	for residue in contact_residues:
		indices = [r.index for r in top.residues if r.resSeq == residue[1] and r.chainid == residue[0] and not r.is_water]
		if len(indices) == 0:
			# The key is indexable, so it is printed with %s; %d failed on it.
			print("No residues in trajectory for residue %s" % (residue,))
			continue
		ind = indices[0]
		for j in indices:
			if j != ind:
				# The residue loop has to come first so ``r`` is bound before
				# ``r.atoms`` is read.
				# NOTE(review): this compares str(atom) with "CB"; confirm
				# that atom strings are bare atom names.
				if "CB" in [str(a) for r in top.residues if r.index == ind for a in r.atoms]:
					ind = j
		distance_residues.append(residue)
		resSeq_to_resIndex[residue] = ind

	# All unordered pairs of the residues that were found, once as keys and
	# once as topology residue indices (same order in both lists).
	resSeq_pairs = list(itertools.combinations(distance_residues, 2))
	res_index_combinations = [(resSeq_to_resIndex[res0], resSeq_to_resIndex[res1]) for res0, res1 in resSeq_pairs]

	final_resSeq_pairs = []
	final_resIndex_pairs = []

	distances = md.compute_contacts(frame, contacts = res_index_combinations, scheme = 'closest-heavy', ignore_nonprotein=False)[0]
	print(np.shape(distances))
	# Row 0 holds the distances of the single loaded frame.
	for i, distance in enumerate(distances[0]):
		if distance < cutoff:
			final_resIndex_pairs.append(res_index_combinations[i])
			final_resSeq_pairs.append(resSeq_pairs[i])

	residue_full_infos = []
	for pair in final_resIndex_pairs:
		info0 = [(r.resSeq, r.name, r.chain.index) for r in top.residues if r.index == pair[0]]
		info1 = [(r.resSeq, r.name, r.chain.index) for r in top.residues if r.index == pair[1]]
		residue_full_infos.append((info0, info1))

	print(len(final_resSeq_pairs))
	print(len(final_resIndex_pairs))
	return((final_resSeq_pairs, residue_full_infos))
Example #18
0
def find_most_important_residues_in_tIC(traj_file, tica_object, tic_features_csv, contact_residues,tic_residue_csv, feature_coefs_csv, duplicated_feature_coefs_csv, cutoff):
	"""Rank the residue-pair features of every tIC and write three CSV tables.

	For each row of ``tica.components_`` the features are ordered by
	absolute coefficient (largest first). Written out are:

	* ``tic_residue_csv``: per tIC, the residues of the ranked features,
	* ``feature_coefs_csv``: per tIC, the absolute coefficient of every
	  feature plus the two residue labels of each feature,
	* ``duplicated_feature_coefs_csv``: the same coefficients repeated once
	  per residue of each pair, plus the flattened residue labels.

	``contact_residues`` and ``cutoff`` are accepted but not used here.
	"""
	try:
		tica = verboseload(tica_object)
	except Exception:
		# Fall back to the second loader when the first one fails.
		tica = load_dataset(tica_object)
	print(traj_file)
	# The frame is not used below; the load is kept so that an unreadable
	# traj_file still raises at this point.
	md.load_frame(traj_file, 0)

	# Build one "<item2><item1>.<item0>" label per residue of every pair.
	residue_pairs = []
	for pair in generate_features(tic_features_csv):
		label_0 = "%s%d.%d" %(pair[0][2], pair[0][1], pair[0][0])
		label_1 = "%s%d.%d" %(pair[1][2], pair[1][1], pair[1][0])
		# Append ONE tuple; list.append() does not accept two arguments.
		residue_pairs.append((label_0, label_1))

	top_indices_per_tIC = {}
	feature_coefs_per_tIC = {}
	duplicated_feature_coefs_per_tIC = {}

	n_tics = np.shape(tica.components_)[0]
	n_features = np.shape(tica.components_)[1]

	for i in range(0, n_tics):
		print(i)
		# (feature index, |coefficient|) for every feature of this tIC.
		index_components = [(j, abs(tica.components_[i][j])) for j in range(0, n_features)]
		feature_coefs_per_tIC[i] = [coef for _, coef in index_components]
		# Each coefficient twice: once per residue of the feature's pair.
		duplicated_feature_coefs_per_tIC[i] = [c for coef in feature_coefs_per_tIC[i] for c in (coef, coef)]
		index_components = sorted(index_components, key= lambda x: x[1],reverse=True)
		print(index_components[0:10])
		top_indices_per_tIC[i] = [j for j, _ in index_components]

	# Residues of the ranked features, flattened pair by pair.
	top_residues_per_tIC = {}
	for i in range(0, n_tics):
		top_residues_per_tIC[i] = [residue for index in top_indices_per_tIC[i] for residue in residue_pairs[index]]

	feature_coefs_per_tIC["residues_0"] = [pair[0] for pair in residue_pairs]
	feature_coefs_per_tIC["residues_1"] = [pair[1] for pair in residue_pairs]
	duplicated_feature_coefs_per_tIC["residues"] = [residue for residue_pair in residue_pairs for residue in residue_pair]

	write_map_to_csv(tic_residue_csv, top_residues_per_tIC, [])
	write_map_to_csv(feature_coefs_csv, feature_coefs_per_tIC, [])
	write_map_to_csv(duplicated_feature_coefs_csv, duplicated_feature_coefs_per_tIC, [])
	return
Example #19
0
def save_features_to_residues_map(traj_file, contact_residues, feature_residues_csv, cutoff, residues_map = None, exacycle = False):
	"""Write the residue pairs found by ``compute_contacts_below_cutoff`` to a CSV file.

	Parameters
	----------
	traj_file : trajectory file handed to ``compute_contacts_below_cutoff``.
	contact_residues : candidate residues; restricted to the keys of
		``residues_map`` when a map is given.
	feature_residues_csv : path of the CSV file to write.
	cutoff : forwarded to ``compute_contacts_below_cutoff``.
	residues_map : optional mapping between residue numberings.
	exacycle : when true, residues are translated through ``residues_map``
		before the contact search and translated back afterwards.
	"""
	if residues_map is not None:
		contact_residues = [r for r in contact_residues if r in residues_map.keys()]
		if exacycle: contact_residues = [residues_map[key] for key in contact_residues]

	# The frame is not used below; the load is kept so that an unreadable
	# traj_file still raises at this point.
	md.load_frame(traj_file, 0)
	residue_pairs, residue_infos = compute_contacts_below_cutoff([traj_file, [0]], cutoff = cutoff, contact_residues = contact_residues, anton = False)
	if exacycle:
		reverse_residues_map = {v: k for k, v in residues_map.items()}
		residue_pairs = [
			[reverse_residues_map[residue_pair[0]], reverse_residues_map[residue_pair[1]]]
			for residue_pair in residue_pairs
		]

		# One consistently spelled list: the former code appended to a name
		# that was never defined and raised NameError.
		new_residue_infos = []
		for residue_info in residue_infos:
			new_residue_info = [(reverse_residues_map[residue_info[0][0]], residue_info[0][1], residue_info[0][2]), (reverse_residues_map[residue_info[1][0]], residue_info[1][1], residue_info[1][2])]
			new_residue_infos.append(new_residue_info)
		residue_infos = new_residue_infos

	print("There are: %d residue pairs" %len(residue_pairs))
	# Text mode: the rows are str, which a binary-mode file rejects on
	# Python 3. The context manager closes the file if a row fails to format.
	# NOTE(review): every column is formatted with %d; confirm that the
	# residue name and chain fields are numeric at this point.
	with open(feature_residues_csv, "w") as f:
		f.write("feature, residue.1.resSeq, residue.1.res, residue.1.chain, residue.2.resSeq, residue.2.res, residue.2.chain,\n")
		for i in range(0, len(residue_infos)):
			f.write("%d, %d, %d, %d, %d, %d, %d,\n" %(i, residue_infos[i][0][0], residue_infos[i][0][1], residue_infos[i][0][2], residue_infos[i][1][0], residue_infos[i][1][1], residue_infos[i][1][2]))
	return
Example #20
0
def timefld(n):
    """Time loading frame 23 of 'bpti-all-1NNN.dcd' and slicing it to protein.

    ``n`` fills the zero-padded three-digit part of the file name. The
    elapsed seconds are printed and the sliced frame is returned.
    """
    began = dt.datetime.now()
    filename = 'bpti-all-1%03d.dcd' % n
    tr = md.load_frame(filename, 23, top=pdb)
    protein_atoms = tr.top.select('protein')
    tr.atom_slice(protein_atoms, inplace=True)
    elapsed = dt.datetime.now() - began
    print('Time: ', elapsed.total_seconds())
    return tr
Example #21
0
def timefld(n):
    """Load one frame, reduce it to protein atoms, and report how long it took.

    Frame 23 of the file "bpti-all-1NNN.dcd" (NNN = ``n`` zero-padded to
    three digits) is read with the module-level ``pdb`` as topology.
    """
    t0 = dt.datetime.now()
    tr = md.load_frame("bpti-all-1%03d.dcd" % n, 23, top=pdb)
    selection = tr.top.select("protein")
    tr.atom_slice(selection, inplace=True)
    t1 = dt.datetime.now()
    seconds = (t1 - t0).total_seconds()
    print("Time: ", seconds)
    return tr
Example #22
0
def rmsd_to_structure(clusters_dir, ref_dir, text):
    """Compute the RMSD of every structure in a directory to a reference.

    The first frame of each file returned by ``get_trajectory_files`` is
    compared with the first frame of ``ref_dir``. The result, one
    (position, rmsd) row per file, is saved as
    ``<clusters_dir>/<text>_rmsds.csv``.
    """
    pdbs = get_trajectory_files(clusters_dir)
    ref = md.load_frame(ref_dir, index=0)

    rmsds = np.zeros(shape=(len(pdbs), 2))
    for i, pdb_file in enumerate(pdbs):
        print(i)
        pdb = md.load_frame(pdb_file, index=0)
        # Column 0: position of the file in the list; column 1: its RMSD.
        rmsds[i, 0] = i
        rmsds[i, 1] = md.rmsd(pdb, ref, 0)[0]

    rmsd_file = "%s/%s_rmsds.csv" % (clusters_dir, text)
    np.savetxt(rmsd_file, rmsds, delimiter=",")
Example #23
0
    def start(self):
        """Load the frames listed in the CSV file and save one trajectory per state.

        The CSV file (``self.filename``) may begin with a single ``#``
        comment line and must provide the columns 'filename', 'index' and
        'state'. Frames are loaded per file in ascending 'index' order,
        grouped by 'state', joined, and written to ``self.outfn(state)``.
        ``self.error`` is called when the columns are missing or an output
        file already exists.
        """
        # read the csv file with an optional comment on the first line
        with open(self.filename) as f:
            line = f.readline()
            if not line.startswith('#'):
                # No comment line: rewind so the header row is parsed.
                f.seek(0, 0)
            df = pd.read_csv(f)

        if not all(e in df.columns for e in ('filename', 'index', 'state')):
            self.error('CSV file not read properly')

        for k in np.unique(df['state']):
            fn = self.outfn(k)
            if os.path.exists(fn):
                self.error('IOError: file exists: %s' % fn)

        frames = defaultdict(list)
        for fn, group in df.groupby('filename'):
            # DataFrame.sort() no longer exists in pandas; sort_values() is
            # its replacement for sorting rows by a column.
            for _, row in group.sort_values('index').iterrows():
                frames[row['state']].append(
                    md.load_frame(fn, row['index'], top=self.top))

        for state, samples in frames.items():
            traj = samples[0].join(samples[1:])
            print('saving %s...' % self.outfn(state))
            traj.save(self.outfn(state), force_overwrite=False)
        print('done')
Example #24
0
 def _assert_toptraj_consistency(self):
     r""" Assert that the first trajectory file has as many atoms as the featurizer topology"""
     first_frame = mdtraj.load_frame(self.filenames[0], index=0, top=self.topfile)
     desired_n_atoms = self.featurizer.topology.n_atoms
     found_n_atoms = first_frame.xyz.shape[1]
     assert found_n_atoms == desired_n_atoms, (
         "Mismatch in the number of atoms between the topology and the first trajectory file, %u vs %u"
         % (desired_n_atoms, found_n_atoms))
Example #25
0
    def start(self):
        """Build one trajectory per state from the frames listed in a CSV file.

        ``self.filename`` names a CSV file with the columns 'filename',
        'index' and 'state', optionally preceded by one ``#`` comment line.
        For every state the referenced frames are loaded (per file, in
        ascending 'index' order), joined, and saved to
        ``self.outfn(state)``. Missing columns or an already existing
        output file are reported through ``self.error``.
        """
        # read the csv file with an optional comment on the first line
        with open(self.filename) as f:
            line = f.readline()
            if not line.startswith('#'):
                # The first line is data/header, so start over from the top.
                f.seek(0, 0)
            df = pd.read_csv(f)

        if not all(e in df.columns for e in ('filename', 'index', 'state')):
            self.error('CSV file not read properly')

        for k in np.unique(df['state']):
            fn = self.outfn(k)
            if os.path.exists(fn):
                self.error('IOError: file exists: %s' % fn)

        frames = defaultdict(list)
        for fn, group in df.groupby('filename'):
            # sort_values() replaces DataFrame.sort(), which pandas removed.
            for _, row in group.sort_values('index').iterrows():
                frames[row['state']].append(
                    md.load_frame(fn, row['index'], top=self.top))

        for state, samples in list(frames.items()):
            traj = samples[0].join(samples[1:])
            print('saving %s...' % self.outfn(state))
            traj.save(self.outfn(state), force_overwrite=False)
        print('done')
Example #26
0
def reproject_oldata():
    """Rebuild a trajectory from the frames referenced by past jobs.

    Job keys are taken from the 'anl_sequence' hash, ordered by their
    stored value. For each job the referenced frame is read from the cache
    when present and otherwise loaded from the job's DCD file. The
    collected coordinates are wrapped in a trajectory and passed through
    ``datareduce.filter_alpha``, whose result is returned.

    NOTE(review): the reference entries are parsed with ``eval`` and the
    cached coordinates with ``pickle.loads``; both execute whatever the
    Redis servers return, so these servers must be trusted.
    """
    r1 = redis.StrictRedis(port=6390, decode_responses=True)
    cache = redis.StrictRedis(host='bigmem0006',
                              port=6380,
                              decode_responses=True)
    execlist = r1.hgetall('anl_sequence')
    ordered = sorted(execlist.items(), key=lambda x: x[1])
    keyorder = ['jc_' + item[0] for item in ordered]

    pts = []
    bad_ref = 0
    miss = 0
    for key in keyorder:
        conf = r1.hgetall(key)
        src = int(conf['src_index'])
        ref = r1.lindex('xid:reference', src)
        if ref is None:
            bad_ref += 1
            continue

        # The reference evaluates to a (file number, frame) pair; only the
        # frame is used below.
        fileno, frame = eval(ref)
        ckey = 'sim:%s' % conf['name']
        xyz = cache.lindex(ckey, frame)
        if xyz is not None:
            pts.append(pickle.loads(xyz))
            continue

        # Cache miss: read the frame from the job's trajectory file.
        tr = md.load_frame(conf['dcd'], frame, top=conf['pdb'])
        if len(tr.xyz) == 0:
            miss += 1
        else:
            pts.append(tr.xyz[0])

    traj = md.Trajectory(pts, deshaw.topo_prot.top)
    alpha = datareduce.filter_alpha(traj)
    return alpha
def sampling_along_tIC(resultdir, opath, tica_trajs, xtc_traj_folder,
                       traj_list_array, pdb_name, tIC_a):
    """Sample 200 frames along one tIC, save them and plot where they fall.

    Parameters
    ----------
    resultdir : str
        Directory receiving the sampled trajectory and the figure.
    opath : str
        File name of the figure, relative to ``resultdir``.
    tica_trajs : sequence of arrays
        tICA-transformed trajectories; columns 0 and 1 are plotted.
    xtc_traj_folder : str
        Prefix that is concatenated directly with each trajectory file name
        and with ``pdb_name`` (so it must end with a path separator).
    traj_list_array : sequence of str
        Trajectory file names, indexed like ``tica_trajs``.
    pdb_name : str
        Topology file name.
    tIC_a : int
        One-based index of the tIC to sample along.
    """
    transformed = np.concatenate(tica_trajs)
    draw_tica_histogram_core(transformed[:, 0], transformed[:, 1], '1', '2')
    # sample_dimension is given a dict keyed by trajectory index.
    tica_trajs = dict(enumerate(tica_trajs))
    inds = sample_dimension(tica_trajs,
                            dimension=tIC_a - 1,
                            n_frames=200,
                            scheme='random')  # sample 200 conformations
    # Build one trajectory from the sampled (trajectory, frame) pairs.
    traj = md.join(
        md.load_frame(xtc_traj_folder + traj_list_array[i],
                      index=frame_i,
                      top=xtc_traj_folder + pdb_name) for i, frame_i in inds)
    traj.save("%s/tica-dimension-tIC%s.xtc" % (resultdir, tIC_a - 1))
    # Show the samples on the first two tICA coordinates.
    samples_coord = np.array(
        [[tica_trajs[i][frame_i][0], tica_trajs[i][frame_i][1]]
         for i, frame_i in inds])
    print(samples_coord.shape)
    plt.plot(samples_coord[:, 0], samples_coord[:, 1], 'o-')
    # legend() takes a sequence of labels; a bare string would be split into
    # characters and the entry would be labelled "s".
    plt.legend(['sample'])
    plt.savefig(resultdir + '/' + opath)
    def onRemoveClashes(self):
        """Copy the trajectory to a new H5 file, keeping only clash-free frames.

        The output file is chosen through a save dialog. The source is read
        in chunks of 1000 frames with ``self.stride``; for each chunk the
        frames for which ``below_min_distance`` reports a value below 1 are
        appended to the output, together with consecutive time stamps.
        """
        out_path = chisurf.widgets.save_file('H5-Trajectory file',
                                             'H5-File (*.h5)')
        source_path = self.trajectory_filename
        stride = self.stride
        min_distance = self.min_distance

        # Seed the output file with a zero-frame trajectory so that it
        # already holds the topology and the coordinate/time arrays.
        first = md.load_frame(source_path, 0)
        seed = md.Trajectory(xyz=np.empty((0, first.n_atoms, 3)),
                             topology=first.topology)
        atom_list = seed.top.select(self.atom_list)
        seed.save(out_path)

        for chunk in md.iterload(source_path, chunk=1000, stride=stride):
            xyz = chunk.xyz.copy()
            frames_below = below_min_distance(xyz,
                                              min_distance,
                                              atom_list=atom_list)
            kept = np.where(frames_below < 1)[0]
            xyz_clash_free = xyz[kept]
            with tables.open_file(out_path, 'a') as table:
                table.root.coordinates.append(xyz_clash_free)
                # Time stamps continue from the number of frames already
                # stored in the file.
                n_stored = table.root.time.shape[0]
                table.root.time.append(
                    np.arange(n_stored,
                              n_stored + xyz_clash_free.shape[0],
                              dtype=np.float32))
Example #29
0
    def __init__(self,
                 ref,
                 ref_frame=0,
                 atom_indices=None,
                 topology=None,
                 precentered=False):
        """Store a reference structure given as a file name or a trajectory.

        Parameters
        ----------
        ref : str or mdtraj.Trajectory
            A file name (frame ``ref_frame`` is loaded from it using
            ``topology``) or an already loaded trajectory.
        ref_frame : int, optional
            Frame index of the reference. Default: 0
        atom_indices : optional
            Stored unchanged on the instance. Default: None
        topology : optional
            Passed as ``top`` when ``ref`` is a file name. Default: None
        precentered : bool, optional
            Stored unchanged on the instance. Default: False

        Raises
        ------
        AssertionError
            If ``ref_frame`` is not an ``int``.
        TypeError
            If ``ref`` is neither a string nor an ``mdtraj.Trajectory``.
        """
        self.top = topology

        assert isinstance(ref_frame, int), (
            "ref_frame has to be of type integer, and not %s" %
            type(ref_frame))

        if not isinstance(ref, (str, mdtraj.Trajectory)):
            raise TypeError("input reference has to be either a filename or "
                            "a mdtraj.Trajectory object, and not of %s" %
                            type(ref))

        if isinstance(ref, str):
            # File name: remember it as the name, then load the one frame.
            # No further checks here; errors from mdtraj propagate.
            self.name = ref[:]
            ref = mdtraj.load_frame(ref, ref_frame, top=topology)
        else:
            # Trajectory object: its repr serves as the name.
            self.name = repr(ref)[:]

        self.ref = ref
        self.ref_frame = ref_frame
        self.atom_indices = atom_indices
        self.precentered = precentered
        self.dimension = 1
Example #30
0
def save_pdb(traj_dir, clusterer, i):
    """Save the frame referenced by row ``i`` of ``cluster_ids_`` as a PDB.

    Column 0 of the row indexes the list returned by
    ``get_trajectory_files(traj_dir)``; column 1 is the frame index inside
    that trajectory. The output goes to a fixed directory and is named
    ``<i>.pdb``. Always returns None.
    """
    location = clusterer.cluster_ids_[i, :]
    traj_path = get_trajectory_files(traj_dir)[location[0]]
    frame_index = location[1]
    print("traj = %s, frame = %d" % (traj_path, frame_index))
    md.load_frame(traj_path, frame_index).save_pdb(
        "/scratch/users/enf/b2ar_analysis/clusters_1000_allprot/%d.pdb" % i)
    return None
Example #31
0
def reproject_oldata():
  """Gather one frame per recorded job and return the alpha-filtered result.

  Jobs listed in the ``anl_sequence`` hash are processed in ascending order
  of their stored values. Coordinates are read from the ``sim:<name>`` list
  on the cache server when present, and from the job's DCD/PDB files
  otherwise. Jobs without an ``xid:reference`` entry are only counted.
  """
  catalog = redis.StrictRedis(port=6390, decode_responses=True)
  cache = redis.StrictRedis(host='bigmem0006', port=6380, decode_responses=True)
  sequence = catalog.hgetall('anl_sequence')
  job_keys = []
  for name, _order in sorted(sequence.items(), key=lambda item: item[1]):
    job_keys.append('jc_' + name)
  # NOTE(review): an older comment here said "skip first 100 (non-sampled)",
  # but no entries are skipped by this function.
  coords = []
  # Both counters are updated below but never reported or returned.
  bad_ref = 0
  miss = 0
  for job_key in job_keys:
    conf = catalog.hgetall(job_key)
    src = int(conf['src_index'])
    ref = catalog.lindex('xid:reference', src)
    if ref is None:
      bad_ref += 1
      continue
    # NOTE(review): eval() and pickle.loads() run on data read from Redis;
    # both are unsafe if those stores can contain untrusted values.
    _fileno, frame = eval(ref)
    cache_key = 'sim:%s' % conf['name']
    cached = cache.lindex(cache_key, frame)
    if cached is not None:
      coords.append(pickle.loads(cached))
      continue
    loaded = md.load_frame(conf['dcd'], frame, top=conf['pdb'])
    if len(loaded.xyz) == 0:
      miss += 1
    else:
      coords.append(loaded.xyz[0])
  assembled = md.Trajectory(coords, deshaw.topo_prot.top)
  return datareduce.filter_alpha(assembled)
Example #32
0
    def onSaveTrajectory(self, target_filename=None):
        """Write a rotated and translated copy of the trajectory to an H5 file.

        Parameters
        ----------
        target_filename : str, optional
            Output file. When None, the name is requested through a save
            dialog; if no name is obtained nothing is written.
        """
        if target_filename is None:
            # getSaveFileName returns a (filename, selected_filter) pair.
            # Index the pair first: str(pair)[0] would just be "(".
            target_filename = str(QtWidgets.QFileDialog.getSaveFileName(
                None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])
        if not target_filename:
            # Dialog cancelled or empty name: nothing to save.
            return

        translation_vector = self.translation_vector
        rotation_matrix = self.rotation_matrix
        stride = self.stride

        if self.verbose:
            print("Stride: %s" % stride)
            print("\nRotation Matrix")
            print(rotation_matrix)
            print("\nTranslation vector")
            print(translation_vector)

        # Seed the output with a zero-frame trajectory so the file holds the
        # topology but no uninitialised placeholder frame.
        first_frame = md.load_frame(self.trajectory_filename, 0)
        traj_new = md.Trajectory(xyz=np.empty((0, first_frame.n_atoms, 3)),
                                 topology=first_frame.topology)
        traj_new.save(target_filename)

        chunk_size = 1000
        # The context manager closes the table even if a chunk fails.
        with tables.open_file(target_filename, 'a') as table:
            chunks = md.iterload(self.trajectory_filename,
                                 chunk=chunk_size, stride=stride)
            for i, chunk in enumerate(chunks):
                xyz = chunk.xyz.copy()
                rotate(xyz, rotation_matrix)
                translate(xyz, translation_vector)
                # MDTraj's HDF5 layout stores positions in "coordinates".
                table.root.coordinates.append(xyz)
                table.root.time.append(
                    np.arange(i * chunk_size,
                              i * chunk_size + xyz.shape[0],
                              dtype=np.float32))
Example #33
0
def read_and_featurize_divided(filename,
                               dihedrals=None,
                               stride=10):
    """Compute dihedral features for a trajectory and dump them to disk.

    Atoms whose residue name starts with "HI" are left out when loading.

    Parameters
    ----------
    filename : str
        Trajectory path; its parent directory name is used as the condition
        sub-directory of the output.
    dihedrals : list of str, optional
        Dihedral types for ``DihedralFeaturizer``.
        Default: ['phi', 'psi', 'chi2']
    stride : int, optional
        Only used in the output file name. Default: 10
        NOTE(review): the trajectory itself is loaded without a stride.

    Returns
    -------
    The features returned by ``DihedralFeaturizer.transform``.
    """
    if dihedrals is None:
        # Build the default per call rather than sharing one list object.
        dihedrals = ['phi', 'psi', 'chi2']

    traj_top = md.load_frame(filename, 0).topology
    atom_indices = [
        a.index for a in traj_top.atoms if a.residue.name[0:2] != "HI"
    ]

    traj = md.load(filename, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
Example #34
0
def traj(structure, trajectory, frame, contact):
    """Load one frame restricted to atoms named in the contact criterion.

    Parameters
    ----------
    structure : str
        Structure file; parsed line by line for ``ATOM`` records and also
        used as the topology when loading.
    trajectory : str
        Trajectory file to read the frame from.
    frame : int
        Index of the frame to load.
    contact : str
        Contact criterion file (i.e. contact.dat), handed to
        ``cluster.molecules_types``.

    Returns
    -------
    The frame loaded by ``md.load_frame`` with the selected atoms only.
    """
    molecule_types = cluster.molecules_types(contact)

    atomsndx = []
    # Zero-based running index over ATOM records, in file order.
    atom_index = -1
    with open(structure, "r") as fc:
        for line in fc:
            w = line.split()
            # Blank lines split into an empty list; skip them instead of
            # failing on w[0].
            if w and w[0] == "ATOM":
                atom_index += 1
                # The third whitespace-separated field is compared with the
                # molecule types.
                if w[2] in molecule_types:
                    atomsndx.append(atom_index)

    return md.load_frame(trajectory,
                         top=structure,
                         index=frame,
                         atom_indices=atomsndx)
Example #35
0
def cluster(data_dir, traj_dir, n_clusters):
    """Cluster reduced data, plot the centers and save one PDB per center.

    Parameters
    ----------
    data_dir : str
        Path handed to ``verboseload`` to obtain the reduced data.
    traj_dir : str
        Directory handed to ``get_trajectory_files``.
    n_clusters : int
        Number of clusters for ``MiniBatchKMedoids``.
    """
    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    plt.hexbin(trajs[:, 0], trajs[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)

    # The file listing does not depend on the cluster, so fetch it once.
    traj_files = get_trajectory_files(traj_dir)

    for i, center in enumerate(clusterer.cluster_centers_):
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % i,
                     xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        # Row i of cluster_ids_ is used as (trajectory index, frame index).
        location = clusterer.cluster_ids_[i, :]
        print(location)
        traj = traj_files[location[0]]
        print("traj = %s" % traj)
        print("frame = %d" % location[1])
        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb(
            "/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % i)

    plt.show()
Example #36
0
def read_and_featurize(filename, dihedrals=None, stride=10):
    """Compute dihedral features for selected atoms and dump them to disk.

    Parameters
    ----------
    filename : str
        Trajectory path; its parent directory name is used as the condition
        sub-directory of the output.
    dihedrals : list of str, optional
        Dihedral types for ``DihedralFeaturizer``. Default: ['chi2']
    stride : int, optional
        Only used in the output file name. Default: 10

    Returns
    -------
    The features returned by ``DihedralFeaturizer.transform``.
    """
    if dihedrals is None:
        # Build the default per call rather than sharing one list object.
        dihedrals = ['chi2']

    top = md.load_frame(filename, 0).topology
    # NOTE(review): `a.residue != "POPC"` compares a residue object with a
    # string; confirm this is the intended test (a residue *name* check
    # would use a.residue.name).
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.resSeq == 93 and a.residue != "POPC"
        and str(a.residue)[0] == "H"
    ]
    print(len(atom_indices))
    # NOTE(review): the load stride is fixed at 1000 while the `stride`
    # argument only appears in the output file name; confirm.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
Example #37
0
def subsample_traj(traj, stride=5, top=None):
    """Save a strided copy of a trajectory without lipid, water and ions.

    Atoms are dropped when their residue's string form starts with "POP",
    "NA" or "CL", or when the residue is water.

    Parameters
    ----------
    traj : str
        Trajectory path. The parent directory name is split on "-" and its
        second and third fields form the output sub-directory.
    stride : int, optional
        Frame stride used when loading. Default: 5
    top : optional
        Topology passed to mdtraj when loading. Default: None
    """
    path_parts = traj.split("/")
    simulation = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[len(path_parts) - 1]
    sim_fields = simulation.split('-')
    condition = "%s-%s" % (sim_fields[1], sim_fields[2])
    print("analyzing simulation %s file %s" % (simulation, dcd_file))

    def _is_kept(atom):
        # Same tests, in the same order, as one negated disjunction.
        return not (str(atom.residue)[0:3] == "POP"
                    or atom.residue.is_water
                    or str(atom.residue)[0:2] == "NA"
                    or str(atom.residue)[0:2] == "CL")

    first_top = md.load_frame(traj, 0, top=top).topology
    atom_indices = [atom.index for atom in first_top.atoms if _is_kept(atom)]

    subsampled = md.load(traj,
                         stride=stride,
                         top=top,
                         atom_indices=atom_indices)
    print("traj loaded")

    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_allprot"
    new_file = "%s_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    print("saving trajectory as %s" % new_file_full)
    subsampled.save(new_file_full)
Example #38
0
def subsample_traj(traj, stride=5, top=None):
    """Save a strided copy of a trajectory restricted to selected protein atoms.

    An atom is kept when its residue is protein, its residue number is not
    one of 341, 79, 296, 269, 178, 93, and its residue name neither starts
    with "HI" nor equals "NMA", "NME" or "ACE".

    Parameters
    ----------
    traj : str
        Trajectory path. The parent directory name is split on "-" and its
        second and third fields form the output sub-directory.
    stride : int, optional
        Frame stride used when loading. Default: 5
    top : optional
        Topology passed to mdtraj when loading. Default: None
    """
    excluded_resseqs = (341, 79, 296, 269, 178, 93)
    excluded_names = ("NMA", "NME", "ACE")

    path_parts = traj.split("/")
    simulation = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[len(path_parts) - 1]
    sim_fields = simulation.split('-')
    condition = "%s-%s" % (sim_fields[1], sim_fields[2])
    print("analyzing simulation %s file %s" % (simulation, dcd_file))

    first_top = md.load_frame(traj, 0, top=top).topology
    atom_indices = []
    for atom in first_top.atoms:
        residue = atom.residue
        if not residue.is_protein:
            continue
        if residue.resSeq in excluded_resseqs:
            continue
        if residue.name[0:2] == "HI" or residue.name in excluded_names:
            continue
        atom_indices.append(atom.index)

    subsampled = md.load(traj,
                         stride=stride,
                         top=top,
                         atom_indices=atom_indices)

    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled"
    new_file = "%s_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    print("saving trajectory as %s" % new_file_full)
    subsampled.save(new_file_full)
    def onProcessTrajectory(self):
        """Evaluate all potentials on every frame and write them to a table.

        The output file is chosen through a save dialog. A tab-separated
        header naming each potential is written first; then one row per
        frame is printed and appended to the file.
        """
        print("onProcessTrajectory")
        energy_file = chisurf.widgets.save_file(
            description='Save energies', file_type='CSV-name file (*.txt)')

        header = 'FrameNbr\t' + ''.join(
            '%s\t' % p.name for p in self.universe.potentials) + '\n'
        # Close the header handle explicitly so the header is on disk before
        # the rows are appended below.
        header_file = chisurf.fio.zipped.open_maybe_zipped(
            filename=energy_file, mode='w')
        try:
            header_file.write(header)
        finally:
            header_file.close()

        self.structure = chisurf.structure.TrajectoryFile(
            mdtraj.load_frame(self.trajectory_file, 0))[0]
        i = 0
        # Open the output once for all rows instead of once per frame.
        with open(energy_file, 'a') as out:
            for chunk in mdtraj.iterload(self.trajectory_file):
                for frame in chunk:
                    # NOTE(review): factor 10 is presumably nm -> Angstrom;
                    # confirm against the structure's expected units.
                    self.structure.xyz = frame.xyz * 10.0
                    self.structure.update_dist()
                    row = '%i\t' % (i * self.stride + 1) + ''.join(
                        '%.3f\t' % e
                        for e in self.universe.getEnergies(self.structure))
                    print(row)
                    out.write(row + '\n')
                    i += 1
Example #40
0
def read_and_featurize_divided(filename, dihedrals=None, stride=10):
    """Compute dihedral features for a trajectory and dump them to disk.

    Atoms whose residue name starts with "HI" are left out when loading.

    Parameters
    ----------
    filename : str
        Trajectory path; its parent directory name is used as the condition
        sub-directory of the output.
    dihedrals : list of str, optional
        Dihedral types for ``DihedralFeaturizer``.
        Default: ['phi', 'psi', 'chi2']
    stride : int, optional
        Only used in the output file name. Default: 10
        NOTE(review): the trajectory itself is loaded without a stride.

    Returns
    -------
    The features returned by ``DihedralFeaturizer.transform``.
    """
    if dihedrals is None:
        # Build the default per call rather than sharing one list object.
        dihedrals = ['phi', 'psi', 'chi2']

    traj_top = md.load_frame(filename, 0).topology
    atom_indices = [a.index for a in traj_top.atoms
                    if a.residue.name[0:2] != "HI"]

    traj = md.load(filename, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
Example #41
0
def rmsd_to_structure(clusters_dir, ref_dir, text):
    """Write the RMSD of every structure in a directory to a reference.

    Parameters
    ----------
    clusters_dir : str
        Directory handed to ``get_trajectory_files``; also receives the CSV.
    ref_dir : str
        Path of the reference structure (its first frame is used).
    text : str
        Prefix of the output file ``<text>_rmsds.csv``.
    """
    pdbs = get_trajectory_files(clusters_dir)

    ref = md.load_frame(ref_dir, index=0)
    # One row per structure: (position in the file list, RMSD).
    rmsds = np.zeros(shape=(len(pdbs), 2))

    for i, pdb_file in enumerate(pdbs):
        # Function-call form prints the same on Python 2 and Python 3.
        print(i)
        pdb = md.load_frame(pdb_file, index=0)
        rmsd = md.rmsd(pdb, ref, 0)
        rmsds[i, 0] = i
        rmsds[i, 1] = rmsd[0]

    rmsd_file = "%s/%s_rmsds.csv" % (clusters_dir, text)
    np.savetxt(rmsd_file, rmsds, delimiter=",")
Example #42
0
    def _start(self):
        """Sample the model and assemble a trajectory of matching real frames.

        A feature-space trajectory is sampled from the model described by
        ``self.model_dict``. For each sampled point, the frame of the
        featurized input trajectories that scores highest within the
        sample's hidden state is loaded, and the frames are joined into one
        trajectory saved as ``<out>.xtc`` (first frame as ``<out>.xtc.pdb``).
        """
        print("model")
        print(self.model_dict)
        # NOTE(review): both counts are converted to float before being
        # passed to the model constructor; confirm that is what it expects.
        n_features = float(self.model_dict['n_features'])
        n_states = float(self.model_dict['n_states'])
        self.model = MetastableSwitchingLDS(n_states, n_features)
        self.model.load_from_json_dict(self.model_dict)
        obs, hidden_states = self.model.sample(self.args.n_samples)
        # n_features is rebound here to the width of the sampled observations.
        (n_samples, n_features) = np.shape(obs)

        # ii and ff are used below as per-frame frame indices and file names.
        features, ii, ff = mixtape.featurizer.featurize_all(
            self.filenames, self.featurizer, self.topology, self.stride)
        # Not used anywhere below.
        file_trajectories = []

        states = []
        state_indices = []
        state_files = []
        logprob = log_multivariate_normal_density(
            features, np.array(self.model.means_),
            np.array(self.model.covars_), covariance_type='full')
        # Each frame is assigned to the state with the highest log density.
        assignments = np.argmax(logprob, axis=1)
        # Not used anywhere below.
        probs = np.max(logprob, axis=1)
        # Presort the data into the metastable wells
        # i.e.: separate the original trajectories into k
        # buckets corresponding to the metastable wells
        for k in range(int(self.model.n_states)):
            # pick the structures that have the highest log
            # probability in the state
            s = features[assignments == k]
            ind = ii[assignments==k]
            f = ff[assignments==k]
            states.append(s)
            state_indices.append(ind)
            state_files.append(f)

        # Loop over the generated feature space trajectory.
        # At time t, pick the frame from the original trajectory
        # closest to the current sample in feature space. To save
        # a bit of computation, just search in the bucket corresponding
        # to the current metastable well (i.e., the current hidden state).
        traj = None
        for t in range(n_samples):
            featurized_frame = obs[t]
            h = hidden_states[t]
            # Score every frame in bucket h against a single Gaussian whose
            # mean is the sampled point and whose covariance is Qs_[h].
            logprob = log_multivariate_normal_density(
                states[h], featurized_frame[np.newaxis],
                self.model.Qs_[h][np.newaxis],
                covariance_type='full')
            best_frame_pos = np.argmax(logprob, axis=0)[0]
            best_file = state_files[h][best_frame_pos]
            best_ind = state_indices[h][best_frame_pos]
            frame = md.load_frame(best_file, best_ind, self.topology)
            if t == 0:
                traj = frame
            else:
                # Align the new frame to the previously appended one.
                frame.superpose(traj, t-1)
                traj = traj.join(frame)
        # NOTE(review): traj is still None here when n_samples == 0, so the
        # save calls below would raise AttributeError.
        traj.save('%s.xtc' % self.out)
        traj[0].save('%s.xtc.pdb' % self.out)
Example #43
0
def gen_structures(ys, reference, filenames, outs, N_atoms):
  """Save, for each target point, the nearest frame of the time series.

  For every ``y`` in ``ys`` the row of the loaded time series with the
  smallest squared distance to ``y`` is located; the corresponding frame is
  loaded, superposed onto ``reference`` and written to ``<out>.pdb``.
  """
  series, frame_ids, frame_files = load_timeseries(
      filenames, arange(N_atoms), reference)
  for target, out_stem in zip(ys, outs):
    squared_dist = np.sum((target - series)**2, axis=1)
    nearest = np.argmin(squared_dist)
    picked = md.load_frame(frame_files[nearest], frame_ids[nearest])
    picked.superpose(reference)
    picked.save('%s.pdb' % out_stem)
Example #44
0
def test_load_frame():
    """A randomly chosen frame from load_frame matches slicing a full load."""
    names = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.mdcrd', 'frame0.binpos',
             'legacy_msmbuilder_trj0.lh5']
    full_trajs = []
    for name in names:
        full_trajs.append(md.load(get_fn(name), top=get_fn('native.pdb')))
    picks = [np.random.randint(len(full)) for full in full_trajs]
    singles = []
    for name, pick in zip(names, picks):
        singles.append(
            md.load_frame(get_fn(name), index=pick, top=get_fn('native.pdb')))

    for full, single, pick, name in zip(full_trajs, singles, picks, names):
        expected = full[pick]
        eq(expected.xyz, single.xyz)
        eq(expected.unitcell_vectors, single.unitcell_vectors)
        message = '%d, %d: %s' % (expected.time[0], single.time[0], name)
        eq(expected.time, single.time, err_msg=message)

    # A PDB file serves as its own topology, so no `top` is passed.
    pdb_full = md.load(get_fn('2EQQ.pdb'))
    pdb_pick = np.random.randint(len(pdb_full))
    pdb_single = md.load_frame(get_fn('2EQQ.pdb'), pdb_pick)
    eq(pdb_full[pdb_pick].xyz, pdb_single.xyz)
Example #45
0
def save_pdb(traj_dir, clusterer, i):
    """Save the frame referenced by row ``i`` of ``cluster_ids_`` as a PDB.

    Column 0 of the row indexes the list returned by
    ``get_trajectory_files(traj_dir)``; column 1 is the frame index inside
    that trajectory. The output goes to a fixed directory and is named
    ``<i>.pdb``. Always returns None.
    """
    location = clusterer.cluster_ids_[i, :]
    traj_path = get_trajectory_files(traj_dir)[location[0]]
    frame_index = location[1]
    print("traj = %s, frame = %d" % (traj_path, frame_index))
    out_path = (
        "/scratch/users/enf/b2ar_analysis/clusters_1000_allprot/%d.pdb" % i)
    md.load_frame(traj_path, frame_index).save_pdb(out_path)
    return None
Example #46
0
def plot_rmsd(trajectories,
              topology=None,
              subset=None,
              output='rmsd.dat',
              chunksize=100,
              reimage=False):
    """Compute the RMSD of trajectories to their first frame, save and plot it.

    Parameters
    ----------
    trajectories : iterable of str
        Trajectory files; processed in the order given by
        ``sort_key_for_numeric_suffixes``.
    topology : str, optional
        Topology file loaded with ``mdtraj.load_topology``. Default: None
    subset : str, optional
        Atom selection expression restricting the RMSD. Default: None
    output : str, optional
        Text file receiving one RMSD value per line. Default: 'rmsd.dat'
    chunksize : int, optional
        Frames read per iteration. Default: 100
    reimage : bool, optional
        Apply ``image_molecules`` to the reference and to every chunk.
        Default: False
    """
    import mdtraj
    import numpy as np
    from tqdm import tqdm
    if topology:
        topology = mdtraj.load_topology(topology)
    trajectories = sorted(trajectories, key=sort_key_for_numeric_suffixes)
    first_frame = mdtraj.load_frame(trajectories[0], 0, top=topology)
    if subset:
        # Select on the reference frame's topology so that a selection also
        # works when no separate topology file was supplied.
        subset = first_frame.topology.select(subset)
    frame_size = first_frame.xyz[0].nbytes
    if reimage:
        first_frame.image_molecules(inplace=True)
    rmsds = []
    for trajectory in tqdm(trajectories, unit='file'):
        _, ext = os.path.splitext(trajectory)
        total, unit_scale = None, None
        if ext.lower() == '.dcd':
            # Progress-bar total only: estimate the frame count from the
            # file size and the in-memory size of one frame.
            n_frames = round(
                os.path.getsize(trajectory) / frame_size,
                -1 * len(str(chunksize)[1:]))
            total = int(n_frames / chunksize)
            unit_scale = chunksize
        itertraj = mdtraj.iterload(trajectory, top=topology, chunk=chunksize)
        tqdm_kwargs = {
            'total': total,
            'unit': 'frames',
            'unit_scale': unit_scale,
            'postfix': {
                'traj': trajectory
            }
        }
        for chunk in tqdm(itertraj, **tqdm_kwargs):
            if reimage:
                chunk.image_molecules(inplace=True)
            rmsd = mdtraj.rmsd(chunk, first_frame,
                               atom_indices=subset) * 10.0  # nm->A
            rmsds.append(rmsd)

    rmsds = np.concatenate(rmsds)
    with open(output, 'w') as f:
        f.write('\n'.join(map(str, rmsds)))
    print('\nWrote RMSD values to', output)
    print('Plotting results...')
    plt.plot(rmsds)
    ax = plt.gca()
    # Mention the remaining files only when there are any; otherwise the
    # title would read "<file> and  more".
    title = trajectories[0]
    if len(trajectories) > 1:
        title += ' and {} more'.format(len(trajectories) - 1)
    ax.set_title(title)
    ax.set_xlabel('Frames')
    ax.set_ylabel('RMSD (A)')
    plt.show()
Example #47
0
 def _eval_traj_shapes(self):
     """Return the frame count and atom count of every trajectory.

     Returns
     -------
     lengths, n_atoms : numpy arrays of length ``self.n_trajs``
         Created with ``np.zeros`` and filled per trajectory.
     """
     lengths = np.zeros(self.n_trajs)
     n_atoms = np.zeros(self.n_trajs)
     # range() exists on both Python 2 and 3; xrange only on Python 2.
     for i in range(self.n_trajs):
         filename = self.traj_filename(i)
         # The length is read from the open file handle; the first frame
         # is loaded separately for the atom count.
         with md.open(filename) as f:
             lengths[i] = len(f)
         n_atoms[i] = md.load_frame(filename, 0).n_atoms
     return lengths, n_atoms
Example #48
0
 def _eval_traj_shapes(self):
     """Return the frame count and atom count of every trajectory.

     Returns
     -------
     lengths, n_atoms : numpy arrays of length ``self.n_trajs``
         Created with ``np.zeros`` and filled per trajectory.
     """
     lengths = np.zeros(self.n_trajs)
     n_atoms = np.zeros(self.n_trajs)
     # range() exists on both Python 2 and 3; xrange only on Python 2.
     for i in range(self.n_trajs):
         filename = self.traj_filename(i)
         # The length is read from the open file handle; the first frame
         # is loaded separately for the atom count.
         with md.open(filename) as f:
             lengths[i] = len(f)
         n_atoms[i] = md.load_frame(filename, 0).n_atoms
     return lengths, n_atoms
Example #49
0
def test_residues_map(traj_file_1, traj_file_2, residues, residues_map):
    """Print each residue next to the residue it is mapped to.

    For every residue number in ``residues`` the first matching protein
    residue of the first topology is printed, followed by the first protein
    residue of the second topology whose number is the mapped value.
    Raises IndexError when either lookup finds no residue.
    """
    frame_1 = md.load_frame(traj_file_1, index=0)
    frame_2 = md.load_frame(traj_file_2, index=0)
    top1, top2 = frame_1.topology, frame_2.topology
    for residue in residues:
        new_residue = residues_map[residue]

        print("Original residues:")
        originals = [
            r for r in top1.residues if r.resSeq == residue and r.is_protein
        ]
        print(originals[0])

        print("New residues:")
        mapped = [
            r for r in top2.residues
            if r.resSeq == new_residue and r.is_protein
        ]
        print(mapped[0])
    return
Example #50
0
def solublim(dirout, contact, trajectory, structure, cst):
    """Compute the solubility limit and write it to ``<dirout>/solublim.dat``.

    Parameters
    ----------
    dirout : str
        Directory containing ``box.dat`` and ``cluster.dat``; also receives
        ``solublim.dat``.
    contact : str
        Contact criterion file, handed to ``cluster.molecules_radii``.
    trajectory : str
        Trajectory file; only its first frame's topology is used.
    structure : str
        Topology file for the trajectory.
    cst : int
        Clusters with more than ``cst`` members count as large.
    """
    # Box edge lengths give the box volume.
    with open(str(dirout) + "/box.dat", "r") as fi:
        data = numpy.loadtxt(fi)
    bv = data[0] * data[1] * data[2]

    # Radius per molecule type.
    radii = cluster.molecules_radii(contact)

    lc = []  # molecules in large clusters
    sc = []  # molecules in small clusters
    with open(str(dirout) + "/cluster.dat", "r") as fi:
        for line in fi:
            # The first item on each line is the cluster size; the rest
            # are the member molecule indices.
            members = line.split()[1:]
            if len(members) > cst:
                lc.extend(members)
            else:
                sc.extend(members)

    # Sum of the sphere volumes of all molecules in large clusters.
    lv = 0.0
    traj = mdtraj.load_frame(trajectory, top=structure, index=0)
    for lm in lc:
        res = traj.topology.atom(int(lm)).name
        rad = radii[res]
        lv += 4.0 / 3.0 * 3.141592653589793 * rad**3

    # Solubility limit [mM]: number of proteins in small clusters per
    # saturated volume, i.e. box volume minus the volume of proteins in
    # large clusters. 1 prot. / 1 nm3 = 1.660539 [M]
    with open(str(dirout) + "/solublim.dat", "w") as fo:
        print(len(sc) / (bv - lv) * 1.660539 * 1000, file=fo)
Example #51
0
def load_frame(base_dir, protein, traj_folder, filename, frame_index):
    """Load one frame from ``<filename stem>.hdf5`` in the trajectory folder.

    Note: this changes the process working directory to
    ``base_dir/protein/traj_folder`` and does not restore it.

    :param base_dir: Project's base dir
    :param protein: Protein of interest
    :param traj_folder: Sub-folder of the protein dir holding trajectories
    :param filename: file to load; its extension is replaced by ``.hdf5``
    :param frame_index: needed frame
    :return: The required frame
    """
    traj_dir = os.path.join(base_dir, protein, traj_folder)
    os.chdir(traj_dir)
    stem, _extension = os.path.splitext(filename)
    hdf5_name = "%s.hdf5" % stem
    return mdt.load_frame(filename=hdf5_name, index=frame_index)
Example #52
0
def read_and_featurize(traj_file, features_dir=None, condition=None, dihedral_types=("phi", "psi", "chi1", "chi2"), dihedral_residues=None, resSeq_pairs=None, iterative=True):
    """Compute dihedral and/or contact features for one trajectory.

    NOTE(review): in the code visible here the computed features are
    neither returned nor saved, and ``features_dir`` / ``condition`` are
    unused; the function always returns None.

    Parameters
    ----------
    traj_file : str
        Trajectory file to featurize.
    features_dir, condition : optional
        Not used by the visible code.
    dihedral_types : sequence of str, optional
        Any of "phi", "psi", "chi1", "chi2".
    dihedral_residues : sequence, optional
        Residues whose dihedrals are computed; skipped when None or empty.
    resSeq_pairs : sequence, optional
        Residue-number pairs for contact features; skipped when None or
        empty.
    iterative : bool, optional
        Read the trajectory in chunks of 1000 frames for the contact
        features instead of loading it whole. Default: True
    """
    # The defaults are None, so test for None before taking the length.
    has_dihedrals = (dihedral_residues is not None
                     and len(dihedral_residues) > 0)
    has_contacts = resSeq_pairs is not None and len(resSeq_pairs) > 0

    if has_dihedrals or has_contacts:
        # Both feature types need the topology, so load it before either.
        top = md.load_frame(traj_file, index=0).topology

    if has_dihedrals:
        dihedral_indices = []
        for dihedral_type in dihedral_types:
            if dihedral_type == "phi":
                dihedral_indices.append(
                    phi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "psi":
                dihedral_indices.append(
                    psi_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi1":
                dihedral_indices.append(
                    chi1_indices(fix_topology(top), dihedral_residues))
            if dihedral_type == "chi2":
                dihedral_indices.append(
                    chi2_indices(fix_topology(top), dihedral_residues))

        # The dihedral computation needs the loaded trajectory.
        # NOTE(review): the whole file is loaded here; confirm that no
        # atom selection or stride was intended.
        traj = md.load(traj_file)

        # Each angle set contributes its sine and its cosine.
        dihedral_angles = []
        for indices in dihedral_indices:
            angles = np.transpose(
                ManualDihedral.compute_dihedrals(traj=traj, indices=indices))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))

        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if has_contacts:
        resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
        contact_features = []
        if iterative:
            try:
                for chunk in md.iterload(traj_file, chunk=1000):
                    chunk_features = md.compute_contacts(
                        chunk, contacts=resIndex_pairs,
                        scheme='closest-heavy', ignore_nonprotein=False)[0]
                    print(np.shape(chunk_features))
                    contact_features.append(chunk_features)
                contact_features = np.concatenate(contact_features)
            except Exception as e:
                # Best effort: report the failure and give up on this file.
                print(str(e))
                print("Failed")
                return
        else:
            try:
                traj = md.load(traj_file)
                contact_features = md.compute_contacts(
                    traj, contacts=resIndex_pairs,
                    scheme='closest-heavy', ignore_nonprotein=False)[0]
            except Exception as e:
                print(str(e))
                print("Failed for traj")
                return
Example #53
0
def test_load_frame(ref_traj, get_fn):
    """A random single-frame load equals the same frame of a full load."""
    file_type = ref_traj.fobj
    if file_type is md.formats.GroTrajectoryFile:
        pytest.xfail("Gro doesn't implement seek")

    full = md.load(get_fn(ref_traj.fn), top=get_fn('native.pdb'))
    pick = np.random.randint(len(full))
    single = md.load_frame(get_fn(ref_traj.fn),
                           index=pick,
                           top=get_fn('native.pdb'))

    # Marked only after both loads have been attempted.
    if file_type is md.formats.DTRTrajectoryFile:
        pytest.xfail("DTR doesn't load a single frame properly")

    expected = full[pick]
    eq(expected.xyz, single.xyz)
    eq(expected.unitcell_vectors, single.unitcell_vectors)
    if has_time_info(ref_traj.fext):
        eq(expected.time, single.time)
Example #54
0
def get_samples(cluster, trajectories, clusters_map, clusterer_dir, features_dir, traj_dir, save_dir, n_samples, method):
    """Save up to ``n_samples`` member frames of a cluster as PDB files.

    Parameters
    ----------
    cluster : int
        Key into ``clusters_map``.
    trajectories : sequence of str
        Trajectory files, indexed by the first item of each cluster member.
    clusters_map : mapping
        Maps a cluster to a sequence of (trajectory id, frame) members.
    clusterer_dir, features_dir, traj_dir :
        Not used by this function.
    save_dir : str
        Directory receiving ``cluster<c>_sample<s>.pdb`` files.
    n_samples : int
        Number of samples requested.
    method : str
        "random" draws members at random; anything else takes the first
        members in order.

    Returns
    -------
    The member positions selected. When ``method`` is "random" and the
    cluster has fewer than ``n_samples`` members, the range of available
    positions is returned without saving anything.
    """
    members = clusters_map[cluster]
    num_configurations = len(members)
    if method == "random":
        try:
            indices = random.sample(range(num_configurations), n_samples)
        except ValueError:
            # Raised when more samples are requested than members exist
            # (or n_samples is negative). Other errors are not masked.
            return range(0, min(n_samples, num_configurations))
    else:
        indices = range(0, min(n_samples, num_configurations))

    for s in range(0, n_samples):
        if s == len(members):
            return indices[0:s]
        k = indices[s] if method == "random" else s
        sample = members[k]
        traj_id = sample[0]
        frame = sample[1]
        traj = trajectories[traj_id]
        print("cluster %d sample %d" % (cluster, k))

        # Keep atoms outside SOD/CLA residues with residue number below 341.
        top = md.load_frame(traj, index=frame).topology
        atom_indices = [
            a.index for a in top.atoms
            if str(a.residue)[0:3] != "SOD"
            and str(a.residue)[0:3] != "CLA"
            and a.residue.resSeq < 341
        ]

        conformation = md.load_frame(traj, index=frame,
                                     atom_indices=sorted(atom_indices))
        conformation.save_pdb(
            "%s/cluster%d_sample%d.pdb" % (save_dir, cluster, s))

    print(cluster)
    return indices
Example #55
0
def read_and_featurize_custom(traj_file, condition=None, location=None, dihedral_residues=None, distance_residues=None):
    """Compute sin/cos features of phi, psi and chi2 angles and dump them.

    Parameters
    ----------
    traj_file : str
        Trajectory file to featurize.
    condition : str, optional
        Output file stem; derived with ``get_condition(traj_file)`` when
        None.
    location : str, optional
        Output directory; a fixed default directory is used when None.
    dihedral_residues : optional
        Residues handed to the phi/psi/chi2 index helpers.
    distance_residues : optional
        Not used by this function.
    """
    top = md.load_frame(traj_file, index=0).topology
    # All atoms are selected.
    atom_indices = [a.index for a in top.atoms]
    traj = md.load(traj_file, atom_indices=atom_indices)
    # Function-call form prints the same on Python 2 and Python 3.
    print(traj_file)

    phi_tuples = phi_indices(traj.topology, dihedral_residues)
    psi_tuples = psi_indices(traj.topology, dihedral_residues)
    chi2_tuples = chi2_indices(traj.topology, dihedral_residues)

    phi_angles = np.transpose(
        ManualDihedral.compute_dihedrals(traj=traj, indices=phi_tuples))
    psi_angles = np.transpose(
        ManualDihedral.compute_dihedrals(traj=traj, indices=psi_tuples))
    chi2_angles = np.transpose(
        ManualDihedral.compute_dihedrals(traj=traj, indices=chi2_tuples))

    manual_features = np.concatenate([
        np.sin(phi_angles), np.cos(phi_angles),
        np.sin(psi_angles), np.cos(psi_angles),
        np.sin(chi2_angles), np.cos(chi2_angles),
    ])

    print("new features has shape: ")
    print(np.shape(manual_features))

    if condition is None:
        condition = get_condition(traj_file)

    if location is None:
        location = "/scratch/users/enf/b2ar_analysis/features_allprot"

    verbosedump(manual_features, "%s/%s.h5" % (location, condition))
Example #56
0
def reimage_traj(traj_file, traj_dir, save_dir, ext):
    """Autoimage a trajectory with pytraj and save it under ``save_dir``.

    The output keeps the base name of ``traj_file``.

    * ``ext == ".pdb"``: the file is loaded directly by pytraj (it is its
      own topology), autoimaged and saved. If the output already exists the
      function prints ``"already reimaged"`` and returns without touching it.
    * any other ``ext``: the trajectory is first exported with mdtraj to a
      scratch DCD + PDB pair in ``traj_dir``, autoimaged with pytraj, written
      to a second scratch DCD + PDB pair in ``save_dir``, reloaded with
      mdtraj and finally saved to the output path. The four scratch files
      are removed afterwards, including when a step fails.

    Parameters
    ----------
    traj_file : str
        Path of the input trajectory ("/"-separated).
    traj_dir : str
        Directory used for the intermediate ``<stem>.dcd`` / ``<stem>.pdb``
        files (unused in the ``.pdb`` branch).
    save_dir : str
        Directory that receives the reimaged file.
    ext : str
        Extension of the input; only ``".pdb"`` is special-cased.

    Returns
    -------
    None
    """
    file_lastname = os.path.basename(traj_file)
    new_h5_file = "%s/%s" % (save_dir, file_lastname)

    if ext == ".pdb":
        if os.path.exists(new_h5_file):
            print("already reimaged")
            return

        traj_pytraj = mdio.load(traj_file, top=traj_file)[:]
        traj_pytraj.autoimage()

        traj_pytraj.save(new_h5_file)
        print("saving %s" % file_lastname)
        return

    # Stem is everything before the FIRST dot, matching the names that
    # callers of the original implementation already rely on.
    filename = file_lastname.split(".")[0]
    traj_dcd = "%s/%s.dcd" % (traj_dir, filename)
    traj_pdb = "%s/%s.pdb" % (traj_dir, filename)
    new_dcd_file = "%s/%s_temp.dcd" % (save_dir, filename)
    new_top_file = "%s/%s_temp.pdb" % (save_dir, filename)

    traj = md.load(traj_file)
    traj_frame = md.load_frame(traj_file, index=0)

    try:
        traj.save_dcd(traj_dcd)
        traj_frame.save_pdb(traj_pdb)

        traj_pytraj = mdio.load(traj_dcd, top=traj_pdb)[:]
        traj_pytraj.autoimage()

        print(new_dcd_file)
        print(new_top_file)
        traj_pytraj.save(new_dcd_file)
        traj_pytraj.save(new_top_file)

        new_traj = md.load(new_dcd_file, top=traj_pdb)
        new_traj.save(new_h5_file)
    finally:
        # Scratch files are owned by this function; remove whatever was
        # written even if an intermediate step raised.
        for scratch in (traj_dcd, traj_pdb, new_dcd_file, new_top_file):
            if os.path.exists(scratch):
                os.remove(scratch)
    return
Example #57
0
def gen_movie(ys, reference, filenames, out, N_atoms):
    """Build a movie from the stored frames closest to each point in ``ys``.

    Assumes that we have a full featurization of the model. Should fix
    this for larger systems.

    For every ``y`` the row of the loaded time series with the smallest
    squared Euclidean distance to ``y`` is located, the corresponding frame
    is loaded with ``md.load_frame``, and all frames are joined in order.
    The joined trajectory is superposed on itself and written to
    ``"<out>.xtc"``; its first frame is written to ``"<out>.xtc.pdb"``.

    Parameters
    ----------
    ys : iterable
        Points to match against the time series. Must be non-empty, since
        ``reduce`` is called without an initial value.
    reference
        Passed unchanged to ``load_timeseries``.
    filenames
        Passed unchanged to ``load_timeseries``.
    out : str
        Output path prefix.
    N_atoms : int
        Number of atoms; indices ``0 .. N_atoms - 1`` are passed to
        ``load_timeseries``.

    Returns
    -------
    None
    """
    # ``reduce`` is only a builtin on Python 2; this import works on both.
    from functools import reduce

    atom_indices = np.arange(N_atoms)
    # NOTE(review): xx is compared row-wise with each y, while ff[i] / ii[i]
    # are used as the file and frame index of row i -- confirm against
    # load_timeseries.
    xx, ii, ff = load_timeseries(filenames, atom_indices, reference)
    movieframes = []
    for y in ys:
        i = np.argmin(np.sum((y - xx)**2, axis=1))
        movieframes.append(md.load_frame(ff[i], ii[i]))
    movie = reduce(lambda a, b: a.join(b), movieframes)
    movie.superpose(movie)
    movie.save('%s.xtc' % out)
    movie[0].save('%s.xtc.pdb' % out)
 def __init__(self, input_prmtop, input_trjname, clustercenter_file):
     """Initializes an object of HSAcalcs class.

     Builds ``self.hsa_data`` from ``clustercenter_file`` via
     ``_initializeHSADict``, reads the first trajectory frame to obtain
     the topology (stored as ``self.top``) and then calls
     ``_indexGenerator``. Progress messages are printed to stdout.

     Parameters
     ----------
     input_prmtop : string
         Topology file passed as ``top`` to ``md.load_frame``.
     input_trjname : string
         Trajectory file from which frame 0 is read.
     clustercenter_file : string
         File handed to ``_initializeHSADict``.
     """
     self.hsa_data = self._initializeHSADict(clustercenter_file)
     print("Reading in topology ...")
     first_frame = md.load_frame(input_trjname, 0, top=input_prmtop)
     self.top = first_frame.topology
     print("Generating atom indices ...")
     self._indexGenerator()
     print("Done...")
Example #59
0
def test_load_frame():
    """Check that ``md.load_frame`` agrees with indexing a fully loaded trajectory.

    For every supported trajectory format a random frame index is drawn,
    that frame is read with ``md.load_frame`` and its coordinates, unit
    cell vectors and time are compared with the same frame sliced out of
    the trajectory read by ``md.load``. One sub-test is yielded per file.
    Finally the same coordinate check is done for a PDB file loaded without
    a separate topology.
    """
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.mdcrd', 'frame0.binpos', 'frame0.xyz',
             'frame0.lammpstrj']
    if not (on_win and on_py3):
        files.append('legacy_msmbuilder_trj0.lh5')

    top = get_fn('native.pdb')
    trajectories = [md.load(get_fn(f), top=top) for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [md.load_frame(get_fn(f), index=r, top=top) for f, r in zip(files, rand)]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):
        # Bind the loop values as defaults: a runner that collects every
        # yielded test before executing any would otherwise see only the
        # last iteration's values (and the ``r`` rebound further below).
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time, err_msg='%d, %d: %s' % (traj[r].time[0], frame.time[0], f))
        test.description = 'test_load_frame: %s' % f
        yield test

    t1 = md.load(get_fn('2EQQ.pdb'))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn('2EQQ.pdb'), r)
    eq(t1[r].xyz, t2.xyz)
Example #60
0
def reimage_traj(traj_file, save_dir):
    """Autoimage a trajectory with pytraj and save it as ``<save_dir>/<stem>.h5``.

    The trajectory is loaded with mdtraj, converted to a pytraj trajectory,
    autoimaged, and the imaged coordinates are copied back into the mdtraj
    object before it is saved. ``<stem>`` is the base name of ``traj_file``
    up to its first dot. The output path is printed before saving.

    Parameters
    ----------
    traj_file : str
        Path of the input trajectory ("/"-separated); it must be loadable
        by ``md.load`` without a separate topology file.
    save_dir : str
        Directory that receives the ``.h5`` file.

    Returns
    -------
    None
    """
    traj = md.load(traj_file)

    traj_pytraj = mdio.load_mdtraj(traj)
    traj_pytraj.autoimage()
    # NOTE(review): the division by 10 presumably converts pytraj's
    # Angstroms back to mdtraj's nanometers -- confirm.
    traj.xyz[:] = traj_pytraj.xyz / 10.

    filename = traj_file.split("/")[-1].split(".")[0]
    new_h5_file = "%s/%s.h5" % (save_dir, filename)
    print(new_h5_file)
    traj.save(new_h5_file)
    return