def __init__(self, topology_file, trajectory, start_frame=0, num_frames=0, supporting_file=None, rho_bulk=None):
    """Initialize WaterAnalysis object for a trajectory and corresponding topology file.

    Parameters
    ----------
    topology_file : string
        Filename of the system topology file.
    trajectory : string
        Filename of the molecular dynamics trajectory.
    start_frame : int, optional
        The frame index from which the calculations will begin. Default: 0
    num_frames : int, optional
        The total number of frames or the length of simulation over which
        calculations will be performed. Must be at least 100; note that the
        default of 0 does not pass this check, so a value has to be supplied.
    supporting_file : string, optional
        Filename of additional file containing non-bonded parameters for
        every particle in the system. Default: None
    rho_bulk : float, optional
        Reference bulk water density to be used in calculations. When None,
        0.0334 is used.

    Raises
    ------
    AssertionError
        If fewer than 100 frames are requested, the first frame carries no
        unit cell lengths, no water atoms are found, the first water atom is
        not named "O", or water/non-water indices do not partition all atoms.
    """
    self.topology_file = topology_file
    self.trajectory = trajectory
    self.supporting_file = supporting_file
    self.start_frame = start_frame
    assert num_frames >= 100, "A minimum of 100 frames are required for analysis."
    self.num_frames = num_frames
    self.check_topology_requiremnts(self.topology_file, self.supporting_file)

    first_frame = md.load_frame(self.trajectory, self.start_frame, top=self.topology_file)
    assert first_frame.unitcell_lengths is not None, "Could not detect unit cell information."
    self.topology = first_frame.topology

    # Classify the periodic box from the unit cell angles of frame 0.
    self.box_type = "Unspecified"
    try:
        angles = md.load_frame(self.trajectory, 0, top=self.topology_file).unitcell_angles
        orthogonal = np.allclose(angles, 90)
    except Exception:
        # Best effort: a box that cannot be inspected is treated as unsupported.
        orthogonal = False
    if orthogonal:
        self.box_type = "Orthorhombic"
    else:
        # Previously this warning was only shown when the check itself raised,
        # so an actual non-orthorhombic box went unreported.
        print("WARNING: Only orthorhombic periodic boxes are currently supported.")

    self.rho_bulk = rho_bulk
    if self.rho_bulk is None:
        self.rho_bulk = 0.0334

    # Fallback selection expression covering every known water residue name.
    super_wat_select_exp = " or ".join("resname %s" % wat_res for wat_res in _WATER_RESNAMES)

    self.all_atom_ids = self.topology.select("all")
    self.wat_atom_ids = self.topology.select("water")
    self.prot_atom_ids = self.topology.select("protein")
    if self.wat_atom_ids.shape[0] == 0:
        self.wat_atom_ids = self.topology.select(super_wat_select_exp)
    assert (self.wat_atom_ids.shape[0] != 0), "Unable to recognize waters in the system!"
    assert (self.topology.atom(self.wat_atom_ids[0]).name == "O"), "Failed while constructing water oxygen atom indices!"

    self.wat_oxygen_atom_ids = np.asarray(
        [atom for atom in self.wat_atom_ids if self.topology.atom(atom).name == "O"])
    self.non_water_atom_ids = np.setdiff1d(self.all_atom_ids, self.wat_atom_ids)
    assert (self.wat_atom_ids.shape[0] + self.non_water_atom_ids.shape[0] == self.all_atom_ids.shape[0]), "Failed to partition atom indices in the system correctly!"
def onJoinTrajectories(self):
    """Join two trajectories chunk by chunk into a new H5 file chosen by the user.

    With ``self.join_mode == 'time'`` the coordinate chunks are concatenated
    along the frame axis; with ``'atoms'`` they are concatenated along the
    atom axis. ``self.reverse_traj_1`` / ``self.reverse_traj_2`` reverse the
    frame order within each loaded chunk (not across the whole trajectory).

    Raises
    ------
    ValueError
        If ``self.join_mode`` is neither ``'time'`` nor ``'atoms'``.
    """
    # getSaveFileName returns a (filename, filter) pair; index it *before*
    # converting to str, otherwise the first character of the tuple's repr
    # would be used as the file name.
    target_filename = str(QtWidgets.QFileDialog.getSaveFileName(
        None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])
    if not target_filename:
        # Dialog was cancelled: nothing to write.
        return

    fn1 = self.trajectory_filename_1
    fn2 = self.trajectory_filename_2
    r1 = self.reverse_traj_1
    r2 = self.reverse_traj_2

    traj_1 = md.load_frame(fn1, index=0)
    traj_2 = md.load_frame(fn2, index=0)

    # Use the first frames to obtain the topology/atom count of the result.
    if self.join_mode == 'time':
        traj_join = traj_1.join(traj_2)
        axis = 0
    elif self.join_mode == 'atoms':
        traj_join = traj_1.stack(traj_2)
        axis = 1
    else:
        raise ValueError("Unknown join mode: %r" % (self.join_mode,))

    # Create an empty trajectory file that the chunks are appended to.
    target_traj = md.Trajectory(xyz=np.empty((0, traj_join.n_atoms, 3)), topology=traj_join.topology)
    target_traj.save(target_filename)

    chunk_size = self.chunk_size
    # The context manager closes the file even if a chunk fails to load.
    with tables.open_file(target_filename, 'a') as table:
        chunk_pairs = izip(md.iterload(fn1, chunk=chunk_size), md.iterload(fn2, chunk=chunk_size))
        for i, (c1, c2) in enumerate(chunk_pairs):
            xyz_1 = c1.xyz[::-1] if r1 else c1.xyz
            xyz_2 = c2.xyz[::-1] if r2 else c2.xyz
            xyz = np.concatenate((xyz_1, xyz_2), axis=axis)
            table.root.coordinates.append(xyz)
            table.root.time.append(
                np.arange(i * chunk_size, i * chunk_size + xyz.shape[0], dtype=np.float32))
def test_load_frame():
    """Yield one check per trajectory format that load_frame matches load()[i].

    For every file a random frame index is drawn; the frame obtained through
    ``md.load_frame`` must agree with the same index of the fully loaded
    trajectory in coordinates, unit cell vectors and time. A final check does
    the same for a PDB file loaded without a separate topology.
    """
    files = [
        "frame0.nc",
        "frame0.h5",
        "frame0.xtc",
        "frame0.trr",
        "frame0.dcd",
        "frame0.mdcrd",
        "frame0.binpos",
        "frame0.xyz",
        "frame0.lammpstrj",
    ]
    if not (on_win and on_py3):
        files.append("legacy_msmbuilder_trj0.lh5")

    top_fn = get_fn("native.pdb")
    trajectories = [md.load(get_fn(f), top=top_fn) for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [md.load_frame(get_fn(f), index=r, top=top_fn) for f, r in zip(files, rand)]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):
        # Bind the loop variables as defaults: a runner that collects all
        # yielded tests before executing them would otherwise see only the
        # values of the last iteration (late-binding closure).
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time,
               err_msg="%d, %d: %s" % (traj[r].time[0], frame.time[0], f))

        test.description = "test_load_frame: %s" % f
        yield test

    t1 = md.load(get_fn("2EQQ.pdb"))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn("2EQQ.pdb"), r)
    eq(t1[r].xyz, t2.xyz)
def sample_clusters(clusterer_dir, features_dir, traj_dir, save_dir, n_samples):
    """Write the first ``n_samples`` frames of every cluster as PDB files.

    ``dist_to_means`` supplies, per cluster, a sequence of samples whose first
    two entries are a trajectory index and a frame index. Each selected frame
    is reduced to atoms whose residue is not SOD/CLA and whose ``resSeq`` is
    below 341, then saved as ``<save_dir>/cluster<c>_sample<s>.pdb``.
    Afterwards ``remove_ter``, ``reorder`` and ``reimage`` are run on
    ``save_dir``.
    """
    clusters_map = dist_to_means(clusterer_dir, features_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    trajectories = get_trajectory_files(traj_dir)
    excluded_prefixes = ("SOD", "CLA")

    for cluster in range(len(clusters_map)):
        for s in range(n_samples):
            sample = clusters_map[cluster][s]
            traj_id = sample[0]
            frame = sample[1]

            # Read the frame once and slice it in memory instead of loading
            # the same frame from disk a second time with atom_indices.
            full_frame = md.load_frame(trajectories[traj_id], index=frame)
            indices = [
                a.index for a in full_frame.topology.atoms
                if str(a.residue)[0:3] not in excluded_prefixes
                and a.residue.resSeq < 341
            ]
            conformation = full_frame.atom_slice(indices)
            conformation.save_pdb("%s/cluster%d_sample%d.pdb" % (save_dir, cluster, s))

    remove_ter(save_dir)
    reorder(save_dir)
    reimage(save_dir)
def test_load_frame():
    """Yield one check per trajectory format that load_frame matches load()[i].

    A random frame index is drawn for every file; the single frame returned by
    ``md.load_frame`` is compared with that index of the fully loaded
    trajectory (coordinates, unit cell vectors, time). The last check repeats
    the coordinate comparison for a PDB file.
    """
    files = [
        'frame0.nc',
        'frame0.h5',
        'frame0.xtc',
        'frame0.trr',
        'frame0.dcd',
        'frame0.mdcrd',
        'frame0.binpos',
        'frame0.xyz',
        'frame0.lammpstrj',
    ]
    if not (on_win and on_py3):
        files.append('legacy_msmbuilder_trj0.lh5')

    native = get_fn('native.pdb')
    trajectories = [md.load(get_fn(f), top=native) for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [
        md.load_frame(get_fn(f), index=r, top=native)
        for f, r in zip(files, rand)
    ]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):
        # Default arguments freeze the current iteration's values so the
        # yielded callable stays correct even if it is executed after the
        # generator has moved on (late-binding closure pitfall).
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time,
               err_msg='%d, %d: %s' % (traj[r].time[0], frame.time[0], f))

        test.description = 'test_load_frame: %s' % f
        yield test

    t1 = md.load(get_fn('2EQQ.pdb'))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn('2EQQ.pdb'), r)
    eq(t1[r].xyz, t2.xyz)
def map_drawn_samples(selected_pairs_by_state, trajectories, top=None):
    """Look up trajectory frames from (trajectory, frame) index pairs.

    Parameters
    ----------
    selected_pairs_by_state : np.ndarray, dtype=int, shape=(n_states, n_samples, 2)
        ``selected_pairs_by_state[state, sample]`` is the (trajectory, frame)
        index pair of one sample drawn from ``state``.
    trajectories : list(md.Trajectory) or list(np.ndarray) or list(filenames)
        The data the index pairs refer to. The type of the first element
        decides how every entry is read: if it is a string, entries are
        treated as filenames and loaded with ``mdtraj.load_frame``; otherwise
        entries are indexed directly with the frame index.
    top : md.Topology, optional, default=None
        Passed to ``mdtraj.load_frame`` when filenames are given and ``top``
        is truthy.

    Returns
    -------
    frames_by_state : list
        One entry per state. If the looked-up frames provide a ``join``
        method (mdtraj trajectories) the entry is the joined trajectory;
        otherwise it is ``np.array`` of the looked-up frames.

    Examples
    --------
    >>> selected_pairs_by_state = hmm.draw_samples(sequences, 3)
    >>> samples = map_drawn_samples(selected_pairs_by_state, trajectories)

    Notes
    -----
    YOU are responsible for ensuring that selected_pairs_by_state and
    trajectories correspond to the same dataset!

    See Also
    --------
    ghmm.GaussianFusionHMM.draw_samples : Draw samples from GHMM
    ghmm.GaussianFusionHMM.draw_centroids : Draw centroids from GHMM
    """
    frames_by_state = []
    for pairs in selected_pairs_by_state:
        from_files = isinstance(trajectories[0], str)
        if from_files:
            import mdtraj as md

        collected = []
        for trj, frame in pairs:
            source = trajectories[trj]
            if not from_files:
                collected.append(source[frame])
            elif top:
                collected.append(md.load_frame(source, frame, top=top))
            else:
                collected.append(md.load_frame(source, frame))

        try:
            # mdtraj case: start from an empty slice of the first frame and
            # join every collected frame onto it.
            merged = collected[0][0:0].join(collected)
        except AttributeError:
            # Plain arrays have no join(): stack them into one array.
            merged = np.array(collected)
        frames_by_state.append(merged)
    return frames_by_state
def export_frame_coordinates(topology, trajectory, nframe, output=None):
    """
    Save the coordinates of one trajectory frame as a structure file.

    The frame ``nframe`` of ``trajectory`` is loaded with mdtraj, its
    positions (and box vectors, when the frame has them) are put onto a
    ParmEd structure built from ``topology``, and the result is written to
    ``output``. When ``output`` is None the name
    ``<trajectory without extension>.frame<nframe>.inpcrd`` is used.
    """
    if output is None:
        stem = os.path.splitext(trajectory)[0]
        output = '{}.frame{}.inpcrd'.format(stem, nframe)

    topology_ext = os.path.splitext(topology)[1]
    if topology_ext not in ('.top', '.prmtop'):
        # standard protocol (the topology is loaded twice, though)
        traj = mdtraj.load_frame(trajectory, int(nframe), top=topology)
        structure = parmed.load_file(topology)
    else:
        # ParmEd sometimes struggles with certain PRMTOP files, so these go
        # through the AmberPrmtopFile reader instead.
        top = AmberPrmtopFile(topology)
        mdtop = mdtraj.Topology.from_openmm(top.topology)
        traj = mdtraj.load_frame(trajectory, int(nframe), top=mdtop)
        structure = parmed.openmm.load_topology(top.topology, system=top.createSystem())
        structure.box_vectors = top.topology.getPeriodicBoxVectors()

    structure.positions = traj.openmm_positions(0)
    has_box = traj.unitcell_vectors is not None
    if has_box:
        # if frame provides box vectors, use those
        structure.box_vectors = traj.openmm_boxes(0)
    structure.save(output, overwrite=True)
def sample_clusters(clusterer_dir, features_dir, traj_dir, save_dir, n_samples):
    """Save the first ``n_samples`` frames of each cluster as PDB files.

    For every cluster key ``0 .. len(clusters_map) - 1`` returned by
    ``dist_to_means``, each sample gives a trajectory index and a frame index.
    The frame is stripped to atoms whose residue string does not start with
    SOD or CLA and whose ``resSeq`` is below 341, and written to
    ``<save_dir>/cluster<c>_sample<s>.pdb``. ``remove_ter``, ``reorder`` and
    ``reimage`` are then applied to ``save_dir``.
    """
    clusters_map = dist_to_means(clusterer_dir, features_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    trajectories = get_trajectory_files(traj_dir)
    skipped = ("SOD", "CLA")

    for cluster in range(len(clusters_map)):
        cluster_samples = clusters_map[cluster]
        for s in range(n_samples):
            sample = cluster_samples[s]
            traj_id = sample[0]
            frame = sample[1]

            # One disk read per sample: load the whole frame, then keep the
            # wanted atoms in memory (previously the frame was loaded twice).
            loaded = md.load_frame(trajectories[traj_id], index=frame)
            indices = [
                a.index for a in loaded.topology.atoms
                if str(a.residue)[0:3] not in skipped and a.residue.resSeq < 341
            ]
            conformation = loaded.atom_slice(indices)
            conformation.save_pdb("%s/cluster%d_sample%d.pdb" % (save_dir, cluster, s))

    remove_ter(save_dir)
    reorder(save_dir)
    reimage(save_dir)
def test_load_frame():
    """Check that md.load_frame agrees with indexing a fully loaded trajectory.

    A random frame index is drawn per file; coordinates, unit cell vectors
    and time of the single loaded frame must equal those of the same index in
    the full trajectory. The PDB case at the end compares coordinates only.
    """
    files = [
        'frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
        'frame0.dcd', 'frame0.mdcrd', 'frame0.binpos',
        'legacy_msmbuilder_trj0.lh5',
    ]

    trajectories = []
    for name in files:
        trajectories.append(md.load(get_fn(name), top=get_fn('native.pdb')))

    rand = []
    for full in trajectories:
        rand.append(np.random.randint(len(full)))

    frames = []
    for name, pick in zip(files, rand):
        frames.append(md.load_frame(get_fn(name), index=pick, top=get_fn('native.pdb')))

    for idx, name in enumerate(files):
        full, single, pick = trajectories[idx], frames[idx], rand[idx]
        eq(full[pick].xyz, single.xyz)
        eq(full[pick].unitcell_vectors, single.unitcell_vectors)
        eq(full[pick].time, single.time,
           err_msg='%d, %d: %s' % (full[pick].time[0], single.time[0], name))

    pdb_full = md.load(get_fn('2EQQ.pdb'))
    pdb_pick = np.random.randint(len(pdb_full))
    pdb_single = md.load_frame(get_fn('2EQQ.pdb'), pdb_pick)
    eq(pdb_full[pdb_pick].xyz, pdb_single.xyz)
def save_traj_w_md_load_frame(reader, sets):
    """Build one trajectory from (file index, frame index) pairs via md.load_frame.

    ``sets`` is stacked with ``vstack`` and every resulting row is read as a
    ``(file_idx, frame_idx)`` pair; the frame is loaded from
    ``reader.filenames[file_idx]`` with ``reader.topfile`` as topology.

    Returns the joined trajectory, or None when there are no pairs.
    """
    frames = [
        md.load_frame(reader.filenames[file_idx], frame_idx, reader.topfile)
        for file_idx, frame_idx in vstack(sets)
    ]
    if not frames:
        return None
    if len(frames) == 1:
        return frames[0]
    # A single join over the whole list copies the data once, instead of
    # re-copying the growing trajectory for every added frame.
    return frames[0].join(frames[1:])
def test_residues_map(traj_file_1, traj_file_2, residues, residues_map):
    """Print, for each residue number, the matching residue in both topologies.

    For every entry of ``residues`` the first protein residue of the first
    topology with that ``resSeq`` is printed, followed by the first protein
    residue of the second topology whose ``resSeq`` equals
    ``residues_map[residue]``.
    """
    top1 = md.load_frame(traj_file_1, index=0).topology
    top2 = md.load_frame(traj_file_2, index=0).topology

    for residue in residues:
        mapped_residue = residues_map[residue]

        print("Original residues:")
        originals = [res for res in top1.residues if res.resSeq == residue and res.is_protein]
        print(originals[0])

        print("New residues:")
        mapped = [res for res in top2.residues if res.resSeq == mapped_residue and res.is_protein]
        print(mapped[0])
    return
def test_residues_map_num_atoms(traj_file_1, traj_file_2, residues, residues_map):
    """Report residues whose atom count differs between two topologies.

    For every entry of ``residues`` the protein atoms with that ``resSeq`` in
    the first topology are counted and compared with the protein atoms whose
    ``resSeq`` equals ``residues_map[residue]`` in the second topology. A
    message is printed for each residue where the two counts differ.
    """
    top1 = md.load_frame(traj_file_1, index=0).topology
    top2 = md.load_frame(traj_file_2, index=0).topology

    for residue in residues:
        new_residue = residues_map[residue]
        len1 = sum(
            1 for a in top1.atoms
            if a.residue.resSeq == residue and a.residue.is_protein)
        len2 = sum(
            1 for a in top2.atoms
            if a.residue.resSeq == new_residue and a.residue.is_protein)
        # The previous condition `(len1 != len2) or (len1 == len2)` was always
        # true, so every residue was reported as a mismatch.
        if len1 != len2:
            print("Atom number %d %d doesn't match for residue %d" % (len1, len2, residue))
    return
def generate_traj_from_stateinds(inds, meta, atom_selection='all'):
    """
    Concatenate several frames from different trajectories to create a new one.

    Parameters
    ----------
    inds: list of tuples,
        Each element of the list has to be a 2D tuple of ints (traj_index, frame_index)
    meta: a metadata object
        Indexed as ``meta.loc[traj_index]``; the result must provide the
        entries ``'top_fn'`` (prmtop file) and ``'traj_fn'`` (trajectory file).
    atom_selection: str,
        Which atoms to load

    Returns
    -------
    traj: mdtraj.Trajectory
        The joined frames, centered and superposed onto the first frame.
    """
    # Parse each topology file (and evaluate the selection on it) only once,
    # however many frames refer to it.
    topology_cache = {}
    frame_list = []
    for traj_i, frame_i in inds:
        entry = meta.loc[traj_i]
        top_fn = entry['top_fn']
        if top_fn not in topology_cache:
            top = mdtraj.load_prmtop(top_fn)
            topology_cache[top_fn] = (top, top.select(atom_selection))
        top, atoms = topology_cache[top_fn]
        frame_list.append(
            mdtraj.load_frame(entry['traj_fn'], atom_indices=atoms, index=frame_i, top=top)
        )

    traj = mdtraj.join(frame_list, check_topology=False)
    traj.center_coordinates()
    traj.superpose(traj, 0)
    return traj
def read_and_featurize(filename, dihedrals=['chi2'], stride=10):
    """Compute dihedral features for residue 93 of a trajectory and dump them.

    Atoms are selected when their residue has ``resSeq == 93``, is not named
    POPC, and its string form starts with "H". The trajectory is loaded with
    these atoms, featurized with ``DihedralFeaturizer(types=dihedrals)``, and
    the features are written with ``verbosedump`` below
    ``/scratch/users/enf/b2ar_analysis/subsampled_features/<condition>/``,
    where ``<condition>`` is the second-to-last "/"-separated component of
    ``filename``.

    Returns the computed features.
    """
    top = md.load_frame(filename, 0).topology
    # Compare the residue *name* with "POPC"; comparing the residue object
    # itself with a string is never equal, so that test could not exclude
    # anything.
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.resSeq == 93
        and a.residue.name != "POPC"
        and str(a.residue)[0] == "H"
    ]
    print(len(atom_indices))

    # NOTE(review): frames are loaded with a fixed stride of 1000, while the
    # `stride` argument is only used in the output file name - confirm which
    # value is intended.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)
    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    verbosedump(features, new_file_full)
    return features
def loadFrames(confs_by_state):
    """Load the full-system frame for every (file, frame) entry, grouped as given.

    ``confs_by_state`` is a sequence of sequences; each inner element provides
    a trajectory path at index 0 and a frame index at index 1. The base name
    of the path is matched against ``<project>_traj..._<number>.xtc`` and the
    frame is loaded from
    ``../<project>/analysis/full/traj_full_<number>.xtc`` using
    ``../<project>/analysis/full/ref.pdb`` as topology.

    Returns a list with one list of loaded frames per input group.

    Raises
    ------
    ValueError
        If a file name does not match the expected pattern.
    """
    # Raw string avoids the invalid "\d" escape of a normal string literal;
    # compiled once instead of once per frame.
    name_pattern = re.compile(r"(.*)_traj.*_(\d*).xtc")

    frames = []
    for elem in confs_by_state:
        trajFrames = []
        for trajFrame in elem:
            base_name = os.path.basename(trajFrame[0])
            frame = trajFrame[1]
            m = name_pattern.match(base_name)
            if m is None:
                raise ValueError(
                    "Trajectory file name %r does not match the expected pattern" % (base_name,))
            projectName = m.group(1)
            trajNum = m.group(2)
            # now find the actual trajectory
            # TODO also get the regular traj
            originalTraj = "../%s/analysis/full/traj_full_%s.xtc" % (projectName, trajNum)
            # load the ref
            ref = "../%s/analysis/full/ref.pdb" % projectName
            print("loading %s frame %s" % (originalTraj, frame))
            trajFrames.append(md.load_frame(originalTraj, frame, top=ref))
        frames.append(trajFrames)
    return frames
def get_feature_list(feature_residues_csv, structure_file):
    """Translate residue-number feature pairs into residue-name pairs.

    ``generate_features(feature_residues_csv)`` yields the features; for each
    one the two residue numbers are taken either directly from
    ``feature[0]`` / ``feature[1]`` or, if those cannot be converted to int,
    from ``feature[0][1]`` / ``feature[1][1]``. Each number is looked up among
    the protein residues of ``structure_file`` (first match by ``resSeq``) and
    replaced by the title-cased string of that residue.

    Returns a list of ``(name_i, name_j)`` tuples.

    Raises
    ------
    ValueError
        If a residue number is not present among the protein residues.
    """
    feature_names = generate_features(feature_residues_csv)
    structure = md.load_frame(structure_file, index=0)

    protein_residues = [r for r in structure.topology.residues if r.is_protein]
    all_resSeq = [r.resSeq for r in protein_residues]
    all_res = [str(r).title() for r in protein_residues]

    feature_list = []
    for feature_name in feature_names:
        try:
            res_i = int(feature_name[0])
            res_j = int(feature_name[1])
        except (TypeError, ValueError):
            # Entries that are not plain numbers carry the residue number in
            # their second field.
            res_i = int(feature_name[0][1])
            res_j = int(feature_name[1][1])
        res_i_name = all_res[all_resSeq.index(res_i)]
        res_j_name = all_res[all_resSeq.index(res_j)]
        feature_list.append((res_i_name, res_j_name))
    return (feature_list)
def compute_contacts_below_cutoff(traj_file_frame, cutoff = 100000.0, contact_residues = [], anton = False):
    """Find residue pairs whose closest-heavy-atom distance is below ``cutoff``.

    The first frame of ``traj_file_frame[0]`` is loaded. Every entry of
    ``contact_residues`` is read as ``(chain id, resSeq)`` and mapped to a
    non-water residue index of that frame; entries without a match are
    reported and skipped. Distances for all pairs of the remaining residues
    are computed with ``md.compute_contacts`` (scheme ``'closest-heavy'``).

    ``contact_residues`` is only read, never modified. ``anton`` is accepted
    but not used.

    Returns
    -------
    (final_resSeq_pairs, residue_full_infos)
        The pairs of ``contact_residues`` entries below the cutoff, and for
        each such pair a tuple of two lists holding
        ``(resSeq, name, chain index)`` of the residues with the matching
        residue index.
    """
    traj_file = traj_file_frame[0]
    frame = md.load_frame(traj_file, index = 0)
    top = frame.topology

    distance_residues = []
    resSeq_to_resIndex = {}
    for residue in contact_residues:
        indices = [r.index for r in top.residues
                   if r.resSeq == residue[1] and r.chainid == residue[0] and not r.is_water]
        if len(indices) == 0:
            # `residue` is a sequence, so it cannot be formatted with a bare
            # "%d"; wrap it in a 1-tuple and use "%s".
            print("No residues in trajectory for residue %s" % (residue,))
            continue

        ind = indices[0]
        for j in indices:
            if j != ind:
                # Clauses ordered so that `r` is bound before `r.atoms` is
                # read (the reversed order referenced an undefined `r`).
                # NOTE(review): str(atom) presumably includes the residue
                # label, in which case it never equals "CB" - confirm whether
                # the atom name was meant here.
                if "CB" in [str(a) for r in top.residues if r.index == ind for a in r.atoms]:
                    ind = j
        distance_residues.append(residue)
        resSeq_to_resIndex[residue] = ind

    resSeq_pairs = list(itertools.combinations(distance_residues, 2))
    res_index_combinations = [
        (resSeq_to_resIndex[res0], resSeq_to_resIndex[res1])
        for res0, res1 in resSeq_pairs
    ]

    distances = md.compute_contacts(frame, contacts = res_index_combinations,
                                    scheme = 'closest-heavy', ignore_nonprotein=False)[0]
    print(np.shape(distances))

    final_resSeq_pairs = []
    final_resIndex_pairs = []
    for i, distance in enumerate(distances[0]):
        if distance < cutoff:
            final_resIndex_pairs.append(res_index_combinations[i])
            final_resSeq_pairs.append(resSeq_pairs[i])

    # Index the residue descriptions once instead of scanning the topology
    # twice for every selected pair.
    info_by_index = {}
    for r in top.residues:
        info_by_index.setdefault(r.index, []).append((r.resSeq, r.name, r.chain.index))
    residue_full_infos = [
        (list(info_by_index.get(index0, [])), list(info_by_index.get(index1, [])))
        for index0, index1 in final_resIndex_pairs
    ]

    print(len(final_resSeq_pairs))
    print(len(final_resIndex_pairs))
    return((final_resSeq_pairs, residue_full_infos))
def find_most_important_residues_in_tIC(traj_file, tica_object, tic_features_csv, contact_residues,tic_residue_csv, feature_coefs_csv, duplicated_feature_coefs_csv, cutoff):
    """Rank feature residues by absolute tICA coefficient and write CSV tables.

    The tICA model is read with ``verboseload`` (falling back to
    ``load_dataset``). Feature residue pairs come from
    ``generate_features(tic_features_csv)`` and are rendered as
    ``"<field 2><field 1>.<field 0>"`` strings. For every component the
    absolute coefficients are collected and the features are ordered by
    decreasing absolute coefficient. Three tables are written with
    ``write_map_to_csv``:

    * ``tic_residue_csv``: per component, the residues of the features in
      ranked order (both residues of each pair, flattened);
    * ``feature_coefs_csv``: per component, the absolute coefficients in
      feature order, plus the columns ``residues_0`` / ``residues_1``;
    * ``duplicated_feature_coefs_csv``: each coefficient repeated twice, plus
      a flattened ``residues`` column.

    ``contact_residues`` and ``cutoff`` are accepted but not used.
    """
    try:
        tica = verboseload(tica_object)
    except Exception:
        tica = load_dataset(tica_object)
    print(traj_file)

    # The loaded frame is not used below; the call is kept so that an
    # unreadable trajectory file still fails here.
    md.load_frame(traj_file, 0)

    # Each pair becomes a 2-tuple of labels (list.append takes exactly one
    # argument, so the two labels must be wrapped in a tuple).
    residue_pairs = [
        ("%s%d.%d" % (pair[0][2], pair[0][1], pair[0][0]),
         "%s%d.%d" % (pair[1][2], pair[1][1], pair[1][0]))
        for pair in generate_features(tic_features_csv)
    ]

    top_indices_per_tIC = {}
    feature_coefs_per_tIC = {}
    duplicated_feature_coefs_per_tIC = {}

    components_shape = np.shape(tica.components_)
    for i in range(0, components_shape[0]):
        print(i)
        index_components = [(j, abs(tica.components_[i][j])) for j in range(0, components_shape[1])]
        feature_coefs_per_tIC[i] = [component[1] for component in index_components]
        # Each coefficient appears twice: once for each residue of the pair.
        duplicated_feature_coefs_per_tIC[i] = [j for k in feature_coefs_per_tIC[i] for j in (k, k)]
        index_components = sorted(index_components, key=lambda x: x[1], reverse=True)
        print(index_components[0:10])
        top_indices_per_tIC[i] = [component[0] for component in index_components]

    top_residues_per_tIC = {}
    for i in range(0, components_shape[0]):
        top_residues_per_tIC[i] = [
            residue for index in top_indices_per_tIC[i] for residue in residue_pairs[index]
        ]

    feature_coefs_per_tIC["residues_0"] = [pair[0] for pair in residue_pairs]
    feature_coefs_per_tIC["residues_1"] = [pair[1] for pair in residue_pairs]
    duplicated_feature_coefs_per_tIC["residues"] = [
        residue for residue_pair in residue_pairs for residue in residue_pair
    ]

    write_map_to_csv(tic_residue_csv, top_residues_per_tIC, [])
    write_map_to_csv(feature_coefs_csv, feature_coefs_per_tIC, [])
    write_map_to_csv(duplicated_feature_coefs_csv, duplicated_feature_coefs_per_tIC, [])
    return
def save_features_to_residues_map(traj_file, contact_residues, feature_residues_csv, cutoff, residues_map = None, exacycle = False):
    """Write the residue pairs found by compute_contacts_below_cutoff to a CSV file.

    When ``residues_map`` is given, ``contact_residues`` is first restricted
    to its keys (and, with ``exacycle``, translated through the map). The
    pairs and residue infos returned by ``compute_contacts_below_cutoff`` are
    translated back through the inverted map when ``exacycle`` is set. One
    row per residue info is written to ``feature_residues_csv``, with the
    feature number followed by three fields for each of the two residues.
    """
    if residues_map is not None:
        contact_residues = [r for r in contact_residues if r in residues_map]
        if exacycle:
            contact_residues = [residues_map[key] for key in contact_residues]

    # The loaded frame is not used below; the call is kept so that an
    # unreadable trajectory file still fails here.
    md.load_frame(traj_file, 0)

    residue_pairs, residue_infos = compute_contacts_below_cutoff(
        [traj_file, [0]], cutoff = cutoff, contact_residues = contact_residues, anton = False)

    if exacycle:
        reverse_residues_map = {v: k for k, v in residues_map.items()}
        residue_pairs = [
            [reverse_residues_map[residue_pair[0]], reverse_residues_map[residue_pair[1]]]
            for residue_pair in residue_pairs
        ]
        # The remapped infos are now collected under one consistent name (a
        # misspelled variable previously caused a NameError here).
        residue_infos = [
            [(reverse_residues_map[residue_info[0][0]], residue_info[0][1], residue_info[0][2]),
             (reverse_residues_map[residue_info[1][0]], residue_info[1][1], residue_info[1][2])]
            for residue_info in residue_infos
        ]

    print("There are: %d residue pairs" %len(residue_pairs))

    # Text mode: the rows are str, which a binary-mode file rejects on
    # Python 3. The context manager closes the file even if a row fails.
    with open(feature_residues_csv, "w") as f:
        f.write("feature, residue.1.resSeq, residue.1.res, residue.1.chain, residue.2.resSeq, residue.2.res, residue.2.chain,\n")
        for i, residue_info in enumerate(residue_infos):
            # "%s" for the `res` columns accepts residue names as well as
            # numbers (and prints integers exactly like "%d").
            f.write("%d, %d, %s, %d, %d, %s, %d,\n" % (
                i,
                residue_info[0][0], residue_info[0][1], residue_info[0][2],
                residue_info[1][0], residue_info[1][1], residue_info[1][2]))
    return
def timefld(n):
    """Time loading frame 23 of ``bpti-all-1<nnn>.dcd`` and reducing it to protein.

    ``n`` is zero-padded to three digits in the file name. The elapsed time
    in seconds is printed and the protein-only frame is returned.
    """
    t_begin = dt.datetime.now()
    dcd_name = 'bpti-all-1%03d.dcd' % n
    tr = md.load_frame(dcd_name, 23, top=pdb)
    protein_atoms = tr.top.select('protein')
    tr.atom_slice(protein_atoms, inplace=True)
    elapsed = dt.datetime.now() - t_begin
    print('Time: ', elapsed.total_seconds())
    return tr
def timefld(n):
    """Load frame 23 of a numbered DCD file, keep the protein, report the time.

    The file name is ``bpti-all-1`` followed by ``n`` padded to three digits
    and ``.dcd``. Prints the elapsed seconds and returns the sliced frame.
    """
    began = dt.datetime.now()
    tr = md.load_frame("bpti-all-1%03d.dcd" % n, 23, top=pdb)
    selection = tr.top.select("protein")
    tr.atom_slice(selection, inplace=True)
    finished = dt.datetime.now()
    seconds = (finished - began).total_seconds()
    print("Time: ", seconds)
    return tr
def rmsd_to_structure(clusters_dir, ref_dir, text):
    """Compute the RMSD of every structure in a directory to a reference.

    The first frame of each file returned by
    ``get_trajectory_files(clusters_dir)`` is compared (``md.rmsd``) with the
    first frame of ``ref_dir``. The table of ``(position, rmsd)`` rows is
    saved as ``<clusters_dir>/<text>_rmsds.csv``.
    """
    pdbs = get_trajectory_files(clusters_dir)
    ref = md.load_frame(ref_dir, index=0)
    rmsds = np.zeros(shape=(len(pdbs), 2))

    for position, pdb_file in enumerate(pdbs):
        print(position)
        structure = md.load_frame(pdb_file, index=0)
        distance = md.rmsd(structure, ref, 0)
        rmsds[position, 0] = position
        rmsds[position, 1] = distance[0]

    np.savetxt("%s/%s_rmsds.csv" % (clusters_dir, text), rmsds, delimiter=",")
def start(self):
    """Assemble one trajectory per state from a CSV of (filename, index, state).

    The CSV at ``self.filename`` may start with a single ``#`` comment line
    and must contain the columns ``filename``, ``index`` and ``state``.
    ``self.error`` is called when a column is missing or when an output file
    (``self.outfn(state)``) already exists. Frames are loaded file by file in
    increasing ``index`` order with ``self.top`` as topology, joined per
    state, and saved without overwriting.
    """
    # read the csv file with an optional comment on the first line
    with open(self.filename) as f:
        line = f.readline()
        if not line.startswith('#'):
            f.seek(0, 0)
        df = pd.read_csv(f)

    if not all(e in df.columns for e in ('filename', 'index', 'state')):
        self.error('CSV file not read properly')

    for k in np.unique(df['state']):
        fn = self.outfn(k)
        if os.path.exists(fn):
            self.error('IOError: file exists: %s' % fn)

    frames = defaultdict(list)
    for fn, group in df.groupby('filename'):
        # DataFrame.sort() no longer exists in current pandas; sort_values()
        # is the replacement.
        for _, row in group.sort_values('index').iterrows():
            frames[row['state']].append(
                md.load_frame(fn, row['index'], top=self.top))

    for state, samples in frames.items():
        traj = samples[0].join(samples[1:])
        print('saving %s...' % self.outfn(state))
        traj.save(self.outfn(state), force_overwrite=False)
        print('done')
def _assert_toptraj_consistency(self):
    r"""Assert that the first trajectory file has as many atoms as the featurizer topology.

    The first frame of ``self.filenames[0]`` is loaded with ``self.topfile``
    and its atom count is compared with ``self.featurizer.topology.n_atoms``.
    """
    first_frame = mdtraj.load_frame(self.filenames[0], index=0, top=self.topfile)
    desired_n_atoms = self.featurizer.topology.n_atoms
    found_n_atoms = first_frame.xyz.shape[1]
    assert found_n_atoms == desired_n_atoms, (
        "Mismatch in the number of atoms between the topology"
        " and the first trajectory file, %u vs %u"
        % (desired_n_atoms, first_frame.xyz.shape[1]))
def start(self):
    """Build and save one trajectory per state listed in the input CSV.

    ``self.filename`` is a CSV (optionally preceded by one ``#`` comment
    line) with the columns ``filename``, ``index`` and ``state``. Missing
    columns or an already existing output file (``self.outfn(state)``) are
    reported through ``self.error``. For each trajectory file the requested
    frames are loaded in increasing ``index`` order using ``self.top``; the
    frames of each state are joined and saved with
    ``force_overwrite=False``.
    """
    # read the csv file with an optional comment on the first line
    with open(self.filename) as f:
        first_line = f.readline()
        if not first_line.startswith('#'):
            f.seek(0, 0)
        df = pd.read_csv(f)

    required_columns = ('filename', 'index', 'state')
    if not all(e in df.columns for e in required_columns):
        self.error('CSV file not read properly')

    for k in np.unique(df['state']):
        fn = self.outfn(k)
        if os.path.exists(fn):
            self.error('IOError: file exists: %s' % fn)

    frames = defaultdict(list)
    for fn, group in df.groupby('filename'):
        # sort_values() replaces DataFrame.sort(), which current pandas no
        # longer provides.
        ordered = group.sort_values('index')
        for _, row in ordered.iterrows():
            frames[row['state']].append(
                md.load_frame(fn, row['index'], top=self.top))

    for state, samples in list(frames.items()):
        traj = samples[0].join(samples[1:])
        print('saving %s...' % self.outfn(state))
        traj.save(self.outfn(state), force_overwrite=False)
        print('done')
def reproject_oldata():
    """Collect one coordinate set per catalogued job and return its alpha filter.

    Job keys are taken from the Redis hash ``anl_sequence`` (ordered by the
    hash values) and prefixed with ``jc_``. For each job the entry
    ``xid:reference[src_index]`` gives a ``(fileno, frame)`` pair; the
    coordinates are taken from the cache list ``sim:<name>`` when present and
    otherwise loaded from the job's DCD/PDB files. The points are wrapped in
    an ``md.Trajectory`` and passed through ``datareduce.filter_alpha``.
    """
    r1 = redis.StrictRedis(port=6390, decode_responses=True)
    cache = redis.StrictRedis(host='bigmem0006', port=6380, decode_responses=True)

    execlist = r1.hgetall('anl_sequence')
    ordered_items = sorted(execlist.items(), key=lambda x: x[1])
    keyorder = ['jc_' + item[0] for item in ordered_items]
    # NOTE(review): a comment here previously read "skip first 100
    # (non-sampled)", but no entries are skipped by this code.

    pts = []
    bad_ref = 0  # jobs without a reference entry (counted, not reported)
    miss = 0     # jobs whose frame load came back empty (counted, not reported)
    for key in keyorder:
        conf = r1.hgetall(key)
        src = int(conf['src_index'])
        ref = r1.lindex('xid:reference', src)
        if ref is None:
            bad_ref += 1
            continue

        # SECURITY NOTE: eval() executes whatever string is stored in Redis;
        # only safe while that store is fully trusted.
        _fileno, frame = eval(ref)
        ckey = 'sim:%s' % conf['name']
        xyz = cache.lindex(ckey, frame)
        if xyz is not None:
            # SECURITY NOTE: pickle.loads() on cached data has the same trust
            # requirement as the eval() above.
            pts.append(pickle.loads(xyz))
            continue

        tr = md.load_frame(conf['dcd'], frame, top=conf['pdb'])
        if len(tr.xyz) == 0:
            miss += 1
        else:
            pts.append(tr.xyz[0])

    traj = md.Trajectory(pts, deshaw.topo_prot.top)
    return datareduce.filter_alpha(traj)
def sampling_along_tIC(resultdir, opath, tica_trajs, xtc_traj_folder, traj_list_array, pdb_name, tIC_a):
    """Sample 200 conformations along one tIC, save them, and plot the samples.

    A histogram of the first two tICA coordinates is drawn with
    ``draw_tica_histogram_core``. ``sample_dimension`` picks
    ``(trajectory index, frame index)`` pairs along dimension ``tIC_a - 1``;
    the corresponding frames are loaded from ``xtc_traj_folder`` (topology
    ``xtc_traj_folder + pdb_name``), joined, and saved as
    ``<resultdir>/tica-dimension-tIC<tIC_a - 1>.xtc``. The first two tICA
    coordinates of the sampled frames are plotted and the figure is saved to
    ``<resultdir>/<opath>``.
    """
    transformed = np.concatenate(tica_trajs)
    draw_tica_histogram_core(transformed[:, 0], transformed[:, 1], '1', '2')

    # sample_dimension is given a dictionary keyed by trajectory position
    tica_trajs = {i: tica_trajs[i] for i in range(len(tica_trajs))}
    # sample 200 conformations
    inds = sample_dimension(tica_trajs,
                            dimension=tIC_a - 1,
                            n_frames=200,
                            scheme='random')

    # make trajectory
    topology_path = xtc_traj_folder + pdb_name
    traj = md.join(
        md.load_frame(xtc_traj_folder + traj_list_array[i],
                      index=frame_i,
                      top=topology_path) for i, frame_i in inds)
    # save the trajectory
    traj.save("%s/tica-dimension-tIC%s.xtc" % (resultdir, tIC_a - 1))

    # show the samples on tICA projections
    samples_coord = np.array([
        [tica_trajs[i][frame_i][0], tica_trajs[i][frame_i][1]]
        for i, frame_i in inds
    ])
    print(samples_coord.shape)
    plt.plot(samples_coord[:, 0], samples_coord[:, 1], 'o-')
    # Labels must be a sequence of strings; a bare string would be split into
    # single-character labels, labelling the line "s".
    plt.legend(['sample'])
    plt.savefig(resultdir + '/' + opath)
def onRemoveClashes(self):
    """Copy the trajectory to a new H5 file, dropping frames flagged as clashing.

    The target file is chosen through ``chisurf.widgets.save_file``. The
    source trajectory (``self.trajectory_filename``) is read in chunks of
    1000 frames with ``self.stride``; for every chunk
    ``below_min_distance`` is evaluated for the atoms selected by
    ``self.atom_list`` and ``self.min_distance``, and only frames whose
    returned value is below 1 are appended to the target. Times of the
    written frames continue from the number of frames already stored.
    """
    target_filename = chisurf.widgets.save_file('H5-Trajectory file', 'H5-File (*.h5)')
    filename = self.trajectory_filename
    stride = self.stride
    min_distance = self.min_distance

    # Make empty trajectory
    frame_0 = md.load_frame(filename, 0)
    target_traj = md.Trajectory(xyz=np.empty((0, frame_0.n_atoms, 3)), topology=frame_0.topology)
    atom_selection = self.atom_list
    atom_list = target_traj.top.select(atom_selection)
    target_traj.save(target_filename)

    chunk_size = 1000
    # Open the output once for all chunks instead of reopening it per chunk.
    with tables.open_file(target_filename, 'a') as table:
        for chunk in md.iterload(filename, chunk=chunk_size, stride=stride):
            xyz = chunk.xyz.copy()
            frames_below = below_min_distance(xyz, min_distance, atom_list=atom_list)
            selection = np.where(frames_below < 1)[0]
            xyz_clash_free = np.take(xyz, selection, axis=0)

            table.root.coordinates.append(xyz_clash_free)
            n_stored = table.root.time.shape[0]
            times = np.arange(n_stored, n_stored + xyz_clash_free.shape[0], dtype=np.float32)
            table.root.time.append(times)
def __init__(self, ref, ref_frame=0, atom_indices=None, topology=None, precentered=False):
    """Store a reference structure given as a filename or an mdtraj.Trajectory.

    A filename is loaded with ``mdtraj.load_frame(ref, ref_frame,
    top=topology)`` and kept as ``self.name``; for a trajectory object
    ``self.name`` is its repr. Any other type raises ``TypeError``.
    ``ref_frame`` must be an ``int``.
    """
    self.top = topology
    assert isinstance(ref_frame, int), \
        "ref_frame has to be of type integer, and not %s" % type(ref_frame)

    if not isinstance(ref, (str, mdtraj.Trajectory)):
        raise TypeError("input reference has to be either a filename or "
                        "a mdtraj.Trajectory object, and not of %s" % type(ref))

    if isinstance(ref, str):
        # Filename (+ topology): remember the name, then load the frame.
        # mdtraj is left to report unreadable input itself.
        self.name = ref
        ref = mdtraj.load_frame(ref, ref_frame, top=topology)
    else:
        # Already an md.Trajectory object.
        self.name = repr(ref)

    self.ref = ref
    self.ref_frame = ref_frame
    self.atom_indices = atom_indices
    self.precentered = precentered
    self.dimension = 1
def save_pdb(traj_dir, clusterer, i, save_dir="/scratch/users/enf/b2ar_analysis/clusters_1000_allprot"):
    """Save the frame identified by ``clusterer.cluster_ids_[i]`` as ``<save_dir>/<i>.pdb``.

    Row ``i`` of ``clusterer.cluster_ids_`` is read as (trajectory index,
    frame index); the trajectory index refers to the list returned by
    ``get_trajectory_files(traj_dir)``.

    ``save_dir`` defaults to the directory that was previously hard-coded, so
    existing calls write to the same location.
    """
    location = clusterer.cluster_ids_[i, :]
    traj = get_trajectory_files(traj_dir)[location[0]]
    print("traj = %s, frame = %d" % (traj, location[1]))
    conformation = md.load_frame(traj, location[1])
    conformation.save_pdb("%s/%d.pdb" % (save_dir, i))
    return None
def reproject_oldata():
    """Gather coordinates for every catalogued job and apply the alpha filter.

    Keys come from the Redis hash ``anl_sequence``, ordered by value and
    prefixed with ``jc_``. Each job's ``src_index`` selects an entry of the
    list ``xid:reference`` holding a ``(fileno, frame)`` pair. Coordinates
    are read from the cache list ``sim:<name>`` if available, otherwise the
    frame is loaded from the job's ``dcd``/``pdb`` files. The result of
    ``datareduce.filter_alpha`` on the assembled trajectory is returned.
    """
    r1 = redis.StrictRedis(port=6390, decode_responses=True)
    cache = redis.StrictRedis(host='bigmem0006', port=6380, decode_responses=True)

    execlist = r1.hgetall('anl_sequence')
    keyorder = []
    for entry in sorted(execlist.items(), key=lambda x: x[1]):
        keyorder.append('jc_' + entry[0])
    # NOTE(review): an earlier comment said "skip first 100 (non-sampled)",
    # yet nothing is skipped here.

    pts = []
    bad_ref = 0  # tally of jobs lacking a reference (not used afterwards)
    miss = 0     # tally of empty frame loads (not used afterwards)
    for key in keyorder:
        conf = r1.hgetall(key)
        ref = r1.lindex('xid:reference', int(conf['src_index']))
        if ref is None:
            bad_ref += 1
            continue

        # SECURITY NOTE: eval() runs the stored string as code - acceptable
        # only for a trusted Redis instance.
        _fileno, frame = eval(ref)
        cached = cache.lindex('sim:%s' % conf['name'], frame)
        if cached is not None:
            # SECURITY NOTE: unpickling cached bytes requires the same trust.
            pts.append(pickle.loads(cached))
        else:
            tr = md.load_frame(conf['dcd'], frame, top=conf['pdb'])
            if len(tr.xyz) == 0:
                miss += 1
            else:
                pts.append(tr.xyz[0])

    traj = md.Trajectory(pts, deshaw.topo_prot.top)
    alpha = datareduce.filter_alpha(traj)
    return alpha
def onSaveTrajectory(self, target_filename=None):
    """Write a rotated and translated copy of the trajectory to an H5 file.

    When ``target_filename`` is None a save dialog is shown. The source
    trajectory (``self.trajectory_filename``) is read in chunks of 1000
    frames with ``self.stride``; each chunk's coordinates are copied, passed
    to ``rotate`` with ``self.rotation_matrix`` and to ``translate`` with
    ``self.translation_vector``, and appended to the target together with
    consecutive frame numbers as times.
    """
    if target_filename is None:
        # getSaveFileName returns a (filename, filter) pair; index it
        # *before* converting to str, otherwise the first character of the
        # tuple's repr would be used as the file name.
        target_filename = str(QtWidgets.QFileDialog.getSaveFileName(
            None, 'Save H5-Model file', '', 'H5-files (*.h5)')[0])
        if not target_filename:
            # Dialog was cancelled: nothing to write.
            return

    translation_vector = self.translation_vector
    rotation_matrix = self.rotation_matrix
    stride = self.stride
    if self.verbose:
        print("Stride: %s" % stride)
        print("\nRotation Matrix")
        print(rotation_matrix)
        print("\nTranslation vector")
        print(translation_vector)

    first_frame = md.load_frame(self.trajectory_filename, 0)
    # Start from a trajectory with zero frames: a leading placeholder frame
    # would put uninitialised coordinates in front of the real data.
    traj_new = md.Trajectory(xyz=np.empty((0, first_frame.n_atoms, 3)), topology=first_frame.topology)
    traj_new.save(target_filename)

    chunk_size = 1000
    # The context manager closes the file even if a chunk fails.
    with tables.open_file(target_filename, 'a') as table:
        for i, chunk in enumerate(md.iterload(self.trajectory_filename, chunk=chunk_size, stride=stride)):
            xyz = chunk.xyz.copy()
            rotate(xyz, rotation_matrix)
            translate(xyz, translation_vector)
            # Coordinates live in the `coordinates` node of the saved file,
            # the same node the other H5 writers in this file append to.
            table.root.coordinates.append(xyz)
            table.root.time.append(
                np.arange(i * chunk_size, i * chunk_size + xyz.shape[0], dtype=np.float32))
def read_and_featurize_divided(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize a trajectory with dihedrals, skipping residues named "HI...".

    Atoms whose residue name starts with "HI" are left out when loading the
    trajectory. The features from ``DihedralFeaturizer(types=dihedrals)`` are
    written with ``verbosedump`` to
    ``/scratch/users/enf/b2ar_analysis/subsampled_features/<condition>/
    <name>_features_stride<stride>.h5`` and returned. ``<condition>`` and
    ``<name>`` are taken from the "/"-separated components of ``filename``.
    """
    # NOTE(review): `stride` only appears in the output file name; the
    # trajectory itself is loaded without a stride - confirm this is intended.
    topology = md.load_frame(filename, 0).topology
    kept_atoms = []
    for atom in topology.atoms:
        if atom.residue.name[0:2] != "HI":
            kept_atoms.append(atom.index)

    traj = md.load(filename, atom_indices=kept_atoms)
    features = DihedralFeaturizer(types=dihedrals).transform(traj_list=traj)

    path_parts = filename.split("/")
    condition = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[len(path_parts) - 1]
    base_name = dcd_file.rsplit(".", 1)[0]
    new_file = "%s_features_stride%d.h5" % (base_name, stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)
    verbosedump(features, new_file_full)
    return features
def traj(structure, trajectory, frame, contact):
    """Load one trajectory frame restricted to the contact-criterion atoms.

    The structure file is scanned for ``ATOM`` records. Records are numbered
    from zero in file order, and those whose third whitespace-separated
    field is one of the molecule types returned by
    ``cluster.molecules_types(contact)`` are kept.

    Parameters
    ----------
    structure : str
        Path of the structure file; also used as the topology.
    trajectory : str
        Path of the trajectory file.
    frame : int
        Index of the frame to load.
    contact
        Contact criterion specification forwarded to
        ``cluster.molecules_types``.

    Returns
    -------
    The frame returned by ``md.load_frame`` for the selected atoms.
    """
    molecule_types = cluster.molecules_types(contact)

    atomsndx = []
    atom_number = -1
    with open(structure, "r") as fc:
        for line in fc:
            fields = line.split()
            # Blank lines have no fields at all; skip them instead of
            # failing on fields[0].
            if not fields or fields[0] != "ATOM":
                continue
            atom_number += 1
            # Short ATOM records still count, but cannot match a type.
            if len(fields) > 2 and fields[2] in molecule_types:
                atomsndx.append(atom_number)

    return md.load_frame(trajectory, top=structure, index=frame,
                         atom_indices=atomsndx)
def cluster(data_dir, traj_dir, n_clusters):
    """Cluster reduced data with k-medoids, plot it and save the medoids.

    The first two columns of the concatenated data are shown as a hexbin
    plot. Every cluster centre is marked and annotated as ``C<i>``, and the
    trajectory frame identified by ``clusterer.cluster_ids_[i]``
    (trajectory number, frame number) is written as ``cluster_<i>.pdb``.

    Parameters
    ----------
    data_dir : str
        Path handed to ``verboseload`` to obtain the reduced data.
    traj_dir : str
        Directory handed to ``get_trajectory_files``.
    n_clusters : int
        Number of clusters for ``MiniBatchKMedoids``.
    """
    reduced_data = verboseload(data_dir)
    trajs = np.concatenate(reduced_data)
    plt.hexbin(trajs[:, 0], trajs[:, 1], bins='log', mincnt=1)

    clusterer = MiniBatchKMedoids(n_clusters=n_clusters)
    clusterer.fit_transform(reduced_data)
    centers = clusterer.cluster_centers_

    # The file listing does not change between centres; fetch it once
    # instead of once per cluster.
    traj_files = get_trajectory_files(traj_dir)

    for i, center in enumerate(centers):
        plt.scatter(center[0], center[1])
        plt.annotate('C%d' % i, xy=(center[0], center[1]),
                     xytext=(center[0] + 0.1, center[1] + 0.1),
                     arrowprops=dict(facecolor='black', shrink=0.05))

        location = clusterer.cluster_ids_[i, :]
        print(location)
        traj = traj_files[location[0]]
        print(("traj = %s" % traj))
        print(("frame = %d" % location[1]))
        conformation = md.load_frame(traj, location[1])
        conformation.save_pdb(
            "/scratch/users/enf/b2ar_analysis/cluster_%d.pdb" % i)

    plt.show()
def read_and_featurize(filename, dihedrals=['chi2'], stride=10):
    """Compute dihedral features for residue 93 and dump them to disk.

    Only atoms of residues with ``resSeq == 93`` whose string form starts
    with "H" and whose name is not "POPC" are loaded. Features are written
    below the ``subsampled_features`` scratch directory, in a sub-directory
    named after the trajectory's parent directory.

    Parameters
    ----------
    filename : str
        "/"-separated path of the trajectory file.
    dihedrals : list of str
        Dihedral types handed to ``DihedralFeaturizer``.
    stride : int
        Only used to build the output file name (see note below).

    Returns
    -------
    The object returned by ``DihedralFeaturizer.transform``.
    """
    top = md.load_frame(filename, 0).topology
    # Compare the residue *name* with "POPC"; comparing the Residue object
    # itself with a string never matches, so the test was a no-op.
    atom_indices = [
        a.index for a in top.atoms
        if a.residue.resSeq == 93
        and a.residue.name != "POPC"
        and str(a.residue)[0] == "H"
    ]
    print((len(atom_indices)))

    # NOTE(review): frames are loaded with a fixed stride of 1000 while the
    # `stride` argument only ends up in the output file name - confirm
    # which of the two is intended.
    traj = md.load(filename, stride=1000, atom_indices=atom_indices)

    featurizer = DihedralFeaturizer(types=dihedrals)
    features = featurizer.transform(traj_list=traj)

    directory = filename.split("/")
    condition = directory[len(directory) - 2]
    dcd_file = directory[len(directory) - 1]
    new_file = "%s_features_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    new_root_dir = "/scratch/users/enf/b2ar_analysis/subsampled_features"
    new_file_full = "%s/%s/%s" % (new_root_dir, condition, new_file)

    verbosedump(features, new_file_full)
    return features
def subsample_traj(traj, stride=5, top=None):
    """Save a strided copy of a trajectory without lipids, water and ions.

    Atoms are dropped when their residue's string form starts with "POP",
    "NA" or "CL", or when the residue is flagged as water. The result is
    written below the ``subsampled_allprot`` scratch directory, in a
    sub-directory built from the second and third "-"-separated fields of
    the trajectory's parent directory name.

    Parameters
    ----------
    traj : str
        "/"-separated path of the trajectory file.
    stride : int
        Frame stride used when loading.
    top : optional
        Topology passed through to the mdtraj loaders.
    """
    def _is_kept(residue):
        label = str(residue)
        if label.startswith("POP") or residue.is_water:
            return False
        return not label.startswith(("NA", "CL"))

    path_parts = traj.split("/")
    simulation = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[-1]
    sim_fields = simulation.split('-')
    condition = "%s-%s" % (sim_fields[1], sim_fields[2])
    print(("analyzing simulation %s file %s" % (simulation, dcd_file)))

    top_file = top
    topology = md.load_frame(traj, 0, top=top_file).topology
    selected = [atom.index for atom in topology.atoms if _is_kept(atom.residue)]

    subsampled = md.load(traj, stride=stride, top=top_file,
                         atom_indices=selected)
    print("traj loaded")

    out_name = "%s_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    out_root = "/scratch/users/enf/b2ar_analysis/subsampled_allprot"
    out_path = "%s/%s/%s" % (out_root, condition, out_name)
    print(("saving trajectory as %s" % out_path))
    subsampled.save(out_path)
def subsample_traj(traj, stride=5, top=None):
    """Save a strided copy of a trajectory restricted to selected protein atoms.

    Protein atoms are kept unless their residue number is one of 341, 79,
    296, 269, 178 or 93, their residue name starts with "HI", or the
    residue is named "NMA", "NME" or "ACE". The result is written below
    the ``subsampled`` scratch directory, in a sub-directory built from the
    second and third "-"-separated fields of the trajectory's parent
    directory name.

    Parameters
    ----------
    traj : str
        "/"-separated path of the trajectory file.
    stride : int
        Frame stride used when loading.
    top : optional
        Topology passed through to the mdtraj loaders.
    """
    skipped_resseqs = (341, 79, 296, 269, 178, 93)
    skipped_names = ("NMA", "NME", "ACE")

    path_parts = traj.split("/")
    simulation = path_parts[len(path_parts) - 2]
    dcd_file = path_parts[-1]
    sim_fields = simulation.split('-')
    condition = "%s-%s" % (sim_fields[1], sim_fields[2])
    print(("analyzing simulation %s file %s" % (simulation, dcd_file)))

    top_file = top
    topology = md.load_frame(traj, 0, top=top_file).topology
    selected = []
    for atom in topology.atoms:
        res = atom.residue
        if not res.is_protein:
            continue
        if res.resSeq in skipped_resseqs:
            continue
        if res.name.startswith("HI") or res.name in skipped_names:
            continue
        selected.append(atom.index)

    subsampled = md.load(traj, stride=stride, top=top_file,
                         atom_indices=selected)

    out_name = "%s_stride%d.h5" % (dcd_file.rsplit(".", 1)[0], stride)
    out_root = "/scratch/users/enf/b2ar_analysis/subsampled"
    out_path = "%s/%s/%s" % (out_root, condition, out_name)
    print(("saving trajectory as %s" % out_path))
    subsampled.save(out_path)
def onProcessTrajectory(self):
    """Evaluate the universe's energies for every frame of the trajectory.

    Asks for an output file, writes a tab-separated header (``FrameNbr``
    followed by one column per potential name) and then streams
    ``self.trajectory_file``. For each frame the coordinates, scaled by 10,
    are copied into ``self.structure``; the values from
    ``self.universe.getEnergies`` are printed and appended to the file as
    one row.
    """
    print("onProcessTrajectory")
    energy_file = chisurf.widgets.save_file(
        description='Save energies', file_type='CSV-name file (*.txt)')

    header = 'FrameNbr\t'
    header += ''.join('%s\t' % p.name for p in self.universe.potentials)
    header += '\n'
    header_file = chisurf.fio.zipped.open_maybe_zipped(
        filename=energy_file, mode='w')
    try:
        header_file.write(header)
    finally:
        # Close explicitly so the header is on disk before rows are
        # appended through a second handle.
        header_file.close()

    self.structure = chisurf.structure.TrajectoryFile(
        mdtraj.load_frame(self.trajectory_file, 0))[0]

    i = 0
    # One append handle for the whole run instead of one per frame.
    with open(energy_file, 'a') as out:
        for chunk in mdtraj.iterload(self.trajectory_file):
            for frame in chunk:
                # Factor 10: presumably nm -> Angstrom; confirm the units
                # the structure object expects.
                self.structure.xyz = frame.xyz * 10.0
                self.structure.update_dist()
                s = '%i\t' % (i * self.stride + 1)
                s += ''.join(
                    '%.3f\t' % e
                    for e in self.universe.getEnergies(self.structure))
                print(s)
                out.write(s + '\n')
                i += 1
def read_and_featurize_divided(filename, dihedrals=['phi', 'psi', 'chi2'], stride=10):
    """Featurize a trajectory by dihedral angles and store the result.

    The trajectory is loaded without atoms belonging to residues whose
    name begins with "HI", transformed with a ``DihedralFeaturizer`` and
    dumped via ``verbosedump`` to
    ``<subsampled_features root>/<parent directory>/<file>_features_stride<stride>.h5``.

    Parameters
    ----------
    filename : str
        "/"-separated path of the trajectory file.
    dihedrals : list of str
        Dihedral types for the featurizer.
    stride : int
        Appears in the output file name only; loading is unstrided.

    Returns
    -------
    The features produced by the featurizer.
    """
    def _output_path(path):
        # <root>/<parent dir of the trajectory>/<stem>_features_stride<N>.h5
        pieces = path.split("/")
        parent = pieces[len(pieces) - 2]
        stem = pieces[len(pieces) - 1].rsplit(".", 1)[0]
        root = "/scratch/users/enf/b2ar_analysis/subsampled_features"
        target = "%s_features_stride%d.h5" % (stem, stride)
        return "%s/%s/%s" % (root, parent, target)

    reference_top = md.load_frame(filename, 0).topology
    wanted = [
        atom.index
        for atom in reference_top.atoms
        if atom.residue.name[:2] != "HI"
    ]
    loaded = md.load(filename, atom_indices=wanted)

    dihedral_featurizer = DihedralFeaturizer(types=dihedrals)
    features = dihedral_featurizer.transform(traj_list=loaded)

    verbosedump(features, _output_path(filename))
    return features
def rmsd_to_structure(clusters_dir, ref_dir, text):
    """Write the RMSD of every structure in a directory to a reference.

    For each file returned by ``get_trajectory_files(clusters_dir)`` the
    first frame is compared with the first frame of the reference using
    ``md.rmsd``. The result is saved as ``<clusters_dir>/<text>_rmsds.csv``
    with two comma-separated columns: file number and RMSD.

    Parameters
    ----------
    clusters_dir : str
        Directory containing the structures; also receives the CSV.
    ref_dir : str
        Path of the reference structure file.
    text : str
        Prefix of the CSV file name.
    """
    pdbs = get_trajectory_files(clusters_dir)
    ref = md.load_frame(ref_dir, index=0)
    rmsds = np.zeros(shape=(len(pdbs), 2))
    for i, pdb_file in enumerate(pdbs):
        # Function-call form of print: valid on Python 2 and Python 3.
        print(i)
        pdb = md.load_frame(pdb_file, index=0)
        rmsd = md.rmsd(pdb, ref, 0)
        rmsds[i, 0] = i
        rmsds[i, 1] = rmsd[0]
    rmsd_file = "%s/%s_rmsds.csv" % (clusters_dir, text)
    np.savetxt(rmsd_file, rmsds, delimiter=",")
def _start(self):
    """Sample the switching LDS and build a matching structural trajectory.

    The model is rebuilt from ``self.model_dict`` and sampled for
    ``self.args.n_samples`` steps. All input trajectories are featurized
    and each frame is assigned to the state with the highest log density.
    For every sampled observation the best-scoring frame of the current
    hidden state's bucket is loaded, superposed on the previously selected
    frame and appended. The result is saved as ``<out>.xtc`` and its first
    frame as ``<out>.xtc.pdb``.
    """
    print("model")
    print(self.model_dict)
    n_features = float(self.model_dict['n_features'])
    n_states = float(self.model_dict['n_states'])
    self.model = MetastableSwitchingLDS(n_states, n_features)
    self.model.load_from_json_dict(self.model_dict)

    obs, hidden_states = self.model.sample(self.args.n_samples)
    n_samples, _ = np.shape(obs)

    features, ii, ff = mixtape.featurizer.featurize_all(
        self.filenames, self.featurizer, self.topology, self.stride)

    state_logprob = log_multivariate_normal_density(
        features,
        np.array(self.model.means_),
        np.array(self.model.covars_),
        covariance_type='full')
    assignments = np.argmax(state_logprob, axis=1)

    # Presort the data into the metastable wells: one bucket of features,
    # frame indices and file names per state.
    masks = [assignments == k for k in range(int(self.model.n_states))]
    states = [features[mask] for mask in masks]
    state_indices = [ii[mask] for mask in masks]
    state_files = [ff[mask] for mask in masks]

    # Walk along the sampled feature-space trajectory. At each step only
    # the bucket of the current hidden state is searched for the frame
    # with the highest log density with respect to the sample.
    traj = None
    for t, featurized_frame in enumerate(obs):
        h = hidden_states[t]
        frame_logprob = log_multivariate_normal_density(
            states[h],
            featurized_frame[np.newaxis],
            self.model.Qs_[h][np.newaxis],
            covariance_type='full')
        best = np.argmax(frame_logprob, axis=0)[0]
        frame = md.load_frame(state_files[h][best], state_indices[h][best],
                              self.topology)
        if t == 0:
            traj = frame
            continue
        # Align onto the previously appended frame before joining.
        frame.superpose(traj, t - 1)
        traj = traj.join(frame)

    traj.save('%s.xtc' % self.out)
    traj[0].save('%s.xtc.pdb' % self.out)
def gen_structures(ys, reference, filenames, outs, N_atoms):
    """Save, for each target point, the closest frame as a PDB file.

    The time series of all files is loaded with ``load_timeseries``. For
    every target in ``ys`` the row with the smallest summed squared
    difference is located, the corresponding frame is loaded, superposed on
    ``reference`` and written to ``<out>.pdb``.

    Parameters
    ----------
    ys : iterable
        Target points in the time-series coordinates.
    reference
        Reference passed to ``load_timeseries`` and used for superposition.
    filenames
        Trajectory files to search.
    outs : iterable of str
        Output file prefixes, paired with ``ys``.
    N_atoms : int
        The first ``N_atoms`` atom indices are used.
    """
    xx, ii, ff = load_timeseries(filenames, arange(N_atoms), reference)
    for target, out_prefix in zip(ys, outs):
        squared_distance = np.sum((target - xx)**2, axis=1)
        nearest = np.argmin(squared_distance)
        structure = md.load_frame(ff[nearest], ii[nearest])
        structure.superpose(reference)
        structure.save('%s.pdb' % out_prefix)
def test_load_frame():
    """Check that ``md.load_frame`` agrees with slicing a fully loaded file.

    A random frame of each supported format is loaded both ways and the
    coordinates, unit cell vectors and time are compared. The PDB reader is
    checked separately for coordinates only.
    """
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.mdcrd', 'frame0.binpos',
             'legacy_msmbuilder_trj0.lh5']

    trajectories = []
    for name in files:
        trajectories.append(md.load(get_fn(name), top=get_fn('native.pdb')))

    rand = []
    for loaded in trajectories:
        rand.append(np.random.randint(len(loaded)))

    frames = []
    for name, pick in zip(files, rand):
        frames.append(
            md.load_frame(get_fn(name), index=pick, top=get_fn('native.pdb')))

    for loaded, single, pick, name in zip(trajectories, frames, rand, files):
        expected = loaded[pick]
        eq(expected.xyz, single.xyz)
        eq(expected.unitcell_vectors, single.unitcell_vectors)
        eq(expected.time, single.time,
           err_msg='%d, %d: %s' % (expected.time[0], single.time[0], name))

    pdb_path = get_fn('2EQQ.pdb')
    t1 = md.load(pdb_path)
    pick = np.random.randint(len(t1))
    t2 = md.load_frame(pdb_path, pick)
    eq(t1[pick].xyz, t2.xyz)
def save_pdb(traj_dir, clusterer, i):
    """Write the representative frame of cluster ``i`` as a PDB file.

    ``clusterer.cluster_ids_[i]`` is read as (trajectory number, frame
    number); the trajectory number indexes the list returned by
    ``get_trajectory_files(traj_dir)``.

    Parameters
    ----------
    traj_dir : str
        Directory handed to ``get_trajectory_files``.
    clusterer
        Fitted clusterer exposing ``cluster_ids_``.
    i : int
        Cluster number; also the output file's base name.
    """
    traj_number, frame_number = clusterer.cluster_ids_[i, :][0:2]
    traj = get_trajectory_files(traj_dir)[traj_number]
    print(("traj = %s, frame = %d" % (traj, frame_number)))
    out_path = "/scratch/users/enf/b2ar_analysis/clusters_1000_allprot/%d.pdb" % i
    md.load_frame(traj, frame_number).save_pdb(out_path)
    return None
def plot_rmsd(trajectories, topology=None, subset=None, output='rmsd.dat', chunksize=100, reimage=False):
    """Compute, save and plot the RMSD of trajectories to their first frame.

    The trajectory files are sorted with ``sort_key_for_numeric_suffixes``
    and the first frame of the first file is the reference. Files are
    streamed in chunks; RMSD values (multiplied by 10, nm -> A) are
    written one per line to ``output`` and plotted.

    Parameters
    ----------
    trajectories : list of str
        Trajectory file paths.
    topology : str, optional
        Topology file, loaded with ``mdtraj.load_topology``.
    subset : str, optional
        Atom selection expression restricting the RMSD calculation.
    output : str
        File receiving the RMSD values.
    chunksize : int
        Number of frames per streamed chunk.
    reimage : bool
        When true, ``image_molecules`` is applied to the reference and to
        every chunk.
    """
    import mdtraj
    import numpy as np
    from tqdm import tqdm

    if topology:
        topology = mdtraj.load_topology(topology)

    trajectories = sorted(trajectories, key=sort_key_for_numeric_suffixes)
    first_frame = mdtraj.load_frame(trajectories[0], 0, top=topology)
    if subset:
        # Resolve the selection on the reference frame's topology so that
        # it also works when no separate topology file was given.
        subset = first_frame.topology.select(subset)
    frame_size = first_frame.xyz[0].nbytes
    if reimage:
        first_frame.image_molecules(inplace=True)

    rmsds = []
    for trajectory in tqdm(trajectories, unit='file'):
        _, ext = os.path.splitext(trajectory)
        total, unit_scale = None, None
        if ext.lower() == '.dcd':
            # Progress-bar total only: frame count estimated from the file
            # size and rounded.
            n_frames = round(
                os.path.getsize(trajectory) / frame_size,
                -1 * len(str(chunksize)[1:]))
            total = int(n_frames / chunksize)
            unit_scale = chunksize
        itertraj = mdtraj.iterload(trajectory, top=topology, chunk=chunksize)
        tqdm_kwargs = {
            'total': total,
            'unit': 'frames',
            'unit_scale': unit_scale,
            'postfix': {'traj': trajectory},
        }
        for chunk in tqdm(itertraj, **tqdm_kwargs):
            if reimage:
                chunk.image_molecules(inplace=True)
            rmsd = mdtraj.rmsd(chunk, first_frame, atom_indices=subset) * 10.0  # nm->A
            rmsds.append(rmsd)

    rmsds = np.concatenate(rmsds)
    with open(output, 'w') as f:
        f.write('\n'.join(map(str, rmsds)))
    print('\nWrote RMSD values to', output)

    print('Plotting results...')
    plt.plot(rmsds)
    axes = plt.gca()
    # Add the "and N more" suffix only when there really are more files;
    # a single trajectory used to produce "<file> and  more".
    n_more = len(trajectories) - 1
    suffix = ' and {} more'.format(n_more) if n_more > 0 else ''
    axes.set_title('{}{}'.format(trajectories[0], suffix))
    axes.set_xlabel('Frames')
    axes.set_ylabel('RMSD (A)')
    plt.show()
def _eval_traj_shapes(self):
    """Collect the frame count and atom count of every trajectory.

    Returns
    -------
    lengths : numpy.ndarray
        ``len()`` of each opened trajectory file, one entry per trajectory.
    n_atoms : numpy.ndarray
        ``n_atoms`` of each trajectory's first frame.
    """
    lengths = np.zeros(self.n_trajs)
    n_atoms = np.zeros(self.n_trajs)
    # range() instead of xrange(): works on both Python 2 and Python 3.
    for i in range(self.n_trajs):
        filename = self.traj_filename(i)
        with md.open(filename) as f:
            lengths[i] = len(f)
        n_atoms[i] = md.load_frame(filename, 0).n_atoms
    return lengths, n_atoms
def test_residues_map(traj_file_1, traj_file_2, residues, residues_map):
    """Print each residue next to the residue it is mapped to.

    For every residue number in ``residues`` the first protein residue with
    that number in the first topology is printed, followed by the first
    protein residue with the mapped number in the second topology.

    Parameters
    ----------
    traj_file_1, traj_file_2 : str
        Files whose first frames provide the two topologies.
    residues : iterable of int
        Residue numbers in the first topology.
    residues_map : dict
        Maps residue numbers of the first topology to the second.
    """
    def _protein_residues(topology, number):
        return [
            r for r in topology.residues
            if r.resSeq == number and r.is_protein
        ]

    traj_1 = md.load_frame(traj_file_1, index=0)
    traj_2 = md.load_frame(traj_file_2, index=0)
    top1, top2 = traj_1.topology, traj_2.topology

    for residue in residues:
        new_residue = residues_map[residue]

        print("Original residues:")
        originals = _protein_residues(top1, residue)
        print((originals[0]))

        print("New residues:")
        mapped = _protein_residues(top2, new_residue)
        print((mapped[0]))
    return
def solublim(dirout, contact, trajectory, structure, cst):
    """Estimate the solubility limit and write it to ``solublim.dat``.

    Reads the box edge lengths from ``<dirout>/box.dat`` and the cluster
    membership from ``<dirout>/cluster.dat`` (first field of each line is
    skipped, the remaining fields are molecule indices). Molecules in
    clusters with more than ``cst`` members count as "large"; their sphere
    volumes are subtracted from the box volume. The result is the number of
    molecules in small clusters per remaining volume, converted to mM with
    1 / nm^3 = 1.660539 M.

    Parameters
    ----------
    dirout : str
        Directory holding ``box.dat`` and ``cluster.dat``; receives
        ``solublim.dat``.
    contact
        Contact criterion specification forwarded to
        ``cluster.molecules_radii``.
    trajectory : str
        Trajectory file; its first frame provides atom names.
    structure : str
        Topology for the trajectory.
    cst : int
        Cluster-size threshold separating small from large clusters.
    """
    base = str(dirout)

    # Box size and volume.
    with open(base + "/box.dat", "r") as fi:
        data = numpy.loadtxt(fi)
    bx, by, bz = data[0], data[1], data[2]
    bv = bx * by * bz

    radii = cluster.molecules_radii(contact)

    # Split molecules between large and small clusters.
    lc = []
    sc = []
    with open(base + "/cluster.dat", "r") as fi:
        for line in fi:
            members = line.split()[1:]  # first item is the cluster size
            if len(members) > cst:
                lc.extend(members)
            else:
                sc.extend(members)

    # Volume taken up by the molecules in large clusters.
    lv = 0.0
    traj = mdtraj.load_frame(trajectory, top=structure, index=0)
    for lm in lc:
        res = traj.topology.atom(int(lm)).name
        rad = radii[res]
        lv += 4.0 / 3.0 * 3.141592653589793 * rad**3

    # Molecules in small clusters per saturated volume, in mM.
    with open(base + "/solublim.dat", "w") as fi:
        print(len(sc) / (bv - lv) * 1.660539 * 1000, file=fi)
def load_frame(base_dir, protein, traj_folder, filename, frame_index):
    """
    Load one frame from the ``.hdf5`` trajectory matching ``filename``.

    Note that the process working directory is changed to
    ``base_dir/protein/traj_folder`` as a side effect and is not restored.

    :param base_dir: Project's base dir
    :param protein: Protein of interest
    :param traj_folder: Sub-directory of the protein holding the trajectories
    :param filename: File to load; its extension is replaced by ``.hdf5``
    :param frame_index: Index of the needed frame
    :return: The required frame
    """
    traj_location = os.path.join(base_dir, protein, traj_folder)
    os.chdir(traj_location)
    stem, _extension = os.path.splitext(filename)
    hdf5_name = "%s.hdf5" % stem
    return mdt.load_frame(filename=hdf5_name, index=frame_index)
def read_and_featurize(traj_file, features_dir = None, condition=None, dihedral_types = ["phi", "psi", "chi1", "chi2"], dihedral_residues = None, resSeq_pairs = None, iterative = True):
    """Compute dihedral and/or residue-contact features for a trajectory.

    Dihedral features (sine and cosine of every requested angle) are
    computed when ``dihedral_residues`` is non-empty. Contact features
    (``md.compute_contacts`` with the 'closest-heavy' scheme) are computed
    when ``resSeq_pairs`` is non-empty. On a contact-calculation error the
    message is printed and the function returns early.

    NOTE(review): in this block the computed features are neither returned
    nor written anywhere - confirm whether a trailing part is missing.

    Parameters
    ----------
    traj_file : str
        Trajectory file to featurize.
    features_dir, condition
        Not used in this block.
    dihedral_types : list of str
        Any of "phi", "psi", "chi1", "chi2"; other entries are ignored.
    dihedral_residues : sequence, optional
        Residues handed to the ``*_indices`` helpers.
    resSeq_pairs : sequence, optional
        Residue-number pairs converted with ``convert_resSeq_to_resIndex``.
    iterative : bool
        Stream the trajectory in chunks of 1000 frames for the contact
        calculation instead of loading it completely.
    """
    # The defaults are None; treat None like "nothing requested" instead of
    # failing on len(None).
    want_dihedrals = dihedral_residues is not None and len(dihedral_residues) > 0
    want_contacts = resSeq_pairs is not None and len(resSeq_pairs) > 0

    top = None
    traj = None
    if want_dihedrals or want_contacts:
        # Both branches need the topology; it used to be referenced by the
        # dihedral branch before it was assigned.
        top = md.load_frame(traj_file, index=0).topology

    if want_dihedrals:
        index_builders = {
            "phi": phi_indices,
            "psi": psi_indices,
            "chi1": chi1_indices,
            "chi2": chi2_indices,
        }
        dihedral_indices = [
            index_builders[dihedral_type](fix_topology(top), dihedral_residues)
            for dihedral_type in dihedral_types
            if dihedral_type in index_builders
        ]

        # The dihedral branch used `traj` without ever loading it.
        traj = md.load(traj_file)
        dihedral_angles = []
        for indices in dihedral_indices:
            angles = np.transpose(
                ManualDihedral.compute_dihedrals(traj=traj, indices=indices))
            dihedral_angles.append(np.sin(angles))
            dihedral_angles.append(np.cos(angles))
        manual_features = np.transpose(np.concatenate(dihedral_angles))

    if want_contacts:
        resIndex_pairs = convert_resSeq_to_resIndex(top, resSeq_pairs)
        contact_features = []
        if iterative:
            try:
                for chunk in md.iterload(traj_file, chunk = 1000):
                    chunk_features = md.compute_contacts(
                        chunk, contacts = resIndex_pairs,
                        scheme = 'closest-heavy',
                        ignore_nonprotein=False)[0]
                    print(np.shape(chunk_features))
                    contact_features.append(chunk_features)
                contact_features = np.concatenate(contact_features)
            except Exception as e:
                print(str(e))
                print("Failed")
                return
        else:
            try:
                if traj is None:
                    traj = md.load(traj_file)
                contact_features = md.compute_contacts(
                    traj, contacts = resIndex_pairs,
                    scheme = 'closest-heavy',
                    ignore_nonprotein=False)[0]
            except Exception as e:
                print(str(e))
                print("Failed for traj")
                return
def test_load_frame(ref_traj, get_fn):
    """Check ``md.load_frame`` against slicing the fully loaded trajectory.

    Marked as expected failure for the Gro format (before loading) and for
    the DTR format (after loading). Time is only compared for formats that
    ``has_time_info`` reports as carrying it.
    """
    if ref_traj.fobj is md.formats.GroTrajectoryFile:
        pytest.xfail("Gro doesn't implement seek")

    traj_path = get_fn(ref_traj.fn)
    trajectory = md.load(traj_path, top=get_fn('native.pdb'))
    rand = np.random.randint(len(trajectory))
    frame = md.load_frame(get_fn(ref_traj.fn), index=rand,
                          top=get_fn('native.pdb'))

    if ref_traj.fobj is md.formats.DTRTrajectoryFile:
        pytest.xfail("DTR doesn't load a single frame properly")

    expected = trajectory[rand]
    eq(expected.xyz, frame.xyz)
    eq(expected.unitcell_vectors, frame.unitcell_vectors)
    if not has_time_info(ref_traj.fext):
        return
    eq(expected.time, frame.time)
def get_samples(cluster, trajectories, clusters_map, clusterer_dir, features_dir, traj_dir, save_dir, n_samples, method):
    """Save up to ``n_samples`` member frames of a cluster as PDB files.

    Members are taken from ``clusters_map[cluster]`` as (trajectory number,
    frame number) pairs. With ``method == "random"`` the members are drawn
    with ``random.sample``; otherwise the first ones are used. Each frame
    is saved to ``<save_dir>/cluster<cluster>_sample<s>.pdb`` without
    "SOD"/"CLA" residues and without residues numbered 341 or above.

    Parameters
    ----------
    cluster : int
        Cluster number (key of ``clusters_map``).
    trajectories : sequence of str
        Trajectory files, indexed by trajectory number.
    clusters_map : dict
        Maps a cluster number to its list of (trajectory, frame) pairs.
    clusterer_dir, features_dir, traj_dir
        Not used.
    save_dir : str
        Directory receiving the PDB files.
    n_samples : int
        Number of samples requested.
    method : str
        "random" for random sampling, anything else for the first members.

    Returns
    -------
    The member positions that were selected.
    """
    num_configurations = len(clusters_map[cluster])
    if method == "random":
        try:
            indices = random.sample(range(num_configurations), n_samples)
        except ValueError:
            # random.sample raises ValueError when more samples are
            # requested than members exist (or n_samples is negative).
            # NOTE(review): this path returns without writing any PDB file,
            # unlike the non-random branch - confirm that is intended.
            return(range(0, min(n_samples, num_configurations)))
    else:
        indices = range(0, min(n_samples, num_configurations))

    for s in range(0, n_samples):
        if s == len(clusters_map[cluster]):
            # Fewer members than requested samples: stop early.
            return(indices[0:s])
        k = indices[s] if method == "random" else s
        sample = clusters_map[cluster][k]
        traj_id = sample[0]
        frame = sample[1]
        traj = trajectories[traj_id]
        print("cluster %d sample %d" % (cluster, k))

        top = md.load_frame(traj, index=frame).topology
        atom_indices = [
            a.index for a in top.atoms
            if str(a.residue)[0:3] != "SOD"
            and str(a.residue)[0:3] != "CLA"
            and a.residue.resSeq < 341
        ]
        conformation = md.load_frame(traj, index=frame,
                                     atom_indices=sorted(atom_indices))
        conformation.save_pdb(
            "%s/cluster%d_sample%d.pdb" % (save_dir, cluster, s))

    print(cluster)
    return indices
def read_and_featurize_custom(traj_file, condition=None, location=None, dihedral_residues = None, distance_residues = None):
    """Compute sin/cos features of phi, psi and chi2 dihedrals and dump them.

    The dihedral index tuples come from ``phi_indices``, ``psi_indices``
    and ``chi2_indices``; the angles are computed with
    ``ManualDihedral.compute_dihedrals``. The concatenated features are
    written to ``<location>/<condition>.h5``.

    Parameters
    ----------
    traj_file : str
        Trajectory file to featurize.
    condition : str, optional
        Output base name; defaults to ``get_condition(traj_file)``.
    location : str, optional
        Output directory; defaults to the ``features_allprot`` scratch
        directory.
    dihedral_residues
        Residues handed to the ``*_indices`` helpers.
    distance_residues
        Not used.
    """
    top = md.load_frame(traj_file, index = 0).topology
    atom_indices = [a.index for a in top.atoms]
    traj = md.load(traj_file, atom_indices=atom_indices)
    # Function-call form of print: valid on Python 2 and Python 3.
    print(traj_file)

    phi_tuples = phi_indices(traj.topology, dihedral_residues)
    psi_tuples = psi_indices(traj.topology, dihedral_residues)
    chi2_tuples = chi2_indices(traj.topology, dihedral_residues)

    # Order of the blocks: sin(phi), cos(phi), sin(psi), cos(psi),
    # sin(chi2), cos(chi2).
    blocks = []
    for tuples in (phi_tuples, psi_tuples, chi2_tuples):
        angles = np.transpose(
            ManualDihedral.compute_dihedrals(traj=traj, indices=tuples))
        blocks.append(np.sin(angles))
        blocks.append(np.cos(angles))
    manual_features = np.concatenate(blocks)

    print("new features has shape: ")
    print(np.shape(manual_features))

    if condition is None:
        condition = get_condition(traj_file)
    if location is None:
        location = "/scratch/users/enf/b2ar_analysis/features_allprot"

    verbosedump(manual_features, "%s/%s.h5" % (location, condition))
def reimage_traj(traj_file, traj_dir, save_dir, ext):
    """Re-image a trajectory with ``autoimage`` and save it under ``save_dir``.

    For ``ext == ".pdb"`` the file is loaded directly, re-imaged and saved
    under its own name; nothing is done if that output already exists.
    For any other extension the trajectory is first converted to a
    temporary DCD/PDB pair in ``traj_dir``, re-imaged, round-tripped
    through a second temporary DCD/PDB pair in ``save_dir`` and finally
    saved under the original file name. Temporary files are removed
    afterwards, also when a step fails.

    Parameters
    ----------
    traj_file : str
        "/"-separated path of the input file.
    traj_dir : str
        Directory for the first pair of temporary files.
    save_dir : str
        Output directory.
    ext : str
        Extension selecting the code path (".pdb" or anything else).
    """
    file_lastname = traj_file.split("/")[-1]
    filename = file_lastname.split(".")[0]
    h5_filename = file_lastname
    new_h5_file = "%s/%s" % (save_dir, h5_filename)

    if ext == ".pdb":
        if os.path.exists(new_h5_file):
            print("already reimaged")
            return
        traj_pytraj = mdio.load(traj_file, top=traj_file)[:]
        traj_pytraj.autoimage()
        traj_pytraj.save(new_h5_file)
        print("saving %s" % h5_filename)
        return

    traj_dcd = "%s/%s.dcd" % (traj_dir, filename)
    traj_pdb = "%s/%s.pdb" % (traj_dir, filename)
    dcd_filename = "%s_temp.dcd" % filename
    top_filename = "%s_temp.pdb" % filename
    new_dcd_file = "%s/%s" % (save_dir, dcd_filename)
    new_top_file = "%s/%s" % (save_dir, top_filename)

    # Files are registered just before they are written so that the
    # cleanup below only ever touches files this call (over)wrote.
    temp_files = []
    try:
        traj = md.load(traj_file)
        traj_frame = md.load_frame(traj_file, index=0)
        temp_files.append(traj_dcd)
        traj.save_dcd(traj_dcd)
        temp_files.append(traj_pdb)
        traj_frame.save_pdb(traj_pdb)

        traj_pytraj = mdio.load(traj_dcd, top=traj_pdb)[:]
        traj_pytraj.autoimage()

        print(new_dcd_file)
        print(new_top_file)
        temp_files.append(new_dcd_file)
        traj_pytraj.save(new_dcd_file)
        temp_files.append(new_top_file)
        traj_pytraj.save(new_top_file)

        new_traj = md.load(new_dcd_file, top=traj_pdb)
        new_traj.save(new_h5_file)
    finally:
        for path in temp_files:
            if os.path.exists(path):
                os.remove(path)
    return
def gen_movie(ys, reference, filenames, out, N_atoms):
    """Build a trajectory from the frames closest to a sequence of points.

    Assumes that we have a full featurization of the model. Should fix
    this for larger systems.

    For every point in ``ys`` the frame with the smallest summed squared
    difference in the loaded time series is collected. The frames are
    joined in order, superposed and saved as ``<out>.xtc``; the first frame
    is also saved as ``<out>.xtc.pdb``.
    """
    xx, ii, ff = load_timeseries(filenames, arange(N_atoms), reference)

    def _closest_frame(point):
        nearest = np.argmin(np.sum((point - xx)**2, axis=1))
        return md.load_frame(ff[nearest], ii[nearest])

    movieframes = [_closest_frame(point) for point in ys]
    movie = reduce(lambda joined, nxt: joined.join(nxt), movieframes)
    movie.superpose(movie)
    movie.save('%s.xtc' % out)
    movie[0].save('%s.xtc.pdb' % out)
def __init__(self, input_prmtop, input_trjname, clustercenter_file):
    """Initialize an object of the HSAcalcs class.

    Builds the HSA data dictionary from the cluster-center file, reads the
    topology from the first trajectory frame and generates the atom
    indices.

    Parameters
    ----------
    input_prmtop : str
        Topology file used to read the trajectory.
    input_trjname : str
        Trajectory file; only its first frame is loaded here.
    clustercenter_file : str
        File handed to ``_initializeHSADict``.
    """
    self.hsa_data = self._initializeHSADict(clustercenter_file)
    # Function-call form of print: valid on Python 2 and Python 3.
    print("Reading in topology ...")
    first_frame = md.load_frame(input_trjname, 0, top=input_prmtop)
    self.top = first_frame.topology
    print("Generating atom indices ...")
    self._indexGenerator()
    print("Done...")
def test_load_frame():
    """Yield one check per format that ``md.load_frame`` matches slicing.

    For each trajectory format a random frame is loaded both with
    ``md.load_frame`` and by slicing the fully loaded trajectory; the
    yielded test compares coordinates, unit cell vectors and time. The
    legacy ``.lh5`` file is skipped on Windows with Python 3. The PDB
    reader is checked directly at the end.
    """
    files = ['frame0.nc', 'frame0.h5', 'frame0.xtc', 'frame0.trr',
             'frame0.dcd', 'frame0.mdcrd', 'frame0.binpos', 'frame0.xyz',
             'frame0.lammpstrj']
    if not (on_win and on_py3):
        files.append('legacy_msmbuilder_trj0.lh5')

    trajectories = [md.load(get_fn(f), top=get_fn('native.pdb')) for f in files]
    rand = [np.random.randint(len(t)) for t in trajectories]
    frames = [md.load_frame(get_fn(f), index=r, top=get_fn('native.pdb'))
              for f, r in zip(files, rand)]

    for traj, frame, r, f in zip(trajectories, frames, rand, files):
        # Bind the loop variables as defaults: a plain closure would see
        # the values of the *last* iteration if the yielded test were run
        # after the generator had advanced.
        def test(traj=traj, frame=frame, r=r, f=f):
            eq(traj[r].xyz, frame.xyz)
            eq(traj[r].unitcell_vectors, frame.unitcell_vectors)
            eq(traj[r].time, frame.time,
               err_msg='%d, %d: %s' % (traj[r].time[0], frame.time[0], f))

        test.description = 'test_load_frame: %s' % f
        yield test

    t1 = md.load(get_fn('2EQQ.pdb'))
    r = np.random.randint(len(t1))
    t2 = md.load_frame(get_fn('2EQQ.pdb'), r)
    eq(t1[r].xyz, t2.xyz)
def reimage_traj(traj_file, save_dir):
    """Re-image a trajectory with ``autoimage`` and save it as HDF5.

    The trajectory is loaded with mdtraj, converted with
    ``mdio.load_mdtraj``, re-imaged, and the re-imaged coordinates
    (divided by 10) are copied back into the mdtraj trajectory, which is
    saved as ``<save_dir>/<base name>.h5``.

    Parameters
    ----------
    traj_file : str
        "/"-separated path of the input trajectory.
    save_dir : str
        Output directory.
    """
    traj = md.load(traj_file)
    traj_pytraj = mdio.load_mdtraj(traj)
    traj_pytraj.autoimage()
    # Factor 1/10: presumably Angstrom -> nm; confirm the units used by the
    # object returned from mdio.load_mdtraj.
    traj.xyz[:] = traj_pytraj.xyz / 10.

    filename = traj_file.split("/")[-1]
    filename = filename.split(".")[0]
    h5_filename = "%s.h5" % filename
    new_h5_file = "%s/%s" % (save_dir, h5_filename)
    # Function-call form of print: valid on Python 2 and Python 3.
    print(new_h5_file)
    traj.save(new_h5_file)
    return