def test_compute_state_energy_gradients(tmpdir):
    """Check that the YANK protocol returns exactly one non-zero gradient.

    A TIP3P SMIRNOFF force field is written to ``tmpdir`` and used to set up
    a dummy water system. The gradient with respect to the vdW epsilon of
    the atom tagged by the water SMIRKS pattern is then evaluated over the
    stored water trajectory.
    """
    force_field_path = os.path.join(tmpdir, "ff.json")
    build_tip3p_smirnoff_force_field().json(force_field_path)

    _, parameterized_system = _setup_dummy_system(
        tmpdir, Substance.from_components("O"), 10, force_field_path
    )

    protocol = SolvationYankProtocol("")
    protocol.thermodynamic_state = ThermodynamicState(
        298.15 * unit.kelvin, 1.0 * unit.atmosphere
    )
    protocol.gradient_parameters = [
        ParameterGradientKey("vdW", "[#1]-[#8X2H2+0:1]-[#1]", "epsilon")
    ]

    water_trajectory = mdtraj.load_dcd(
        get_data_filename("test/trajectories/water.dcd"),
        get_data_filename("test/trajectories/water.pdb"),
    )

    gradients = protocol._compute_state_energy_gradients(
        water_trajectory,
        parameterized_system.topology,
        parameterized_system.force_field.to_force_field(),
        True,
        ComputeResources(),
    )

    # One gradient key was requested, so one (non-vanishing) gradient is expected.
    assert len(gradients) == 1
    assert not np.isclose(gradients[0].value, 0.0 * unit.dimensionless)
def execute(self, directory, available_resources):
    """Concatenate the input DCD trajectories into one output trajectory.

    Parameters
    ----------
    directory : str
        Directory in which ``output_trajectory.dcd`` is written.
    available_resources
        Not used by this method.

    Returns
    -------
    PropertyEstimatorException
        When the numbers of coordinate and trajectory paths differ, or when
        no trajectory paths were given.
    object
        Otherwise whatever ``self._get_output_dictionary()`` returns.
    """
    import mdtraj

    n_coordinate_paths = len(self.input_coordinate_paths)
    n_trajectory_paths = len(self.input_trajectory_paths)

    if n_coordinate_paths != n_trajectory_paths:
        return PropertyEstimatorException(
            directory=directory,
            message='There should be the same number of coordinate and trajectory paths.',
        )

    if len(self.input_trajectory_paths) == 0:
        return PropertyEstimatorException(
            directory=directory,
            message='No trajectories were given to concatenate.',
        )

    path_pairs = list(zip(self.input_coordinate_paths, self.input_trajectory_paths))

    trajectories = [
        mdtraj.load_dcd(trajectory_path, coordinate_path)
        for coordinate_path, trajectory_path in path_pairs
    ]

    # The first truthy coordinate path is reported as the output coordinates.
    output_coordinate_path = None
    for coordinate_path, _ in path_pairs:
        output_coordinate_path = output_coordinate_path or coordinate_path
    self.output_coordinate_path = output_coordinate_path

    if len(trajectories) == 1:
        output_trajectory = trajectories[0]
    else:
        output_trajectory = mdtraj.join(trajectories, False, False)

    self.output_trajectory_path = path.join(directory, 'output_trajectory.dcd')
    output_trajectory.save_dcd(self.output_trajectory_path)

    return self._get_output_dictionary()
def _execute(self, directory, available_resources):
    """Concatenate the input DCD trajectories into one output trajectory.

    The joined trajectory is saved as ``output_trajectory.dcd`` inside
    ``directory`` and its path stored on ``self.output_trajectory_path``;
    ``self.output_coordinate_path`` is set to the first truthy input
    coordinate path. ``available_resources`` is not used.

    Raises
    ------
    ValueError
        If the numbers of coordinate and trajectory paths differ, or if no
        trajectory paths were given.
    """
    import mdtraj

    coordinate_paths = self.input_coordinate_paths
    trajectory_paths = self.input_trajectory_paths

    if len(coordinate_paths) != len(trajectory_paths):
        raise ValueError(
            "There should be the same number of coordinate and trajectory paths."
        )

    if len(trajectory_paths) == 0:
        raise ValueError("No trajectories were given to concatenate.")

    loaded = []
    first_coordinate_path = None

    for coordinate_path, trajectory_path in zip(coordinate_paths, trajectory_paths):
        if not first_coordinate_path:
            first_coordinate_path = coordinate_path
        loaded.append(mdtraj.load_dcd(trajectory_path, coordinate_path))

    self.output_coordinate_path = first_coordinate_path

    if len(loaded) == 1:
        output_trajectory = loaded[0]
    else:
        output_trajectory = mdtraj.join(loaded, False, False)

    self.output_trajectory_path = path.join(directory, "output_trajectory.dcd")
    output_trajectory.save_dcd(self.output_trajectory_path)
def frames(self, dcd, top, maxframes):
    """Lazily yield frames ``0 .. maxframes - 1`` of one DCD file.

    Each frame is read from disk by its own ``md.load_dcd`` call, so only a
    single frame is loaded per iteration step.
    """
    yield from (
        md.load_dcd(dcd, top=top, stride=None, atom_indices=None, frame=frame_index)
        for frame_index in range(maxframes)
    )
def load_trajs(run_dir, parent_dir, top_dir, load_stride=None):
    """Load all ``.dcd`` trajectories of one simulation run with mdtraj.load_dcd

    Arguments
    ---------
    run_dir : str
        the directory where all files for a single simulation run lives
    parent_dir : str
        the directory where all simulations are stored; the run directory is
        located as ``parent_dir + run_dir`` (the strings are joined as-is)
    top_dir : str
        the directory holding the topology files, which are looked up as
        ``pnas2011a-<field>-0-no-water-no-lipid.pdb`` where ``<field>`` is
        the second ``-`` separated field of the trajectory file name

    Keyword arguments
    -----------------
    load_stride : int, default = None
        mdtraj to read every stride-th frame (``None`` reads every frame)

    Returns
    -------
    List, mdtraj objects that are all from the same simulation run, ordered
    by sorted file name; empty if the run directory holds no ``.dcd`` file
    """
    run_path = parent_dir + run_dir
    traj_files = sorted(
        name for name in os.listdir(run_path) if name.endswith('.dcd')
    )

    if not traj_files:
        print('There are no .dcd files in %s \n Returned traj list is empty'
              % run_path)
        return []

    trajs = []
    for this_file in traj_files:
        topology = (top_dir + '/pnas2011a-' + this_file.split('-')[1]
                    + '-0-no-water-no-lipid.pdb')
        # stride=None is load_dcd's own default, so one call covers both the
        # strided and the unstrided case.
        trajs.append(md.load_dcd(run_path + '/' + this_file, top=topology,
                                 stride=load_stride))
    return trajs
def frames(dcd, top, maxframes):
    """Lazily yield single frames from several DCD files.

    ``maxframes`` is split evenly over the files in ``dcd`` and rounded up,
    so ``ceil(maxframes / len(dcd))`` frames are requested from each file
    and the total number of frames may exceed ``maxframes``.

    Parameters
    ----------
    dcd : sequence of str
        Paths of the DCD files, visited in order.
    top
        Topology forwarded to ``md.load_dcd``.
    maxframes : int
        Overall frame budget shared between the files.

    Yields
    ------
    One-frame trajectories as returned by ``md.load_dcd(..., frame=i)``.
    Nothing is yielded when ``dcd`` is empty.
    """
    if not dcd:
        # Avoid dividing by zero when there is nothing to read.
        return

    frames_per_file = math.ceil(maxframes / len(dcd))

    for name in dcd:
        for i in range(frames_per_file):
            yield md.load_dcd(name, top=top, stride=None, atom_indices=None, frame=i)
def read_trajs(self, framelist):
    """Load every DCD path in ``framelist`` and return the trajectories.

    Each file is read against the topology ``self.File_TOP`` with a frame
    stride of ``self.nSubSample``; the result list follows the input order.
    """
    return [
        md.load_dcd(dcd_path, self.File_TOP, stride=self.nSubSample)
        for dcd_path in framelist
    ]
def main():
    """Benchmark streamed against in-memory trajectory subsampling.

    For several (equilibration index, stride) combinations the trajectory is
    subsampled once with ``ExtractUncorrelatedTrajectoryData`` and once with
    ``subsample_trajectory_memory``. Both wall-clock times are printed in
    milliseconds, and the two resulting trajectories are asserted to hold
    the same coordinates and unit cells.
    """
    import mdtraj

    generate_trajectories()

    # (equilibration_index, stride) pairs to benchmark.
    subsample_inputs = [(0, 1), (0, 5), (0, 10), (1, 1), (3, 5), (9, 10)]

    for equilibration_index, stride in subsample_inputs:

        start_time = time.perf_counter()

        subsample_trajectory = ExtractUncorrelatedTrajectoryData('stream_subsample')
        subsample_trajectory.input_coordinate_file = 'coords.pdb'
        subsample_trajectory.input_trajectory_path = 'trajectory.dcd'
        subsample_trajectory.equilibration_index = equilibration_index
        subsample_trajectory.statistical_inefficiency = stride
        subsample_trajectory.execute('', None)

        # perf_counter reports seconds; the factor 1000 converts to ms.
        protocol_total_time = (time.perf_counter() - start_time) * 1000

        start_time = time.perf_counter()
        subsample_trajectory_memory('memory.dcd', equilibration_index, stride)
        memory_total_time = (time.perf_counter() - start_time) * 1000

        # The values are milliseconds, so label them as such.
        print(f'Eq Index={equilibration_index} '
              f'Stride={stride} '
              f'Protocol Time={protocol_total_time}ms '
              f'Memory Time={memory_total_time}ms')

        stream_trajectory = mdtraj.load_dcd(
            filename=subsample_trajectory.output_trajectory_path, top='coords.pdb')
        memory_trajectory = mdtraj.load_dcd(filename='memory.dcd', top='coords.pdb')

        assert len(stream_trajectory) == len(memory_trajectory)
        assert np.allclose(stream_trajectory.xyz, memory_trajectory.xyz)
        assert np.allclose(stream_trajectory.unitcell_lengths,
                           memory_trajectory.unitcell_lengths)
        assert np.allclose(stream_trajectory.unitcell_angles,
                           memory_trajectory.unitcell_angles)
def subsample_trajectory_memory(output_name, equilibration_index=0, stride=5):
    """Subsample ``trajectory.dcd`` after loading it completely into memory.

    The frames before ``equilibration_index`` are dropped, every
    ``stride``-th remaining frame is kept, and the result is written as a
    DCD file to ``output_name``.
    """
    import mdtraj

    full_trajectory = mdtraj.load_dcd(filename='trajectory.dcd', top='coords.pdb')
    production = full_trajectory[equilibration_index:]

    kept_frames = list(range(0, production.n_frames, stride))
    production[kept_frames].save_dcd(output_name)
def load_trajs(run_dir, parent_dir, top_dir, load_stride=None):
    """Load all ``.dcd`` trajectories of one simulation run with mdtraj.load_dcd

    Arguments
    ---------
    run_dir : str
        the directory where all files for a single simulation run lives
    parent_dir : str
        the directory where all simulations are stored; the run directory is
        located as ``parent_dir + run_dir`` (the strings are joined as-is)
    top_dir : str
        the directory holding the topology files, which are looked up as
        ``pnas2011a-<field>-0-no-water-no-lipid.pdb`` where ``<field>`` is
        the second ``-`` separated field of the trajectory file name

    Keyword arguments
    -----------------
    load_stride : int, default = None
        mdtraj to read every stride-th frame (``None`` reads every frame)

    Returns
    -------
    List, mdtraj objects that are all from the same simulation run, ordered
    by sorted file name; empty if the run directory holds no ``.dcd`` file
    """
    run_path = parent_dir + run_dir
    traj_files = sorted(f for f in os.listdir(run_path) if f.endswith(".dcd"))

    if not traj_files:
        print("There are no .dcd files in %s \n Returned traj list is empty" % run_path)
        return []

    trajs = []
    for this_file in traj_files:
        topology = top_dir + "/pnas2011a-" + this_file.split("-")[1] + "-0-no-water-no-lipid.pdb"
        # stride=None is load_dcd's own default, so no separate branch is
        # needed for the unstrided case.
        this_traj = md.load_dcd(run_path + "/" + this_file, top=topology, stride=load_stride)
        trajs.append(this_traj)
    return trajs
def main():
    """Write dimers picked from frames of the second half of a trajectory.

    Usage: ``<script> topology.pdb trajectory.dcd``. The number of output
    files and the contact criterion are requested interactively. Only
    criterion type 1 (atomic contact distance) is implemented; any other
    choice raises ``NotImplementedError``. Output number ``i`` is written to
    ``urea_urea_<i>.xyz``.
    """
    #declare topology (.pdb) file and trajectory (.dcd) file
    topfile = sys.argv[1]
    trjfile = sys.argv[2]

    #settings for dimer choice.
    ndim = int(input("how many dimer output files?\n"))
    cflag = int(
        input(
            "criterion type? 1: atomic contact distance, 2: specific pair(unable), 3: H-bond(unable)\n"
        ))
    if cflag != 1:
        # Without this check `crit` would be undefined and the run would die
        # with a NameError only after the whole trajectory had been loaded.
        raise NotImplementedError(
            "criterion type {} is not implemented; only type 1 "
            "(atomic contact distance) is supported".format(cflag))

    crit1 = float(input("The threshold contact distance? (in A)\n"))
    crit1 *= 0.1  #unit conversion from angstrom to nanometers
    crit2 = int(
        input("Number of required atom-atom contact for dimer selection?\n"))
    crit = [crit1, crit2]

    start_time = timeit.default_timer()

    #load files and prepare parameters
    oritopol = md.load(topfile).topology
    table, bonds = oritopol.to_dataframe()

    # Replace the force-field specific atom names by plain element names.
    renames = (
        ('O2a', 'O'),
        ('Nm1', 'N'), ('Nm2', 'N'),
        ('C2a', 'C'),
        ('Hm1', 'H'), ('Hm2', 'H'), ('Hm3', 'H'), ('Hm4', 'H'),
    )
    for pdb_name, element in renames:
        table.loc[table['name'] == pdb_name, 'name'] = element

    topology = md.Topology.from_dataframe(table, bonds)
    traj = md.load_dcd(trjfile, top=topology)
    nstep = traj.n_frames

    # Drop the atoms named DO, DN1, DN2 and DC before searching for dimers.
    traj = traj.atom_slice(
        topology.select(
            'name != DO and name != DN1 and name != DN2 and name != DC'))

    #loop
    for dindex in range(ndim):
        #load 1 frame, then select dimer
        # Target frames are spread evenly over the second half of the run.
        ntarf = int(nstep / 2 + (nstep / 2.0) / ndim * dindex)
        oneframe = traj[ntarf]
        sel_dimer = search_dimer(oneframe, crit)

        #write xyz file
        sel_dimer.save_xyz('urea_urea_' + str(dindex) + '.xyz')

        if dindex % 50 == 0:
            elapsed = timeit.default_timer() - start_time
            print('step {} time {:10.4f}'.format(dindex, elapsed))
def analyze(dcdFileName, topologyFileName):
    """Print the host-guest centroid distance for every trajectory frame.

    The host is made of all atoms in residues named ``OAH`` and the guest of
    all atoms in residues named ``GOA``. Centroids are plain (unweighted)
    coordinate means.
    """
    traj = md.load_dcd(dcdFileName, topologyFileName)
    print(traj)
    print("shape : ", traj.xyz.shape)

    def atoms_of_residue(residue_name):
        # Indices of all atoms that belong to residues with this name.
        return [
            atom.index for atom in traj.topology.atoms
            if atom.residue.name == residue_name
        ]

    host_atoms = atoms_of_residue('OAH')
    guest_atoms = atoms_of_residue('GOA')

    frame_id = 0
    while frame_id < traj.n_frames:
        host_centroid = np.mean(traj.xyz[frame_id, host_atoms, :], axis=0)
        guest_centroid = np.mean(traj.xyz[frame_id, guest_atoms, :], axis=0)
        separation = np.linalg.norm(host_centroid - guest_centroid)
        print(separation)
        frame_id += 1
def test_thinning():
    """Check that every droplet trajectory thins down to exactly 20 snapshots.

    For each DCD file the leading frames are discarded (a quarter of the
    trajectory, but at most 10 frames) and the remainder is strided so that
    ``max_snapshots_per_window`` snapshots are left.
    """
    from glob import glob

    data_dir = "data/test_data/droplet/molDWRow_298"
    dcds = glob(data_dir + "/*dcd")
    top = data_dir + "/molDWRow_298_in_droplet.pdb"
    max_snapshots_per_window = 20
    print(dcds)

    for dcd_path in dcds:
        print(dcd_path)
        traj = md.load_dcd(dcd_path, top=top)

        n_discarded = min(int(len(traj) / 4), 10)
        snapshots = traj[n_discarded:].xyz * unit.nanometer

        # A stride of at least one keeps short trajectories intact.
        further_thinning = max(int(len(snapshots) / max_snapshots_per_window), 1)
        snapshots = snapshots[::further_thinning][:max_snapshots_per_window]

        n_snapshots = len(snapshots)
        print(n_snapshots)
        assert max_snapshots_per_window == n_snapshots
def write_clusters(k, trajname, top, outdir, name_mod):
    """
    This function writes a new dcd trajectory for each k-means cluster defined
    by PCA functions. This might be useful if you want to perform structural
    analyses on individual clusters.

    The cluster assignments are always read from
    ``outdir + 'PCA_kmeans_clusters' + name_mod + '.npy'`` (as text, via
    ``np.loadtxt``); the ``k`` argument is overwritten and therefore ignored.
    ``trajname`` is either a ``.dcd`` trajectory (loaded with ``top``) or a
    ``.txt`` file listing one structure file per line. Any other extension
    writes nothing. If all cluster trajectories already exist, nothing is
    rewritten. Always returns ``None``.
    """
    cluster_file = outdir + 'PCA_kmeans_clusters' + name_mod + '.npy'
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print("loaded %s" % cluster_file)
    k = np.loadtxt(cluster_file)
    n_clusters = int(max(k)) + 1

    def cluster_path(cluster):
        # Output file of one cluster.
        return outdir + 'cluster_' + str(cluster) + name_mod + '.dcd'

    # Check to see if cluster trajectories already exist
    if all(os.path.isfile(cluster_path(c)) for c in range(n_clusters)):
        print("cluster trajectories already exist")
        return None

    extension = trajname.split('.')[-1]

    #---DCD
    if extension == 'dcd':
        structures = md.load_dcd(trajname, top)
        announce = False
    #---PDB
    elif extension == 'txt':
        # Close the list file deterministically instead of leaking the handle.
        with open(trajname) as listing:
            structures = [md.load(line.split("\n")[0]) for line in listing]
        announce = True
    else:
        return None

    for cluster in range(n_clusters):
        with md.formats.DCDTrajectoryFile(cluster_path(cluster), 'w') as f:
            for frames in np.where(k == cluster):
                for fr in frames:
                    # mdtraj stores nm; the raw DCD writer is fed the x10 values.
                    f.write(structures[fr].xyz * 10)
        if announce:
            # NOTE(review): the original indentation was lost; the progress
            # message is assumed to be printed once per cluster in the
            # structure-list branch only -- confirm.
            print("done with cluster %s" % cluster)

    return None
def test_compare_energies_acetylacetone_enol_waterbox(caplog):
    """Compare CHARMM and openMM snapshot energies for the enol form in water.

    For every intermediate state directory the energies evaluated through
    CHARMM and through openMM must agree: a mean absolute error below 1.0
    and a relative tolerance of 1e-2 per snapshot.
    """
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "waterbox"
    base = "data/acetylacetone-keto-acetylacetone-enol-rsfe/acetylacetone-enol/"
    output_files_enol, _ = get_output_files_acetylaceton_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-acetylacetone-tautomer-rsfe.yaml"
    configuration = load_config_yaml(
        config=conf, input_dir="data/", output_dir="data"
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "acetylacetone-enol")
    for idx, b in enumerate(output_files_enol):
        # used load_dcd for CHARMM
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        traj.save_dcd(f"{base}/traj.dcd", force_overwrite=True)
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)

        # load dcd with openMM; the context manager closes the file handle
        # that was previously left open for every state.
        with md.open(f"{b}/lig_in_{env}.dcd") as dcd_file:
            xyz, unitcell_lengths, _ = dcd_file.read()
        xyz = xyz / 10  # correct the conversion
        unitcell_lengths = unitcell_lengths / 10
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )

        # NOTE(review): the original indentation was lost; the comparisons
        # are assumed to run for every state, i.e. inside the loop.
        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        mae = np.sum(s) / len(s)
        print(mae)
        assert mae < 1.0

        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=1e-2)
def concatenate_folder(fname, top_loc="./starting_coordinates/0.pdb", stride=1):
    """Join all trajectory pieces of ``./<fname>`` into one solvent-free XTC.

    The backup files ``trajectory.dcd.bak.*`` (sorted with ``keynat``) come
    first, followed by ``trajectory.dcd``. Files that cannot be loaded are
    skipped with a printed message. The joined trajectory is stripped of
    solvent and saved as ``<fname>/<fname>.xtc``.

    Parameters
    ----------
    fname : str
        Folder name, relative to the current directory.
    top_loc : str
        Path of the structure file providing the topology.
    stride : int
        Frame stride passed to ``md.load_dcd``.

    Raises
    ------
    IndexError
        If none of the trajectory files could be loaded.
    """
    flist = sorted(glob.glob("./%s/trajectory.dcd.bak.*" % fname), key=keynat)
    flist.extend(glob.glob("./%s/trajectory.dcd" % fname))
    print(flist)

    top = md.load(top_loc)

    trj_list = []
    for dcd_path in flist:
        try:
            trj_list.append(md.load_dcd(dcd_path, top=top, stride=stride))
        except Exception as err:
            # Still best-effort, but say which file was dropped and why, and
            # let KeyboardInterrupt/SystemExit through.
            print("Skipping %s: %s" % (dcd_path, err))

    if not trj_list:
        raise IndexError(
            "No trajectory could be loaded from ./%s (tried %d files)"
            % (fname, len(flist)))

    trj = trj_list[0]
    if len(trj_list) > 1:
        trj = trj + trj_list[1:]

    trj.remove_solvent().save_xtc("%s/%s.xtc" % (fname, fname))
    print("Found %d trajs" % len(trj_list))
    return
def _execute(self, directory, available_resources):
    """Re-evaluate the energies of a stored trajectory and, optionally, gradients.

    The trajectory at ``self.trajectory_file_path`` is loaded against the
    topology of ``self.parameterized_system`` and passed to
    ``_evaluate_energies``; the result is stored on
    ``self.output_observables``. When ``self.gradient_parameters`` is not
    empty, ``_compute_gradients`` is additionally called with those
    observables.

    Raises
    ------
    ValueError
        If gradients are requested for a system whose force field is not a
        ``SmirnoffForceFieldSource``.
    """
    import mdtraj

    # Load in the inputs.
    trajectory = mdtraj.load_dcd(
        self.trajectory_file_path, self.parameterized_system.topology_path
    )
    system = self.parameterized_system.system

    # Re-evaluate the energies.
    self.output_observables = _evaluate_energies(
        self.thermodynamic_state,
        system,
        trajectory,
        available_resources,
        self.enable_pbc,
        False,
    )

    # Optionally compute any gradients.
    gradients_requested = len(self.gradient_parameters) != 0
    if not gradients_requested:
        return

    source = self.parameterized_system.force_field
    if not isinstance(source, SmirnoffForceFieldSource):
        raise ValueError(
            "Derivates can only be computed for systems parameterized with SMIRNOFF "
            "force fields."
        )

    force_field = self.parameterized_system.force_field.to_force_field()

    _compute_gradients(
        self.gradient_parameters,
        self.output_observables,
        force_field,
        self.thermodynamic_state,
        self.parameterized_system.topology,
        trajectory,
        available_resources,
        self.enable_pbc,
    )
def test_compare_energies_2OJ9_original_vacuum(caplog):
    """Compare CHARMM and openMM snapshot energies for 2OJ9-original in vacuum.

    For every intermediate state directory each pair of energies must agree
    within a relative tolerance of 0.2, and the mean absolute error must
    stay below 0.005.
    """
    caplog.set_level(logging.WARNING)
    from transformato import FreeEnergyCalculator
    import mdtraj as md

    env = "vacuum"
    base = f"{get_testsystems_dir()}/2OJ9-original-2OJ9-tautomer-rsfe/2OJ9-original/"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"
    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_testsystems_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    f = FreeEnergyCalculator(configuration, "2OJ9-original")
    for idx, b in enumerate(output_files_t1):
        # used load_dcd for CHARMM
        traj = md.load_dcd(
            f"{b}/lig_in_{env}.dcd",
            f"{b}/lig_in_{env}.psf",
        )
        traj.save_dcd(f"{base}/traj.dcd")
        l_charmm = f._evaluate_e_on_all_snapshots_CHARMM(traj, idx + 1, env)

        # load dcd with openMM; the context manager closes the file handle
        # that was previously left open for every state.
        with md.open(f"{b}/lig_in_{env}.dcd") as dcd_file:
            xyz, unitcell_lengths, _ = dcd_file.read()
        xyz = xyz / 10  # correct the conversion
        l_openMM = f._evaluate_e_on_all_snapshots_openMM(
            xyz, unitcell_lengths, idx + 1, env
        )

        # NOTE(review): the original indentation was lost; the comparisons
        # are assumed to run for every state, i.e. inside the loop.
        assert len(l_charmm) == len(l_openMM)
        s = abs(np.array(l_charmm) - np.array(l_openMM))
        print(s)
        for e_charmm, e_openMM in zip(l_charmm, l_openMM):
            assert np.isclose(e_charmm, e_openMM, rtol=0.2)
        mae = np.sum(s) / len(s)
        assert mae < 0.005
def __del__(self):
    """Turn the temporary trajectory into an MSD table and clean up.

    The temporary DCD/PDB pair is loaded, the displacement of every reported
    molecule relative to the first frame is evaluated with
    ``self._computeMoleculeMSD`` and averaged over the molecules, and the
    per-frame averages are written to ``self._fileName`` as CSV together
    with the recorded times (and steps, when ``self._step`` is set).
    Finally the temporary output is closed and both temporary files removed.
    """
    # NOTE(review): the trajectory is read before self._tempOut is closed;
    # if that handle buffers writes, the last frames may be missing -- confirm.
    traj = md.load_dcd(self._tempDCDFileName, top=self._tempPDBFileName)

    # Compute MSD for each frame, relative to the first one
    reference = traj.xyz[0]
    MSD_list = []
    for frame_positions in traj.xyz:
        per_molecule = [
            self._computeMoleculeMSD(reference[molecule], frame_positions[molecule], mass)
            for molecule, mass in zip(self._reportedMolecules, self._reportedMoleculesMass)
        ]
        MSD_list.append(np.mean(per_molecule))

    # Save data to csv file (column order: time, optional step, MSD)
    columns = OrderedDict()
    columns['Time (ps)'] = self._times
    if self._step:
        columns['Step'] = self._steps
    columns['MSD (nm^2)'] = MSD_list
    pd.DataFrame(data=columns).to_csv(path_or_buf=self._fileName, index=False)

    # Close temporary dcd file
    self._tempOut.close()

    # Delete temporary dcd file
    try:
        os.remove(self._tempDCDFileName)
    except OSError:
        print("Temporary DCD file not found.")

    # Delete temporary pdb file
    try:
        os.remove(self._tempPDBFileName)
    except OSError:
        print("Temporary PDB file not found.")
def test_compute_gradients(tmpdir, smirks, all_zeros):
    """Check ``_compute_gradients`` on the stored water trajectory.

    A zero-valued potential energy observable is created for each frame and
    the gradient with respect to the vdW epsilon of ``smirks`` is computed.
    The gradient must have one entry per frame and must be all zero exactly
    when ``all_zeros`` is truthy.
    """
    # Load a short trajectory.
    coordinate_path = get_data_filename("test/trajectories/water.pdb")
    trajectory_path = get_data_filename("test/trajectories/water.dcd")
    trajectory = mdtraj.load_dcd(trajectory_path, coordinate_path)

    zero_energies = ObservableArray(
        np.zeros(len(trajectory)) * unit.kilojoule / unit.mole
    )
    observables = ObservableFrame({"PotentialEnergy": zero_energies})

    gradient_keys = [ParameterGradientKey("vdW", smirks, "epsilon")]
    thermodynamic_state = ThermodynamicState(
        298.15 * unit.kelvin, 1.0 * unit.atmosphere
    )
    topology = Topology.from_mdtraj(
        trajectory.topology, [Molecule.from_smiles("O")]
    )

    _compute_gradients(
        gradient_keys,
        observables,
        ForceField("openff-1.2.0.offxml"),
        thermodynamic_state,
        topology,
        trajectory,
        ComputeResources(),
        True,
    )

    assert len(
        observables["PotentialEnergy"].gradients[0].value) == len(trajectory)

    vanishes = np.allclose(
        observables["PotentialEnergy"].gradients[0].value,
        0.0 * unit.kilojoule / unit.kilocalorie,
    )
    assert bool(vanishes) == bool(all_zeros)
def analyze_clusters(self):
    """
    Structures can be characterized by PCA, and the structures can be clustered
    using k-means in PC-space. This function performs a structural analysis on
    each k-means cluster.

    For every cluster the trajectory
    ``outdir + 'cluster_<n>' + <name_mod> + '.dcd'`` is loaded,
    ``self.protein_calcs`` is run on each structure, and the analyses named
    in ``self.calcs`` are summarised. Always returns ``None``.
    """
    self.name_mod = '_ros'
    # NOTE(review): the cluster file is located through the module-level
    # object `sa`, not through `self` -- confirm this is intended.
    k = np.loadtxt(sa.outdir + 'PCA_kmeans_clusters' + sa.name_mod + '.npy')

    # Be careful with this. Not all calcs need to be redone (i.e. Rg, SASA, EED, PCA)
    # However, cmaps and SS both need to be recomputed because they were already averaged
    self.calcs = ['flory']

    if self.top == "":
        self.load_top()

    # self.name_mod is rewritten per cluster, so remember the input suffix.
    old_name_mod = self.name_mod
    for cluster in range(int(max(k)) + 1):
        # Fresh accumulators for each cluster.
        self.cmaps = []
        self.SS = []
        dcd = md.load_dcd(
            self.outdir + 'cluster_' + str(cluster) + old_name_mod + '.dcd',
            self.top)
        self.name_mod = '_cluster_' + str(cluster)
        # Single-argument print call: same output on Python 2 and Python 3.
        print("CLUSTER: %s" % cluster)
        self.nres = dcd[0].n_residues
        for struc in dcd:
            self.protein_calcs(struc)

        # NOTE(review): the original indentation was lost; the summaries are
        # assumed to run once per cluster because the accumulators and
        # name_mod are reset per cluster -- confirm.
        if 'cmaps' in self.calcs:
            sa_core.av_cmaps(self.cmaps, self.nres, self.seq, self.outdir,
                             self.name_mod, "NULL")
        if 'SS' in self.calcs:
            sa_core.av_SS(self.SS)
        if 'flory' in self.calcs:
            self.fex_hist()
    return None
def load(self):
    """Load the trajectory data, apply optional weighting, and name the atoms.

    ``self.try_mdtraj()`` is attempted first; on ``OSError`` the data is read
    with ``self.np_load`` instead, scaled by 10 when
    ``self.args.pos_data_scale`` is ``'nm'``. When a weighting file is set,
    ``self.traj_data`` is multiplied in place by the weighting data.

    NOTE(review): the indentation of this function was lost in the source;
    the nesting below is a reconstruction and should be confirmed against
    the original file.
    """
    try:
        self.try_mdtraj()
    except OSError:
        # Fallback loader: scale factor 10 for 'nm' input, 1 otherwise.
        fak = 10.0 if self.args.pos_data_scale == 'nm' else 1.0
        self.traj_data = self.np_load(self.data_file)*fak
        # [0.0, 0.0] acts as the "no time range selected" sentinel.
        if self.args.time_range != [0.0, 0.0]:
            # Cut the leading axis to the requested range for any rank.
            if self.traj_data.ndim == 3:
                self.traj_data = self.traj_data[
                    self.args.time_range[0]:self.args.time_range[1],:,:]
            elif self.traj_data.ndim == 2:
                self.traj_data = self.traj_data[
                    self.args.time_range[0]:self.args.time_range[1],:]
            elif self.traj_data.ndim == 1:
                self.traj_data = self.traj_data[
                    self.args.time_range[0]:self.args.time_range[1]]
    if self.weighting_file is not None:
        if 'dcd' in self.weighting_file.split('.')[-1]:
            #weighting_data_temp = mdtraj.load(self.weighting_file, top=self.top_file).xyz[:,:,:-1]
            #self.weighting_data = np.empty((weighting_data_temp.shape[0], weighting_data_temp.shape[1]))
            # Only the component at index 1 of the last axis is used as weight.
            self.weighting_data = mdtraj.load_dcd(self.weighting_file, top=self.top_file).xyz[:,:,1]
        else:
            self.weighting_data = self.np_load(self.weighting_file)
        #self.weighting_data = self.np_load(self.weighting_file)
        # Add singleton axes so the weights broadcast against traj_data.
        if self.weighting_data.ndim == 1:
            self.weighting_data = self.weighting_data[np.newaxis,:,np.newaxis]
        elif self.weighting_data.ndim == 2:
            if self.args.time_range != [0.0, 0.0]:
                self.weighting_data = self.weighting_data[
                    self.args.time_range[0]:self.args.time_range[1],:]
            # Trim the weights to the number of loaded frames.
            self.weighting_data = self.weighting_data[:self.traj_data.shape[0],:,np.newaxis]
        self.traj_data *= self.weighting_data
    self.name_data = get_full_atom_names(self.TRAJ, self.traj_data.shape[0])
def test_reading_of_coords():
    """Check that raw DCD coordinates match ``md.load_dcd`` after rescaling.

    The first frame read through ``md.open(...).read()`` and divided by 10
    must equal the first frame returned by ``md.load_dcd``.
    """
    env = "vacuum"
    output_files_t1, _ = get_output_files_2oj9_tautomer_pair()

    conf = f"{get_testsystems_dir()}/config/test-2oj9-tautomer-pair-rsfe.yaml"
    configuration = load_config_yaml(
        config=conf, input_dir=get_testsystems_dir(), output_dir=get_test_output_dir()
    )  # NOTE: for preprocessing input_dir is the output dir

    b = output_files_t1[0]
    print(b)
    traj_load = md.load_dcd(
        f"{b}/lig_in_{env}.dcd",
        f"{b}/lig_in_{env}.psf",
    )
    print(traj_load.xyz[0])

    # Read the raw file inside a context manager so the handle is closed
    # again instead of being left open.
    with md.open(f"{b}/lig_in_{env}.dcd") as traj_open:
        xyz, unitcell_lengths, _ = traj_open.read()
    xyz = xyz / 10
    print(xyz[0])
    assert np.allclose(xyz[0], traj_load.xyz[0])
def reconstruct(repmat, t, T, dir):
    """
    Use the inverted conversion table to construct new trajectories with
    constant temperature. Center each frame, too.

    The replica trajectories ``N_0<ii>_yourcalc_traj.dcd`` (``nreps`` of
    them, ``nreps`` being a module-level name) are read from
    ``<cwd>/<dir>/``. For every replica index a new file
    ``<dir>/T_<T[replica]>.dcd`` is written, holding the frames selected
    through ``repmat`` with the ACE/NME residues removed. The first frame of
    the last loaded replica is also saved as ``<dir>/T.pdb``.
    Always returns ``None``.
    """
    peptide_selection = '(not resname ACE) and (not resname NME)'
    base = os.getcwd() + '/' + dir + '/'
    pdb = base + 'N_000_yourcalc_START.pdb'

    # load each trajectory and append it to a list.
    trajs = []
    for i in range(nreps):
        # Two-digit, zero-padded replica number (e.g. 'N_007_...').
        dcd = base + 'N_0' + '%02d' % i + '_yourcalc_traj.dcd'
        trajs.append(md.load_dcd(dcd, top=pdb))

    # Also make a pdb with same exact coordinates.
    # NOTE(review): the original indentation was lost; saving once from the
    # last replica gives the same file as re-saving on every iteration.
    if trajs:
        last_traj = trajs[-1]
        peptide = last_traj.atom_slice(
            last_traj.topology.select(peptide_selection))[0]
        peptide.save(dir + '/T.pdb')

    # For each replica, go through each step in the inverted conversion table and write the new trajectory
    for replica in range(nreps):
        # Single-argument print call: same output on Python 2 and Python 3.
        print("Writing new trajectory. This may take a while... Replica: %s" % replica)
        replica_column = repmat[:, replica]
        with md.formats.DCDTrajectoryFile(
                dir + '/T_' + str(T[replica]) + '.dcd', 'w') as f:
            for interval in range(0, t.size * 2 - 2, 2):
                # Floor division keeps the index an int on Python 3 as well.
                n = int(replica_column[interval // 2]) - 1
                structure = trajs[n][interval]
                peptide = structure.atom_slice(
                    structure.topology.select(peptide_selection))[0]
                peptide.center_coordinates()
                # mdtraj stores nm; the raw DCD writer is fed the x10 values.
                f.write(peptide.xyz * 10)
    return None
def test_analyze_phase(monkeypatch, tmpdir):
    """Run ``SolvationYankProtocol._analyze_phase`` with its helpers mocked out.

    The statistics, trajectory extraction and gradient helpers are replaced
    by stubs built around the stored water trajectory, and a minimal
    ``_analysed_output`` dictionary is supplied. The test passes when
    ``_analyze_phase`` returns a five-element result without raising.
    """
    from simtk import unit as simtk_unit

    # Generate the required inputs
    force_field_path = os.path.join(tmpdir, "ff.json")
    build_tip3p_smirnoff_force_field().json(force_field_path)

    coordinate_path, parameterized_system = _setup_dummy_system(
        tmpdir, Substance.from_components("O"), 10, force_field_path
    )
    solvent_trajectory = mdtraj.load_dcd(
        get_data_filename("test/trajectories/water.dcd"),
        get_data_filename("test/trajectories/water.pdb"),
    )

    # Mock the internally called methods.
    stubs = {
        "_time_series_statistics": lambda *_: TimeSeriesStatistics(
            len(solvent_trajectory), len(solvent_trajectory), 1.0, 0
        ),
        "_extract_trajectory": lambda *_: solvent_trajectory,
        "_extract_solvent_trajectory": lambda *_: solvent_trajectory,
        "_compute_state_energy_gradients": lambda *_: [],
    }
    for method_name, stub in stubs.items():
        monkeypatch.setattr(SolvationYankProtocol, method_name, stub)

    # Build up the protocol.
    protocol = SolvationYankProtocol("")
    protocol.thermodynamic_state = ThermodynamicState(
        298.15 * unit.kelvin, 1.0 * unit.atmosphere
    )
    protocol.gradient_parameters = [
        ParameterGradientKey("vdW", "[#1]-[#8X2H2+0:1]-[#1]", "epsilon")
    ]
    protocol.solvent_1 = Substance.from_components("O")

    solvent_free_energy = {
        "kT": 1.0 / simtk_unit.kilojoules_per_mole,
        "free_energy_diff": 0.0,
        "free_energy_diff_unit": 0.0 * simtk_unit.kilojoules_per_mole,
        "free_energy_diff_error": 0.0,
        "free_energy_diff_error_unit": 0.0 * simtk_unit.kilojoules_per_mole,
    }
    protocol._analysed_output = {
        "general": {"solvent1": {"nstates": 1}},
        "free_energy": {"solvent1": solvent_free_energy},
    }

    (
        free_energy,
        solution_trajectory,
        solvent_trajectory,
        solution_gradients,
        solvent_gradients,
    ) = protocol._analyze_phase(
        "", parameterized_system, "solvent1", ComputeResources()
    )
eps0 = (1 / 6.0) * coulomb_factor * 4 * pi #6*epsilon_0 permittivity DGdsc = -(gamma * qlig) * (eps0) * (nwater / box_edge**3) return DGdsc #################################################################### ########MAIN##################### #read the trajectory top = sys.argv[1] crd = sys.argv[2] dcd = sys.argv[3] step = int(sys.argv[4]) #100 base = AmberParm(top, crd) md_dcd = md.load_dcd(dcd, top=top) dg_cors = [] dg_nets = [] dg_usvs = [] dg_rips = [] dg_emps = [] dg_dscs = [] ofile.write("NET, USV, RIP, EMP, DSC, COR\n") #now cycle through all the framse for i in range(0, len(md_dcd), step): print("Analysing frame %d" % i) frame = md_dcd[i] #create a folder folder = "analysis-%d" % i os.mkdir(folder)
print('Equilibrating...')
simulation.step(100)

# append reporters
production_steps = 25000000
report_interval = 1000
# Progress and remaining time are measured against totalSteps, so it has to
# be the step count the simulation will have reached at the end of the run
# (steps already taken plus the production steps), not the report interval.
total_steps = simulation.currentStep + production_steps
simulation.reporters.append(app.DCDReporter('trajectory.dcd', report_interval))
simulation.reporters.append(app.StateDataReporter(stdout, report_interval, step=True,
    potentialEnergy=True, temperature=True, progress=True, remainingTime=True,
    speed=True, totalSteps=total_steps, separator='\t'))

# run 50 ns of production simulation
print('Running Production...')
simulation.step(production_steps)
print('Done!')

# load trajectory (every 50th saved frame) and remove solvent
traj = md.load_dcd('trajectory.dcd', top='topology.pdb', stride=50)
traj = traj.atom_slice(traj.top.select('protein or resname ZNB'))

# calculate RMSD to first frame and plot figure
rmsd = md.rmsd(traj, traj)
plt.figure()
plt.plot(rmsd)
plt.title('RMSD to first frame')
plt.xlabel('Frame (0.1 ns/frame)')
plt.ylabel('RMSD (nm)')
plt.savefig('rmsd.png', dpi=300)
plt.close()

# calculate mean sulfur - zinc distances for 3 metal centers and plot figure
atom_pairs_1 = [[3904, 892], [3904, 917], [3904, 1136], [3904, 1180]]
def _read_monomer_atom_names(path):
    """Return ``(pdbname1, element1, pdbname2, element2)`` read from the atom file.

    Lines containing ``mon1`` / ``mon2`` switch the section; every other line
    contributes its first two whitespace separated fields (pdb name, element
    name) to the active section. Lines before the first marker and lines
    with fewer than two fields are ignored.
    """
    pdbname1, pdbname2, element1, element2 = [], [], [], []
    flag = 0
    with open(path, 'r') as atomfile:
        for aline in atomfile:
            if 'mon1' in aline:
                flag = 1
                continue
            if 'mon2' in aline:
                flag = 2
                continue
            asplit = aline.split()
            if len(asplit) < 2:
                # Blank or incomplete line: nothing to record.
                continue
            if flag == 1:
                pdbname1.append(asplit[0])
                element1.append(asplit[1])
            elif flag == 2:
                pdbname2.append(asplit[0])
                element2.append(asplit[1])
    return pdbname1, element1, pdbname2, element2


def main():
    """Write heterodimers picked from evenly spaced trajectory frames.

    Usage: ``<script> topology.pdb trajectory.dcd atomfile``. The number of
    output files, the contact criterion, the monomer names, the residue id
    bounds and the number of equilibration frames to discard are requested
    interactively. Only criterion type 1 (atomic contact distance) is
    implemented; any other choice raises ``NotImplementedError``. Output
    number ``i`` is written to ``<mon1>_<mon2>_<i>.xyz``.
    """
    #declare topology (.pdb) file and trajectory (.dcd) file
    topfile = sys.argv[1]
    trjfile = sys.argv[2]

    #load atomfile (pdb name, actual element name)
    # Read up front inside a context manager, so a missing file still fails
    # before any prompt and the handle is closed even if a later step raises.
    pdbname1, element1, pdbname2, element2 = _read_monomer_atom_names(sys.argv[3])

    #settings for dimer choice.
    ndim = int(input("how many dimer output files?\n"))
    cflag = int(
        input(
            "criterion type? 1: atomic contact distance, 2: specific pair(unable), 3: H-bond(unable)\n"
        ))
    if cflag != 1:
        # Without this check `crit` would be undefined and the run would die
        # with a NameError only after the whole trajectory had been loaded.
        raise NotImplementedError(
            "criterion type {} is not implemented; only type 1 "
            "(atomic contact distance) is supported".format(cflag))

    crit1 = float(input("The threshold contact distance? (in A)\n"))
    crit1 *= 0.1  #unit conversion from angstrom to nanometers
    crit2 = int(
        input("Number of required atom-atom contact for dimer selection?\n"))
    crit = [crit1, crit2]

    mon12 = input("Name of two monomers? ex) EMIM Cl\n")
    msplit = mon12.split()
    namemon1, namemon2 = msplit[0], msplit[1]

    rid_total = input(
        "residue id bound for mon1 and mon2? ex) 0 149 150 299\n")
    rsplit = rid_total.split()
    rid1 = numpy.array([int(rsplit[0]), int(rsplit[1])])
    rid2 = numpy.array([int(rsplit[2]), int(rsplit[3])])

    teq = int(
        input(
            "How many initial frames do you want to cut as equilibration? ex) 5000 \n"
        ))

    start_time = timeit.default_timer()

    #construct vdw hetero diameter matrix
    #not implemented yet

    #load files and prepare parameters
    oritopol = md.load(topfile).topology
    table, bonds = oritopol.to_dataframe()
    # Replace the pdb atom names by the element names from the atom file,
    # monomer 1 first, in file order.
    for pdb_name, element in zip(pdbname1, element1):
        table.loc[table['name'] == pdb_name, 'name'] = element
    for pdb_name, element in zip(pdbname2, element2):
        table.loc[table['name'] == pdb_name, 'name'] = element
    topology = md.Topology.from_dataframe(table, bonds)

    traj = md.load_dcd(trjfile, top=topology)
    traj = traj[teq:]
    nstep = traj.n_frames

    #drude particle exclusion
    # Every atom whose (renamed) name contains a 'D' is dropped.
    traj = traj.atom_slice(
        [atom.index for atom in topology.atoms if ('D' not in atom.name)])
    elapsed = timeit.default_timer() - start_time
    print('traj loading complete. time {:10.4f}'.format(elapsed))
    print('nstep: {}'.format(nstep))

    #loop
    for dindex in range(ndim):
        #load 1 frame, then select dimer
        ntarf = int(nstep / ndim * dindex)  #index of target frame
        oneframe = traj[ntarf]
        sel_dimer = search_heterodimer(oneframe, crit, rid1, rid2)

        #write xyz file
        sel_dimer.save_xyz(
            str(namemon1) + '_' + str(namemon2) + '_' + str(dindex) + '.xyz')

        if dindex % 10 == 0:
            elapsed = timeit.default_timer() - start_time
            print('step {} time {:10.4f}'.format(dindex, elapsed))
trypsin_type = RDKitMoleculeType(trypsin, mol_name="trypsin") ben_type = RDKitMoleculeType(ben, mol_name="BEN") print("finding features") ben_type.find_features() trypsin_type.find_features() print("loading trajectories") import mdtraj as mdj traj_path = osp.join(trypsin_dir, "allframes.dcd") traj_top = osp.join(trypsin_dir, "frame_0.pdb") ben_indices = range(0,18) trypsin_indices = range(18, 3247) ben_traj = mdj.load_dcd(traj_path, top=traj_top, atom_indices=ben_indices) trypsin_traj = mdj.load_dcd(traj_path, top=traj_top, atom_indices=trypsin_indices) # slice only the frames we want ben_coords = ben_traj.xyz[:] trypsin_coords = trypsin_traj.xyz[:] # the units are different in the trajectory ben_coords = ben_coords * 10 trypsin_coords = trypsin_coords * 10 print("making the system") from mastic.system import SystemType, System sys_type = SystemType({'name' : 'trypsin-benzamidine-complex',
help='run BLUES example without GPU platform') (options, args) = parser.parse_args() platformNames = [openmm.Platform.getPlatform(i).getName() for i in range(openmm.Platform.getNumPlatforms())] if 'CUDA' in platformNames: runNCMC('CUDA', relaxstepsNC, mdstep) else: if options.force: runNCMC('CPU', relaxstepsNC, mdstep) else: print('WARNING: Could not find a valid CUDA/OpenCL platform. BLUES is not recommended on CPUs.') print("To run on CPU: 'python blues/example.py -f'") di_dataFN = "dihedrals%iNC_gp%i_MD1000step.txt" %(relaxstepsNC, repeat) traj = md.load_dcd('accept.dcd', top = 'protein.pdb') indicies = np.array([[0, 4, 6, 8]]) dihedraldata = md.compute_dihedrals(traj, indicies) datafile = open(di_dataFN,'w') for value in dihedraldata: datafile.write("%s\n" % str(value)[1:-1]) datafile.close() di_dataFN2 = "nonblues_dihedrals%iNC_gp%i_MD1000step.txt" %(relaxstepsNC, repeat) traj2 = md.load_dcd('accept.dcd', top = 'protein.pdb') indicies2 = np.array([[18, 20, 22, 24]]) dihedraldata2 = md.compute_dihedrals(traj2, indicies2) datafile2 = open(di_dataFN2,'w') for value in dihedraldata2: datafile2.write("%s\n" % str(value)[1:-1])
## JOIN CSVs
print("\tJoining CSVs")
df_orig = pd.read_csv(original_data_file)
last_time = df_orig.iloc[-1]['Time (ps)']
df_re = pd.read_csv(restarted_data_file)
# Continue the restarted time axis from the end of the original run.
df_re['Time (ps)'] = df_re['Time (ps)'] + last_time
n_orig_rows = df_orig.shape[0]
# Top the table up to 1000 rows. The count is clamped at zero because a
# negative slice bound would cut rows off the END of the restart instead of
# selecting none.
n_extra_rows = max(0, 1000 - n_orig_rows)
# DataFrame.append was removed in pandas 2.0; concat is the replacement.
df_orig = pd.concat([df_orig, df_re.iloc[:n_extra_rows]])
df_orig.to_csv(file_prefix + file + '.50ns.combined.csv', index=False)

### JOIN TRAJECTORIES
print("\tLoading topology PDB")
# Load topology PDB
original_ref = md.load_pdb(original_ref_file)
print("\tRemoving solvent from topology PDB")
original_ref_solute = original_ref.remove_solvent()

# Load original trajectory
print("\tLoading original trajectory")
original_traj = md.load_dcd(original_traj_file, top=original_ref_solute)

# Load restarted trajectory, slicing to keep only the number of additional frames needed
print("\tLoading restarted trajectory")
restarted_traj = md.load(restarted_traj_file, top=original_ref_solute)
# Same clamp as for the CSV rows above.
n_extra_frames = max(0, 1000 - original_traj.n_frames)
restarted_traj = restarted_traj[:n_extra_frames]

# Join the trajectories
print("\tJoining trajectories and saving them")
joined_traj = md.join([original_traj, restarted_traj])
joined_traj.save(file_prefix + file + ".50ns.combined.solute.dcd")
def main():
    """Interactively extract heterodimer snapshots from a trajectory.

    The topology (.pdb) path is read from ``sys.argv[1]`` and the trajectory
    (.dcd) path from ``sys.argv[2]``. The dimer selection settings are then
    asked for on stdin. ``ndim`` evenly spaced frames are taken from the
    trajectory (after dropping the first ``teq`` frames), each one is passed
    to ``search_heterodimer``, and the selection is written to
    ``<mon1>_<mon2>_<index>.xyz`` and ``<mon1>_<mon2>_<index>.pdb``.

    Raises:
        NotImplementedError: if criterion type 3 (vdw contact surface) is
            chosen; that branch never defined ``crit``/``ap1``/``ap2`` and so
            could not run to completion.
        ValueError: if the criterion type is not 1, 2 or 3.
    """
    # topology (.pdb) file and trajectory (.dcd) file
    topfile = sys.argv[1]
    trjfile = sys.argv[2]

    # settings for dimer choice
    ndim = int(input("how many dimer output files?\n"))
    cflag = int(
        input(
            "criterion type? 1: atomic contact distance, 2: specific pair, 3: vdw contact surface\n"
        ))
    if cflag == 1:
        crit1 = float(input("The threshold contact distance? (in A)\n"))
        crit1 *= 0.1  # unit conversion from angstrom to nanometers
        crit2 = int(
            input(
                "Number of required atom-atom contact for dimer selection?\n"))
        crit = [crit1, crit2]
        ap1, ap2 = 'None', 'None'
    elif cflag == 2:
        apair = input("Atom names (in pdb file) for pairs? ex) H13 O2a\n")
        asplit = apair.split()
        ap1, ap2 = asplit[0], asplit[1]
        crit1 = float(input("The threshold contact distance? (in A)\n"))
        crit1 *= 0.1  # unit conversion from angstrom to nanometers
        crit2 = int(
            input(
                "Number of required atom-atom contact for dimer selection? ex) 1\n"
            ))
        crit = [crit1, crit2]
    elif cflag == 3:
        # Fail before asking further questions and loading the trajectory:
        # the vdw hetero diameter matrix this criterion needs was never
        # implemented (per-atom names/elements/vdw radii were meant to come
        # from an atom file given as sys.argv[3]).
        raise NotImplementedError(
            "criterion type 3 (vdw contact surface) is not implemented yet")
    else:
        raise ValueError(
            "unknown criterion type {}; expected 1, 2 or 3".format(cflag))

    mon12 = input("Name of two monomers? ex) EMIM Cl\n")
    msplit = mon12.split()
    namemon1, namemon2 = msplit[0], msplit[1]
    rid_total = input(
        "residue id bound for mon1 and mon2? ex) 0 149 150 299\n")
    rsplit = rid_total.split()
    rid1 = numpy.array([int(rsplit[0]), int(rsplit[1])])
    rid2 = numpy.array([int(rsplit[2]), int(rsplit[3])])
    teq = int(
        input(
            "How many initial frames do you want to cut as equilibration? ex) 5000 \n"
        ))
    save_drude = input("Include drude particles? y or n \n")

    start_time = timeit.default_timer()

    # load files and prepare parameters
    topology = md.load(topfile).topology
    traj = md.load_dcd(trjfile, top=topology)
    traj = traj[teq:]
    nstep = traj.n_frames

    # drude particle exclusion: drop atoms whose name contains 'del'
    if save_drude == 'n':
        traj = traj.atom_slice([
            atom.index for atom in topology.atoms
            if ('del' not in atom.name)
        ])

    # ap1list, ap2list construction.
    # caution : the selection runs on the full topology while traj may have
    # been atom-sliced above, so this can have a bug if drude particle
    # indices are preceding.
    ap1list, ap2list = topology.select('name ' +
                                       ap1), topology.select('name ' + ap2)

    elapsed = timeit.default_timer() - start_time
    print('traj loading complete. time {:10.4f}'.format(elapsed))
    print('nstep: {}'.format(nstep))

    # loop
    for dindex in range(ndim):
        # load 1 frame, then select dimer
        ntarf = int(nstep / ndim * dindex)  # index of target frame
        oneframe = traj[ntarf]
        sel_dimer = search_heterodimer(oneframe, crit, rid1, rid2, cflag,
                                       ap1list, ap2list)

        # write xyz and pdb files
        out_prefix = str(namemon1) + '_' + str(namemon2) + '_' + str(dindex)
        sel_dimer.save_xyz(out_prefix + '.xyz')
        sel_dimer.save_pdb(out_prefix + '.pdb')

        if dindex % 10 == 0:
            elapsed = timeit.default_timer() - start_time
            print('step {} time {:10.4f}'.format(dindex, elapsed))

    # No atomfile.close() here: the matching open() is commented out, so the
    # old call raised NameError after all output had been written.
import mdtraj as md
import time
import os

# Export every second frame among the first 100 frames of one trajectory as
# individual PDB files, then report how long the whole run took.

# time.clock() was removed in Python 3.8; perf_counter() is the wall-clock
# replacement. Its zero point is arbitrary, so only differences are meaningful.
t0 = time.perf_counter()

traj_index = 80
base_dir = '/archive/yzhang/PS2/quinone/md_traj'
out_dir = os.path.join(base_dir, 'traj_%s' % traj_index)
# exist_ok keeps re-runs from failing on an already existing directory and
# avoids spawning a shell just to call mkdir.
os.makedirs(out_dir, exist_ok=True)

topo = os.path.join(base_dir, 'step5_charmm2omm_keep.psf')
traj = os.path.join(base_dir, 'step7_%s.dcd' % traj_index)

trajectory = md.load_dcd(traj, top=topo)

for i in range(0, 100, 2):
    trajectory[i].save_pdb(
        os.path.join(out_dir, 'frame%s_%s.pdb' % (i, traj_index)),
        force_overwrite=True,
        bfactors=None)

t3 = time.perf_counter()
# Report the elapsed time, not the raw end timestamp.
print('total time:', t3 - t0)