def test_reweight_statistics():
    """Smoke test: `ReweightStatistics` executes on a small synthetic data set.

    A statistics file holding constant reduced potentials and constant potential
    energies is written to a temporary directory, and that same file is used as
    the statistics source, the reference state and the target state. The test
    only checks that `execute` completes; it makes no assertion on the output.
    """
    n_frames = 10

    # Two independent random constants are drawn, reduced potentials first.
    reduced = np.ones(n_frames) * random.random() * unit.dimensionless
    energies = np.ones(n_frames) * random.random() * unit.kilojoule / unit.mole

    with tempfile.TemporaryDirectory() as directory:

        csv_path = path.join(directory, "stats.csv")

        source_array = StatisticsArray()
        source_array[ObservableType.ReducedPotential] = reduced
        source_array[ObservableType.PotentialEnergy] = energies
        source_array.to_pandas_csv(csv_path)

        protocol = ReweightStatistics("reduced_potentials")
        protocol.statistics_type = ObservableType.PotentialEnergy

        # The same file plays every role: data source, reference and target.
        protocol.statistics_paths = csv_path
        protocol.reference_reduced_potentials = csv_path
        protocol.target_reduced_potentials = csv_path

        protocol.bootstrap_uncertainties = True
        protocol.required_effective_samples = 0

        protocol.execute(directory, ComputeResources())
def test_statistics_object():
    """Round-trip a `StatisticsArray` through csv and check its basic behaviour.

    The test covers:

    * loading an OpenMM csv file and re-saving / re-loading it as a pandas csv,
    * sub-sampling an existing array by index with `from_existing`,
    * rejection (`ValueError`) of observables which have the wrong length or
      which are not wrapped in a unit,
    * acceptance of a correctly sized, unit-wrapped observable.
    """
    import tempfile

    statistics_object = StatisticsArray.from_openmm_csv(
        get_data_filename("test/statistics/stats_openmm.csv"), 1 * unit.atmosphere
    )

    # The intermediate file lives in a temporary directory instead of the
    # current working directory, so it is cleaned up even when an assertion
    # below fails and it cannot clash with a stray file of the same name.
    with tempfile.TemporaryDirectory() as directory:

        pandas_path = os.path.join(directory, "stats_pandas.csv")

        statistics_object.to_pandas_csv(pandas_path)
        statistics_object = StatisticsArray.from_pandas_csv(pandas_path)

        assert statistics_object is not None

        subsampled_array = StatisticsArray.from_existing(statistics_object, [1, 2, 3])
        assert subsampled_array is not None and len(subsampled_array) == 3

    # An array which is one entry too short must be rejected.
    reduced_potential = np.array([0.1] * (len(statistics_object) - 1))

    with pytest.raises(ValueError):
        statistics_object[ObservableType.ReducedPotential] = reduced_potential

    # An array of the correct length but without a unit must also be rejected.
    reduced_potential = np.array([0.1] * len(statistics_object))

    with pytest.raises(ValueError):
        statistics_object[ObservableType.ReducedPotential] = reduced_potential

    statistics_object[ObservableType.ReducedPotential] = (
        reduced_potential * unit.dimensionless
    )

    assert ObservableType.ReducedPotential in statistics_object
def _load_reduced_potentials(self):
    """Loads the target and reference reduced potentials from the specified
    statistics files.

    If either `reference_reduced_potentials` or `target_reduced_potentials`
    is a single string it is first replaced (on `self`) by a one element list.

    Returns
    -------
    numpy.ndarray
        The reference reduced potentials, built from one entry per
        reference statistics file.
    numpy.ndarray
        The target reduced potentials, built from one entry per
        target statistics file.

    Raises
    ------
    ValueError
        If more than one target statistics file is specified.
    """

    if isinstance(self.reference_reduced_potentials, str):
        self.reference_reduced_potentials = [self.reference_reduced_potentials]

    if isinstance(self.target_reduced_potentials, str):
        self.target_reduced_potentials = [self.target_reduced_potentials]

    # Validate the *input* paths (not the still-empty output list) and do so
    # before any file is read, so that an unsupported request fails early.
    if len(self.target_reduced_potentials) > 1:

        raise ValueError(
            "This protocol currently only supports reweighting to "
            "a single target state."
        )

    def load_dimensionless(file_path):
        # Read one statistics file and strip the units from its reduced potentials.
        statistics_array = StatisticsArray.from_pandas_csv(file_path)
        reduced_potentials = statistics_array[ObservableType.ReducedPotential]

        return reduced_potentials.to(unit.dimensionless).magnitude

    # Load in the reference reduced potentials.
    reference_reduced_potentials = np.array(
        [load_dimensionless(file_path) for file_path in self.reference_reduced_potentials]
    )

    # Load in the target reduced potentials.
    target_reduced_potentials = np.array(
        [load_dimensionless(file_path) for file_path in self.target_reduced_potentials]
    )

    return reference_reduced_potentials, target_reduced_potentials
def test_run_openmm_simulation_checkpoints():
    """Check that `OpenMMSimulation` honours its checkpoint file.

    Two situations are exercised:

    * executing an already finished protocol a second time must not append
      further frames to the statistics or trajectory outputs,
    * after the checkpoint (and the protocol) are edited to pretend that the
      run used a different step count / output frequency, executing again must
      still leave exactly four frames in both outputs.
    """
    import mdtraj

    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        coordinate_path, system_path = _setup_dummy_system(directory)

        simulation = OpenMMSimulation("npt_equilibration")
        simulation.total_number_of_iterations = 1
        simulation.steps_per_iteration = 4
        simulation.output_frequency = 1
        simulation.thermodynamic_state = state
        simulation.input_coordinate_file = coordinate_path
        simulation.system_path = system_path

        def assert_four_frames():
            # Both the statistics file and the trajectory must hold four frames.
            n_statistics = len(
                StatisticsArray.from_pandas_csv(simulation.statistics_file_path)
            )
            assert n_statistics == 4

            n_trajectory = len(
                mdtraj.load(simulation.trajectory_file_path, top=coordinate_path)
            )
            assert n_trajectory == 4

        # Check that executing twice doesn't run the simulation twice
        simulation.execute(directory, ComputeResources())
        assert os.path.isfile(simulation._checkpoint_path)

        simulation.execute(directory, ComputeResources())
        assert_four_frames()

        # Make sure that the output files are correctly truncating if more frames
        # than expected are written
        with open(simulation._checkpoint_path, "r") as file:
            checkpoint = json.load(file, cls=TypedJSONDecoder)

        # Fake having saved more frames than expected
        for target in (simulation, checkpoint):
            target.steps_per_iteration = 8

        for target in (simulation, checkpoint):
            target.output_frequency = 2

        with open(simulation._checkpoint_path, "w") as file:
            json.dump(checkpoint, file, cls=TypedJSONEncoder)

        simulation.execute(directory, ComputeResources())
        assert_four_frames()
def test_concatenate_statistics():
    """Concatenating a statistics file with itself doubles the number of rows."""
    source_path = get_data_filename("test/statistics/stats_pandas.csv")
    source_array = StatisticsArray.from_pandas_csv(source_path)

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = ConcatenateStatistics("concatenate_protocol")
        # The same file is supplied twice.
        protocol.input_statistics_paths = [source_path, source_path]
        protocol.execute(working_directory, ComputeResources())

        joined_array = StatisticsArray.from_pandas_csv(protocol.output_statistics_path)
        expected_length = len(source_array) * 2

        assert len(joined_array) == expected_length
def _save_final_statistics(self, path, temperature, pressure):
    """Convert the local OpenMM statistics csv into a `StatisticsArray` csv file.

    The array is loaded from `self._local_statistics_path`, a reduced potential
    column is computed from the potential energy (plus a pressure-volume term
    when a pressure is given) and added to it, and the result is written out.

    Parameters
    ----------
    path: str
        The path to save the statistics to.
    temperature: pint.Quantity
        The temperature that the simulation is being run at.
    pressure: pint.Quantity
        The pressure that the simulation is being run at. When `None`, no
        pressure-volume term is added to the reduced potential.
    """
    array = StatisticsArray.from_openmm_csv(self._local_statistics_path, pressure)

    # Energy per mole -> energy per particle.
    per_particle_energy = array[ObservableType.PotentialEnergy] / unit.avogadro_constant

    if pressure is not None:
        per_particle_energy += pressure * array[ObservableType.Volume]

    inverse_kt = 1.0 / (unit.boltzmann_constant * temperature)
    reduced = (inverse_kt * per_particle_energy).to(unit.dimensionless)

    array[ObservableType.ReducedPotential] = reduced
    array.to_pandas_csv(path)
def _execute(self, directory, available_resources):
    """Join the input statistics files into a single csv file.

    Every path in `input_statistics_paths` is loaded as a `StatisticsArray`.
    Several arrays are joined with `StatisticsArray.join`; a single array is
    used as is. The result is written to `output_statistics.csv` inside
    `directory` and its path stored in `output_statistics_path`.

    Parameters
    ----------
    directory: str
        The directory in which the output file is created.
    available_resources
        Not used by this method.

    Raises
    ------
    ValueError
        If `input_statistics_paths` is empty.
    """
    if len(self.input_statistics_paths) == 0:
        raise ValueError("No statistics arrays were given to concatenate.")

    loaded_arrays = []

    for file_path in self.input_statistics_paths:
        loaded_arrays.append(StatisticsArray.from_pandas_csv(file_path))

    # Nothing to join when only one array was supplied.
    if len(loaded_arrays) == 1:
        joined_array = loaded_arrays[0]
    else:
        joined_array = StatisticsArray.join(*loaded_arrays)

    output_path = path.join(directory, "output_statistics.csv")
    self.output_statistics_path = output_path

    joined_array.to_pandas_csv(self.output_statistics_path)
def test_extract_uncorrelated_statistics_data():
    """Check the number of samples kept by `ExtractUncorrelatedStatisticsData`.

    With an equilibration index of 2 and a statistical inefficiency of 2.0 the
    protocol is expected to keep `(N - 2) / 2` of the `N` original rows, and to
    report the same number through `number_of_uncorrelated_samples`.
    """
    source_path = get_data_filename("test/statistics/stats_pandas.csv")
    source_array = StatisticsArray.from_pandas_csv(source_path)

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = ExtractUncorrelatedStatisticsData("extract_protocol")
        protocol.input_statistics_path = source_path
        protocol.equilibration_index = 2
        protocol.statistical_inefficiency = 2.0

        protocol.execute(working_directory, ComputeResources())

        extracted_array = StatisticsArray.from_pandas_csv(
            protocol.output_statistics_path
        )

        expected_samples = (len(source_array) - 2) / 2

        assert len(extracted_array) == expected_samples
        assert protocol.number_of_uncorrelated_samples == expected_samples
def _execute(self, directory, available_resources):
    """Write the uncorrelated, post-equilibration rows of a statistics file.

    The statistics at `input_statistics_path` are loaded, the indices of the
    uncorrelated rows after `equilibration_index` are obtained from
    `timeseries.get_uncorrelated_indices` using `statistical_inefficiency`,
    and those rows are saved to `uncorrelated_statistics.csv` in `directory`.
    `output_statistics_path` and `number_of_uncorrelated_samples` are set
    accordingly.

    Parameters
    ----------
    directory: str
        The directory in which the output file is created.
    available_resources
        Not used by this method.
    """
    full_array = StatisticsArray.from_pandas_csv(self.input_statistics_path)

    # Indices relative to the first row after the equilibration region.
    relative_indices = timeseries.get_uncorrelated_indices(
        len(full_array) - self.equilibration_index,
        self.statistical_inefficiency,
    )

    # Shift them back so that they index into the full array.
    absolute_indices = []

    for index in relative_indices:
        absolute_indices.append(index + self.equilibration_index)

    subsampled_array = StatisticsArray.from_existing(full_array, absolute_indices)

    self.output_statistics_path = path.join(directory, "uncorrelated_statistics.csv")
    subsampled_array.to_pandas_csv(self.output_statistics_path)

    self.number_of_uncorrelated_samples = len(subsampled_array)
def _execute(self, directory, available_resources):
    """Load the observables to reweight and hand over to the parent protocol.

    The observable selected by `statistics_type` is read from each file in
    `statistics_paths` and stored in `self._reference_observables`. When
    `frame_counts` is non-empty, the single input file is instead split into
    consecutive slices of the given lengths, one entry per slice.

    Parameters
    ----------
    directory: str
        Passed through to the parent implementation.
    available_resources
        Passed through to the parent implementation.

    Returns
    -------
    The value returned by the parent class' `_execute`.

    Raises
    ------
    ValueError
        If no statistics paths are provided, if `frame_counts` is used
        together with more than one statistics path, if the kinetic energy
        is selected as the observable, or if a frame count is not positive.
    """

    if isinstance(self.statistics_paths, str):
        self.statistics_paths = [self.statistics_paths]

    if self.statistics_paths is None or len(self.statistics_paths) == 0:
        # This must be raised: returning the exception object would silently
        # report the protocol as having finished.
        raise ValueError("No statistics paths were provided.")

    if len(self.frame_counts) > 0 and len(self.statistics_paths) != 1:

        raise ValueError(
            "The frame counts input can only be used when only a single "
            "path is passed to the `statistics_paths` input.",
        )

    if self.statistics_type == ObservableType.KineticEnergy:
        raise ValueError("Kinetic energies cannot be reweighted.")

    statistics_arrays = [
        StatisticsArray.from_pandas_csv(file_path)
        for file_path in self.statistics_paths
    ]

    self._reference_observables = []

    if len(self.frame_counts) > 0:

        # Split the single input array into consecutive chunks.
        statistics_array = statistics_arrays[0]
        current_index = 0

        for frame_count in self.frame_counts:

            if frame_count <= 0:
                raise ValueError("The frame counts must be > 0.")

            observables = statistics_array[self.statistics_type][
                current_index : current_index + frame_count
            ]
            self._reference_observables.append(observables)

            current_index += frame_count

    else:

        # One set of observables per input file.
        for statistics_array in statistics_arrays:

            observables = statistics_array[self.statistics_type]
            self._reference_observables.append(observables)

    return super(ReweightStatistics, self)._execute(directory, available_resources)
def test_calculate_reduced_potential_openmm():
    """Run `OpenMMReducedPotentials` end to end on a small water system.

    A TIP3P SMIRNOFF force field is written to disk, coordinates are built
    with packmol and parameterised, and the reduced potentials of a stored
    water trajectory are then evaluated. The test passes when the output
    statistics file exists and contains a reduced potential column.
    """
    water = Substance.from_components("O")
    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as file:
            file.write(build_tip3p_smirnoff_force_field().json())

        # Step 1: build a small box of molecules.
        packmol = BuildCoordinatesPackmol("build_coordinates")
        packmol.max_molecules = 10
        packmol.mass_density = 0.05 * unit.grams / unit.milliliters
        packmol.substance = water
        packmol.execute(directory, None)

        # Step 2: parameterise it.
        parameterise = BuildSmirnoffSystem("assign_parameters")
        parameterise.force_field_path = force_field_path
        parameterise.coordinate_file_path = packmol.coordinate_file_path
        parameterise.substance = water
        parameterise.execute(directory, None)

        # Step 3: evaluate the reduced potentials of the stored trajectory.
        protocol = OpenMMReducedPotentials("reduced_potentials")
        protocol.substance = water
        protocol.thermodynamic_state = state
        protocol.reference_force_field_paths = [force_field_path]
        protocol.system_path = parameterise.system_path
        protocol.trajectory_file_path = get_data_filename("test/trajectories/water.dcd")
        protocol.coordinate_file_path = get_data_filename("test/trajectories/water.pdb")
        protocol.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        protocol.high_precision = False

        protocol.execute(directory, ComputeResources())

        assert path.isfile(protocol.statistics_file_path)

        output_array = StatisticsArray.from_pandas_csv(protocol.statistics_file_path)
        assert ObservableType.ReducedPotential in output_array
def _execute(self, directory, available_resources):
    """Evaluate reduced potentials with a parameter perturbed in both directions.

    The force field at `force_field_path` is loaded, two systems are built
    with `_build_reduced_system` using `-perturbation_scale` and
    `+perturbation_scale`, and the reduced potentials of every trajectory
    frame are evaluated with each of them. The results are written to
    `reverse.csv` and `forward.csv` in `directory`; the corresponding paths
    and parameter values are stored on the protocol.

    When `use_subset_of_force_field` is set, the difference between the
    potential energies stored at `statistics_path` and those re-evaluated
    with the system from `_build_reduced_system` is passed on as a per-frame
    energy correction.

    Parameters
    ----------
    directory: str
        The directory in which the output files are created.
    available_resources
        Forwarded to `_evaluate_reduced_potential`.

    Raises
    ------
    ValueError
        If the force field source is not a `SmirnoffForceFieldSource`.
    """
    import mdtraj
    from openforcefield.topology import Molecule, Topology

    with open(self.force_field_path) as file:
        serialized_source = file.read()

    force_field_source = ForceFieldSource.parse_json(serialized_source)

    if not isinstance(force_field_source, SmirnoffForceFieldSource):

        raise ValueError(
            "Only SMIRNOFF force fields are supported by this protocol.",
        )

    # Load in the inputs
    force_field = force_field_source.to_force_field()

    trajectory = mdtraj.load_dcd(self.trajectory_file_path, self.coordinate_file_path)

    unique_molecules = [
        Molecule.from_smiles(smiles=component.smiles)
        for component in self.substance.components
    ]

    pdb_file = app.PDBFile(self.coordinate_file_path)
    topology = Topology.from_openmm(
        pdb_file.topology, unique_molecules=unique_molecules
    )

    # Compute the difference between the energies using the reduced force field,
    # and the full force field.
    energy_corrections = None

    if self.use_subset_of_force_field:

        subset_system, _ = self._build_reduced_system(force_field, topology)
        subset_path = os.path.join(directory, "subset.csv")

        subset_statistics = self._evaluate_reduced_potential(
            subset_system, trajectory, subset_path, available_resources
        )
        full_statistics = StatisticsArray.from_pandas_csv(self.statistics_path)

        energy_corrections = (
            full_statistics[ObservableType.PotentialEnergy]
            - subset_statistics[ObservableType.PotentialEnergy]
        )

    # Build the slightly perturbed system.
    reverse_system, reverse_value = self._build_reduced_system(
        force_field, topology, -self.perturbation_scale
    )
    forward_system, forward_value = self._build_reduced_system(
        force_field, topology, self.perturbation_scale
    )

    self.reverse_parameter_value = openmm_quantity_to_pint(reverse_value)
    self.forward_parameter_value = openmm_quantity_to_pint(forward_value)

    # Calculate the reduced potentials.
    self.reverse_potentials_path = os.path.join(directory, "reverse.csv")
    self.forward_potentials_path = os.path.join(directory, "forward.csv")

    perturbed_systems = (
        (reverse_system, self.reverse_potentials_path),
        (forward_system, self.forward_potentials_path),
    )

    # Reverse first, then forward.
    for perturbed_system, output_path in perturbed_systems:

        self._evaluate_reduced_potential(
            perturbed_system,
            trajectory,
            output_path,
            available_resources,
            energy_corrections,
        )
def _evaluate_reduced_potential(
    self,
    system,
    trajectory,
    file_path,
    compute_resources,
    subset_energy_corrections=None,
):
    """Computes the reduced potential of each frame in a trajectory
    using the provided system.

    The potential energy of each frame (plus any per-frame correction) is
    divided by Avogadro's constant, a pressure-volume term is added when a
    pressure is set and `enable_pbc` is true, and the result is multiplied
    by `1 / (kB * T)`. Both the potential energies and the reduced
    potentials are stored in the returned array and written to `file_path`.

    Parameters
    ----------
    system: simtk.openmm.System
        The system which encodes the interaction forces for the
        specified parameter.
    trajectory: mdtraj.Trajectory
        A trajectory of configurations to evaluate.
    file_path: str
        The path to save the reduced potentials to.
    compute_resources: ComputeResources
        The compute resources available to execute on.
    subset_energy_corrections: pint.Quantity, optional
        A pint.Quantity wrapped numpy.ndarray which contains a set
        of energies to add to the re-evaluated potential energies.
        This is mainly used to correct the potential energies evaluated
        using a subset of the force field back to energies as if evaluated
        using the full thing.

    Returns
    -------
    StatisticsArray
        The array containing the reduced potentials.
    """
    integrator = openmm.VerletIntegrator(0.1 * simtk_unit.femtoseconds)

    # NOTE(review): the second argument is presumably the high precision
    # flag, as at the other call site of this helper - confirm.
    platform = setup_platform_with_resources(compute_resources, True)
    openmm_context = openmm.Context(system, integrator, platform)

    potentials = np.zeros(trajectory.n_frames, dtype=np.float64)
    reduced_potentials = np.zeros(trajectory.n_frames, dtype=np.float64)

    temperature = pint_quantity_to_openmm(self.thermodynamic_state.temperature)
    beta = 1.0 / (simtk_unit.BOLTZMANN_CONSTANT_kB * temperature)

    pressure = pint_quantity_to_openmm(self.thermodynamic_state.pressure)

    if subset_energy_corrections is None:
        # Default to a correction of zero for every frame.
        subset_energy_corrections = (
            np.zeros(trajectory.n_frames, dtype=np.float64)
            * simtk_unit.kilojoules_per_mole
        )
    else:
        subset_energy_corrections = pint_quantity_to_openmm(subset_energy_corrections)

    for frame_index in range(trajectory.n_frames):

        if self.enable_pbc:
            # Only query the box vectors when they are actually used, so that
            # non-periodic trajectories without unit cell information can
            # still be evaluated.
            box_vectors = trajectory.openmm_boxes(frame_index)
            openmm_context.setPeriodicBoxVectors(*box_vectors)

        positions = trajectory.xyz[frame_index]
        openmm_context.setPositions(positions)

        state = openmm_context.getState(getEnergy=True)

        potential_energy = (
            state.getPotentialEnergy() + subset_energy_corrections[frame_index]
        )
        # Energy per mole -> energy per particle.
        unreduced_potential = potential_energy / simtk_unit.AVOGADRO_CONSTANT_NA

        if pressure is not None and self.enable_pbc:
            unreduced_potential += pressure * state.getPeriodicBoxVolume()

        potentials[frame_index] = potential_energy.value_in_unit(
            simtk_unit.kilojoule_per_mole
        )
        reduced_potentials[frame_index] = unreduced_potential * beta

    # Attach units to the raw arrays.
    potentials *= unit.kilojoule / unit.mole
    reduced_potentials *= unit.dimensionless

    statistics_array = StatisticsArray()
    statistics_array[ObservableType.ReducedPotential] = reduced_potentials
    statistics_array[ObservableType.PotentialEnergy] = potentials
    statistics_array.to_pandas_csv(file_path)

    return statistics_array
def _execute(self, directory, available_resources):
    """Re-evaluate the energies of a stored trajectory and save them as statistics.

    The system at `system_path` is deserialized and, for every frame of the
    trajectory, the potential energy and the reduced potential (as reported
    by an `openmmtools` thermodynamic state) are computed. Together with the
    kinetic energies read from `kinetic_energies_path`, these are used to
    fill in the potential, kinetic and total energy, the reduced potential
    and the enthalpy of a new `StatisticsArray`, which is written to
    `statistics.csv` in `directory`. The path is stored in
    `statistics_file_path`.

    When `enable_pbc` is false, no pressure is passed to the thermodynamic
    state and `disable_pbc` is applied to the system. When
    `use_internal_energy` is set, the kinetic energies multiplied by beta
    are added to the stored reduced potentials.

    Parameters
    ----------
    directory: str
        The directory in which the output file is created.
    available_resources
        Forwarded to `setup_platform_with_resources`.
    """
    import openmmtools
    import mdtraj

    trajectory = mdtraj.load_dcd(self.trajectory_file_path, self.coordinate_file_path)

    with open(self.system_path, "r") as file:
        system = openmm.XmlSerializer.deserialize(file.read())

    temperature = pint_quantity_to_openmm(self.thermodynamic_state.temperature)
    pressure = pint_quantity_to_openmm(self.thermodynamic_state.pressure)

    if not self.enable_pbc:
        pressure = None
    else:
        system.setDefaultPeriodicBoxVectors(*trajectory.openmm_boxes(0))

    openmm_state = openmmtools.states.ThermodynamicState(
        system=system, temperature=temperature, pressure=pressure
    )

    integrator = openmmtools.integrators.VelocityVerletIntegrator(
        0.01 * simtk_unit.femtoseconds
    )

    # Setup the requested platform:
    platform = setup_platform_with_resources(available_resources, self.high_precision)
    openmm_system = openmm_state.get_system(True, True)

    if not self.enable_pbc:
        disable_pbc(openmm_system)

    openmm_context = openmm.Context(openmm_system, integrator, platform)

    n_frames = trajectory.n_frames

    potential_energies = np.zeros(n_frames)
    reduced_potentials = np.zeros(n_frames)

    for frame_index in range(n_frames):

        if self.enable_pbc:
            openmm_context.setPeriodicBoxVectors(
                *trajectory.openmm_boxes(frame_index)
            )

        openmm_context.setPositions(trajectory.xyz[frame_index])

        frame_state = openmm_context.getState(getEnergy=True)
        frame_energy = frame_state.getPotentialEnergy()

        potential_energies[frame_index] = frame_energy.value_in_unit(
            simtk_unit.kilojoule_per_mole
        )
        reduced_potentials[frame_index] = openmm_state.reduced_potential(
            openmm_context
        )

    # The kinetic energies are not recomputed; they are read from an existing file.
    kinetic_statistics = StatisticsArray.from_pandas_csv(self.kinetic_energies_path)
    kinetic_energies = kinetic_statistics[ObservableType.KineticEnergy]

    statistics_array = StatisticsArray()

    statistics_array[ObservableType.PotentialEnergy] = (
        potential_energies * unit.kilojoule / unit.mole
    )
    statistics_array[ObservableType.KineticEnergy] = kinetic_energies
    statistics_array[ObservableType.ReducedPotential] = (
        reduced_potentials * unit.dimensionless
    )

    statistics_array[ObservableType.TotalEnergy] = (
        statistics_array[ObservableType.PotentialEnergy]
        + statistics_array[ObservableType.KineticEnergy]
    )

    # The enthalpy is derived from the reduced potentials *before* the optional
    # kinetic contribution below is added to them.
    statistics_array[ObservableType.Enthalpy] = (
        statistics_array[ObservableType.ReducedPotential]
        * self.thermodynamic_state.inverse_beta
        + kinetic_energies
    )

    if self.use_internal_energy:
        statistics_array[ObservableType.ReducedPotential] += (
            kinetic_energies * self.thermodynamic_state.beta
        )

    self.statistics_file_path = os.path.join(directory, "statistics.csv")
    statistics_array.to_pandas_csv(self.statistics_file_path)