예제 #1
0
def test_reweight_statistics():
    """Smoke test: ``ReweightStatistics`` executes on a single statistics file.

    A ten frame statistics array holding constant (randomly chosen) reduced
    potentials and potential energies is written to a temporary directory.
    The same file is then used as the observable source, the reference state
    and the target state of the protocol. Nothing is asserted; the test
    passes as long as ``execute`` does not raise.
    """
    n_frames = 10

    # Each column holds one random value repeated for every frame.
    constant_reduced = np.ones(n_frames) * random.random() * unit.dimensionless
    constant_energies = np.ones(n_frames) * random.random() * unit.kilojoule / unit.mole

    with tempfile.TemporaryDirectory() as working_directory:

        csv_path = path.join(working_directory, "stats.csv")

        source_statistics = StatisticsArray()
        source_statistics[ObservableType.ReducedPotential] = constant_reduced
        source_statistics[ObservableType.PotentialEnergy] = constant_energies
        source_statistics.to_pandas_csv(csv_path)

        protocol = ReweightStatistics("reduced_potentials")
        protocol.statistics_type = ObservableType.PotentialEnergy

        # The one file plays every role: observables, reference and target.
        protocol.statistics_paths = csv_path
        protocol.reference_reduced_potentials = csv_path
        protocol.target_reduced_potentials = csv_path

        protocol.bootstrap_uncertainties = True
        protocol.required_effective_samples = 0

        protocol.execute(working_directory, ComputeResources())
예제 #2
0
def test_statistics_object():
    """Exercises the ``StatisticsArray`` CSV round trip, sub-sampling and
    the validation performed when assigning a column.

    The array is loaded from an OpenMM CSV file, written to and re-read from
    a pandas CSV file in the current working directory, and sub-sampled.
    The pandas file is removed in a ``finally`` clause so that a failing
    assertion does not leave it behind.
    """
    pandas_csv_path = "stats_pandas.csv"

    statistics_object = StatisticsArray.from_openmm_csv(
        get_data_filename("test/statistics/stats_openmm.csv"), 1 * unit.atmosphere
    )

    try:
        statistics_object.to_pandas_csv(pandas_csv_path)

        statistics_object = StatisticsArray.from_pandas_csv(pandas_csv_path)
        assert statistics_object is not None

        subsampled_array = StatisticsArray.from_existing(statistics_object, [1, 2, 3])
        assert subsampled_array is not None and len(subsampled_array) == 3
    finally:
        # Always clean up the scratch file, even when an assertion fails.
        if os.path.isfile(pandas_csv_path):
            os.unlink(pandas_csv_path)

    # An array which is one entry too short must be rejected.
    reduced_potential = np.array([0.1] * (len(statistics_object) - 1))

    with pytest.raises(ValueError):
        statistics_object[ObservableType.ReducedPotential] = reduced_potential

    # A bare numpy array of the correct length must also be rejected...
    reduced_potential = np.array([0.1] * len(statistics_object))

    with pytest.raises(ValueError):
        statistics_object[ObservableType.ReducedPotential] = reduced_potential

    # ...while the same values are accepted once multiplied by
    # `unit.dimensionless`.
    statistics_object[ObservableType.ReducedPotential] = (
        reduced_potential * unit.dimensionless
    )

    assert ObservableType.ReducedPotential in statistics_object
예제 #3
0
    def _load_reduced_potentials(self):
        """Loads the target and reference reduced potentials
        from the specified statistics files.

        The `reference_reduced_potentials` and `target_reduced_potentials`
        attributes may each be a single path or a collection of paths. A
        bare string is replaced, on `self`, by a one element list.

        Returns
        -------
        numpy.ndarray
            The reference reduced potentials, built with `numpy.array` from
            one entry (the dimensionless magnitudes) per reference file.
        numpy.ndarray
            The target reduced potentials, built in the same way from the
            (at most one) target file.

        Raises
        ------
        ValueError
            If more than one target statistics path has been provided.
        """

        if isinstance(self.reference_reduced_potentials, str):
            self.reference_reduced_potentials = [self.reference_reduced_potentials]

        if isinstance(self.target_reduced_potentials, str):
            self.target_reduced_potentials = [self.target_reduced_potentials]

        # Validate the *requested* target paths. This has to look at the
        # attribute on `self` rather than at the (still empty) list of loaded
        # arrays, and is done before any file is read so that an invalid
        # request fails fast.
        if len(self.target_reduced_potentials) > 1:

            raise ValueError(
                "This protocol currently only supports reweighting to "
                "a single target state."
            )

        def load_magnitudes(file_paths):
            """Read the dimensionless reduced potentials stored in each file."""
            magnitudes = []

            for file_path in file_paths:

                statistics_array = StatisticsArray.from_pandas_csv(file_path)
                reduced_potentials = statistics_array[ObservableType.ReducedPotential]

                magnitudes.append(reduced_potentials.to(unit.dimensionless).magnitude)

            return np.array(magnitudes)

        # Load in the reference reduced potentials, then the target ones.
        reference_reduced_potentials = load_magnitudes(
            self.reference_reduced_potentials
        )
        target_reduced_potentials = load_magnitudes(self.target_reduced_potentials)

        return reference_reduced_potentials, target_reduced_potentials
예제 #4
0
    def _save_final_statistics(self, path, temperature, pressure):
        """Re-saves the locally stored OpenMM statistics CSV file as a
        StatisticsArray (pandas) CSV file, adding a reduced potential column.

        The reduced potential is built from the potential energy divided by
        the Avogadro constant, plus a pressure * volume term when a pressure
        is given, multiplied by 1 / (k_B * temperature).

        Parameters
        ----------
        path: str
            The path to save the statistics to.
        temperature: pint.Quantity
            The temperature that the simulation is being run at.
        pressure: pint.Quantity
            The pressure that the simulation is being run at. May be `None`,
            in which case no pressure-volume term is included.
        """
        statistics = StatisticsArray.from_openmm_csv(
            self._local_statistics_path, pressure
        )

        # Convert the molar potential energy to a per-system energy.
        energy_per_system = (
            statistics[ObservableType.PotentialEnergy] / unit.avogadro_constant
        )

        if pressure is not None:
            energy_per_system += pressure * statistics[ObservableType.Volume]

        inverse_kt = 1.0 / (unit.boltzmann_constant * temperature)
        dimensionless_potentials = (inverse_kt * energy_per_system).to(
            unit.dimensionless
        )

        statistics[ObservableType.ReducedPotential] = dimensionless_potentials
        statistics.to_pandas_csv(path)
예제 #5
0
def test_concatenate_statistics():
    """Checks that concatenating a statistics file with itself yields an
    array which is twice as long as the original."""
    source_path = get_data_filename("test/statistics/stats_pandas.csv")
    reference_array = StatisticsArray.from_pandas_csv(source_path)

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = ConcatenateStatistics("concatenate_protocol")
        # The same file is passed twice so the expected length is known.
        protocol.input_statistics_paths = [source_path, source_path]
        protocol.execute(working_directory, ComputeResources())

        joined_array = StatisticsArray.from_pandas_csv(
            protocol.output_statistics_path
        )

        assert len(joined_array) == len(reference_array) * 2
예제 #6
0
    def _execute(self, directory, available_resources):
        """Joins the statistics arrays stored at `input_statistics_paths`
        and writes the result to ``output_statistics.csv`` in `directory`.

        The location of the written file is stored on
        `output_statistics_path`.

        Raises
        ------
        ValueError
            If `input_statistics_paths` is empty.
        """
        if len(self.input_statistics_paths) == 0:
            raise ValueError("No statistics arrays were given to concatenate.")

        loaded_arrays = []

        for csv_path in self.input_statistics_paths:
            loaded_arrays.append(StatisticsArray.from_pandas_csv(csv_path))

        # A lone array is written back out unchanged; only several arrays
        # need to be joined.
        combined_array = (
            StatisticsArray.join(*loaded_arrays)
            if len(loaded_arrays) > 1
            else loaded_arrays[0]
        )

        self.output_statistics_path = path.join(directory, "output_statistics.csv")
        combined_array.to_pandas_csv(self.output_statistics_path)
예제 #7
0
def test_extract_uncorrelated_statistics_data():
    """Checks that discarding two equilibration frames and sub-sampling with
    a statistical inefficiency of two retains half of the remaining frames."""
    source_path = get_data_filename("test/statistics/stats_pandas.csv")
    reference_array = StatisticsArray.from_pandas_csv(source_path)

    # Two frames are dropped, then every second frame is kept.
    expected_length = (len(reference_array) - 2) / 2

    with tempfile.TemporaryDirectory() as working_directory:

        protocol = ExtractUncorrelatedStatisticsData("extract_protocol")
        protocol.input_statistics_path = source_path
        protocol.equilibration_index = 2
        protocol.statistical_inefficiency = 2.0
        protocol.execute(working_directory, ComputeResources())

        extracted_array = StatisticsArray.from_pandas_csv(
            protocol.output_statistics_path
        )

        assert len(extracted_array) == expected_length
        assert protocol.number_of_uncorrelated_samples == expected_length
예제 #8
0
    def _execute(self, directory, available_resources):
        """Extracts the uncorrelated, post-equilibration frames from the
        statistics file at `input_statistics_path`.

        The retained frames are written to ``uncorrelated_statistics.csv``
        in `directory`. The path to that file and the number of retained
        frames are stored on `output_statistics_path` and
        `number_of_uncorrelated_samples` respectively.
        """
        full_array = StatisticsArray.from_pandas_csv(self.input_statistics_path)

        # The indices are computed for the post-equilibration portion only,
        # i.e. relative to `equilibration_index`.
        relative_indices = timeseries.get_uncorrelated_indices(
            len(full_array) - self.equilibration_index,
            self.statistical_inefficiency,
        )

        # Shift the indices so that they address the full array again.
        absolute_indices = []

        for relative_index in relative_indices:
            absolute_indices.append(relative_index + self.equilibration_index)

        retained_array = StatisticsArray.from_existing(full_array, absolute_indices)

        self.output_statistics_path = path.join(
            directory, "uncorrelated_statistics.csv"
        )
        retained_array.to_pandas_csv(self.output_statistics_path)

        self.number_of_uncorrelated_samples = len(retained_array)
예제 #9
0
    def _execute(self, directory, available_resources):
        """Loads the observables to reweight from the statistics files and
        then defers to the parent class' `_execute`.

        The observables of type `statistics_type` are stored, one entry per
        reference state, in `_reference_observables`. When `frame_counts` is
        non-empty, the single provided statistics array is split into
        consecutive slices of those lengths; otherwise each statistics file
        contributes one entry.

        Raises
        ------
        ValueError
            If no statistics paths were provided, if `frame_counts` is used
            together with more than one statistics path, if a frame count is
            not positive, or if kinetic energies are requested.
        """

        if isinstance(self.statistics_paths, str):
            self.statistics_paths = [self.statistics_paths]

        if self.statistics_paths is None or len(self.statistics_paths) == 0:
            # This must be raised, not returned - returning the exception
            # object would silently report the protocol as having succeeded.
            raise ValueError("No statistics paths were provided.")

        if len(self.frame_counts) > 0 and len(self.statistics_paths) != 1:

            raise ValueError(
                "The frame counts input can only be used when only a single "
                "path is passed to the `statistics_paths` input.",
            )

        if self.statistics_type == ObservableType.KineticEnergy:
            raise ValueError("Kinetic energies cannot be reweighted.")

        statistics_arrays = [
            StatisticsArray.from_pandas_csv(file_path)
            for file_path in self.statistics_paths
        ]

        self._reference_observables = []

        if len(self.frame_counts) > 0:

            # Split the single array into consecutive chunks, one per
            # entry in `frame_counts`.
            statistics_array = statistics_arrays[0]
            current_index = 0

            for frame_count in self.frame_counts:

                if frame_count <= 0:
                    raise ValueError("The frame counts must be > 0.")

                observables = statistics_array[self.statistics_type][
                    current_index : current_index + frame_count
                ]
                self._reference_observables.append(observables)

                current_index += frame_count

        else:

            for statistics_array in statistics_arrays:

                observables = statistics_array[self.statistics_type]
                self._reference_observables.append(observables)

        return super(ReweightStatistics, self)._execute(directory, available_resources)
예제 #10
0
def test_calculate_reduced_potential_openmm():
    """Runs ``OpenMMReducedPotentials`` on a stored water trajectory and
    checks that the statistics file it writes contains reduced potentials.

    The system is prepared by building coordinates with packmol and applying
    a TIP3P SMIRNOFF force field which is first serialized to JSON in a
    temporary directory.
    """
    water = Substance.from_components("O")
    state = ThermodynamicState(298 * unit.kelvin, 1.0 * unit.atmosphere)

    with tempfile.TemporaryDirectory() as directory:

        force_field_path = path.join(directory, "ff.json")

        with open(force_field_path, "w") as force_field_file:
            force_field_file.write(build_tip3p_smirnoff_force_field().json())

        # Build a small, low density box of molecules.
        packmol_protocol = BuildCoordinatesPackmol("build_coordinates")
        packmol_protocol.max_molecules = 10
        packmol_protocol.mass_density = 0.05 * unit.grams / unit.milliliters
        packmol_protocol.substance = water
        packmol_protocol.execute(directory, None)

        # Parameterize the box with the serialized force field.
        system_protocol = BuildSmirnoffSystem("assign_parameters")
        system_protocol.force_field_path = force_field_path
        system_protocol.coordinate_file_path = packmol_protocol.coordinate_file_path
        system_protocol.substance = water
        system_protocol.execute(directory, None)

        # Re-evaluate the stored trajectory using the parameterized system.
        protocol = OpenMMReducedPotentials("reduced_potentials")
        protocol.substance = water
        protocol.thermodynamic_state = state
        protocol.reference_force_field_paths = [force_field_path]
        protocol.system_path = system_protocol.system_path
        protocol.trajectory_file_path = get_data_filename(
            "test/trajectories/water.dcd"
        )
        protocol.coordinate_file_path = get_data_filename(
            "test/trajectories/water.pdb"
        )
        protocol.kinetic_energies_path = get_data_filename(
            "test/statistics/stats_pandas.csv"
        )
        protocol.high_precision = False
        protocol.execute(directory, ComputeResources())

        assert path.isfile(protocol.statistics_file_path)

        written_array = StatisticsArray.from_pandas_csv(protocol.statistics_file_path)
        assert ObservableType.ReducedPotential in written_array
예제 #11
0
    def _execute(self, directory, available_resources):
        """Evaluates the reduced potentials of a trajectory using systems
        built with a parameter perturbed by -/+ `perturbation_scale`.

        The results are written to ``reverse.csv`` and ``forward.csv`` in
        `directory`. Their paths, along with the perturbed parameter values,
        are stored on `reverse_potentials_path`, `forward_potentials_path`,
        `reverse_parameter_value` and `forward_parameter_value`.

        Raises
        ------
        ValueError
            If the force field at `force_field_path` is not a SMIRNOFF one.
        """

        import mdtraj
        from openforcefield.topology import Molecule, Topology

        with open(self.force_field_path) as force_field_file:
            force_field_source = ForceFieldSource.parse_json(force_field_file.read())

        if not isinstance(force_field_source, SmirnoffForceFieldSource):
            raise ValueError(
                "Only SMIRNOFF force fields are supported by this protocol."
            )

        # Load in the inputs.
        force_field = force_field_source.to_force_field()

        trajectory = mdtraj.load_dcd(
            self.trajectory_file_path, self.coordinate_file_path
        )

        unique_molecules = [
            Molecule.from_smiles(smiles=component.smiles)
            for component in self.substance.components
        ]

        pdb_file = app.PDBFile(self.coordinate_file_path)
        topology = Topology.from_openmm(
            pdb_file.topology, unique_molecules=unique_molecules
        )

        # When only a subset of the force field is used, compute the
        # per-frame difference between the stored (full) potential energies
        # and those evaluated with the unperturbed subset system.
        energy_corrections = None

        if self.use_subset_of_force_field:

            subset_system, _ = self._build_reduced_system(force_field, topology)

            subset_statistics = self._evaluate_reduced_potential(
                subset_system,
                trajectory,
                os.path.join(directory, "subset.csv"),
                available_resources,
            )
            full_statistics = StatisticsArray.from_pandas_csv(self.statistics_path)

            energy_corrections = (
                full_statistics[ObservableType.PotentialEnergy]
                - subset_statistics[ObservableType.PotentialEnergy]
            )

        # Build the slightly perturbed systems.
        reverse_system, reverse_value = self._build_reduced_system(
            force_field, topology, -self.perturbation_scale
        )
        forward_system, forward_value = self._build_reduced_system(
            force_field, topology, self.perturbation_scale
        )

        self.reverse_parameter_value = openmm_quantity_to_pint(reverse_value)
        self.forward_parameter_value = openmm_quantity_to_pint(forward_value)

        # Calculate the reduced potentials, reverse perturbation first.
        self.reverse_potentials_path = os.path.join(directory, "reverse.csv")
        self.forward_potentials_path = os.path.join(directory, "forward.csv")

        evaluations = (
            (reverse_system, self.reverse_potentials_path),
            (forward_system, self.forward_potentials_path),
        )

        for perturbed_system, output_path in evaluations:

            self._evaluate_reduced_potential(
                perturbed_system,
                trajectory,
                output_path,
                available_resources,
                energy_corrections,
            )
예제 #12
0
    def _evaluate_reduced_potential(
        self,
        system,
        trajectory,
        file_path,
        compute_resources,
        subset_energy_corrections=None,
    ):
        """Computes the reduced potential of each frame in a trajectory
        using the provided system.

        Parameters
        ----------
        system: simtk.openmm.System
            The system which encodes the interaction forces for the
            specified parameter.
        trajectory: mdtraj.Trajectory
            A trajectory of configurations to evaluate.
        file_path: str
            The path to save the reduced potentials to.
        compute_resources: ComputeResources
            The compute resources available to execute on.
        subset_energy_corrections: pint.Quantity, optional
            A pint.Quantity wrapped numpy.ndarray which contains a set
            of energies to add to the re-evaluated potential energies.
            This is mainly used to correct the potential energies evaluated
            using a subset of the force field back to energies as if evaluated
            using the full thing.

        Returns
        -------
        StatisticsArray
            The array containing the reduced potentials and the (corrected)
            potential energies. The same array is also written to
            `file_path`.
        """
        integrator = openmm.VerletIntegrator(0.1 * simtk_unit.femtoseconds)

        platform = setup_platform_with_resources(compute_resources, True)
        openmm_context = openmm.Context(system, integrator, platform)

        potentials = np.zeros(trajectory.n_frames, dtype=np.float64)
        reduced_potentials = np.zeros(trajectory.n_frames, dtype=np.float64)

        temperature = pint_quantity_to_openmm(
            self.thermodynamic_state.temperature)
        beta = 1.0 / (simtk_unit.BOLTZMANN_CONSTANT_kB * temperature)

        pressure = pint_quantity_to_openmm(self.thermodynamic_state.pressure)

        if subset_energy_corrections is None:
            # No correction requested - add zero to every frame.
            subset_energy_corrections = (
                np.zeros(trajectory.n_frames, dtype=np.float64) *
                simtk_unit.kilojoules_per_mole)
        else:
            subset_energy_corrections = pint_quantity_to_openmm(
                subset_energy_corrections)

        for frame_index in range(trajectory.n_frames):

            if self.enable_pbc:
                # Only query the box vectors when they are actually needed,
                # so that evaluating without periodic boundary conditions
                # does not depend on the trajectory storing box information.
                box_vectors = trajectory.openmm_boxes(frame_index)
                openmm_context.setPeriodicBoxVectors(*box_vectors)

            openmm_context.setPositions(trajectory.xyz[frame_index])

            state = openmm_context.getState(getEnergy=True)

            potential_energy = (state.getPotentialEnergy() +
                                subset_energy_corrections[frame_index])
            unreduced_potential = potential_energy / simtk_unit.AVOGADRO_CONSTANT_NA

            # The pressure-volume term is only included for periodic systems.
            if pressure is not None and self.enable_pbc:
                unreduced_potential += pressure * state.getPeriodicBoxVolume()

            potentials[frame_index] = potential_energy.value_in_unit(
                simtk_unit.kilojoule_per_mole)
            reduced_potentials[frame_index] = unreduced_potential * beta

        potentials *= unit.kilojoule / unit.mole
        reduced_potentials *= unit.dimensionless

        statistics_array = StatisticsArray()
        statistics_array[ObservableType.ReducedPotential] = reduced_potentials
        statistics_array[ObservableType.PotentialEnergy] = potentials
        statistics_array.to_pandas_csv(file_path)

        return statistics_array
예제 #13
0
    def _execute(self, directory, available_resources):
        """Re-evaluates the potential energy and reduced potential of each
        frame of a trajectory using the serialized system at `system_path`.

        The results, together with the kinetic energies read from
        `kinetic_energies_path` and the derived total energy and enthalpy,
        are written to ``statistics.csv`` in `directory`. The path to that
        file is stored on `statistics_file_path`.
        """

        import mdtraj
        import openmmtools

        trajectory = mdtraj.load_dcd(
            self.trajectory_file_path, self.coordinate_file_path
        )

        with open(self.system_path, "r") as system_file:
            system = openmm.XmlSerializer.deserialize(system_file.read())

        temperature = pint_quantity_to_openmm(self.thermodynamic_state.temperature)
        pressure = pint_quantity_to_openmm(self.thermodynamic_state.pressure)

        if not self.enable_pbc:
            # No pressure is passed to the thermodynamic state when periodic
            # boundary conditions are disabled.
            pressure = None
        else:
            system.setDefaultPeriodicBoxVectors(*trajectory.openmm_boxes(0))

        openmm_state = openmmtools.states.ThermodynamicState(
            system=system, temperature=temperature, pressure=pressure
        )
        integrator = openmmtools.integrators.VelocityVerletIntegrator(
            0.01 * simtk_unit.femtoseconds
        )

        # Setup the requested platform:
        platform = setup_platform_with_resources(
            available_resources, self.high_precision
        )
        openmm_system = openmm_state.get_system(True, True)

        if not self.enable_pbc:
            disable_pbc(openmm_system)

        openmm_context = openmm.Context(openmm_system, integrator, platform)

        n_frames = trajectory.n_frames

        potential_energies = np.zeros(n_frames)
        reduced_potentials = np.zeros(n_frames)

        for frame_index in range(n_frames):

            if self.enable_pbc:
                openmm_context.setPeriodicBoxVectors(
                    *trajectory.openmm_boxes(frame_index)
                )

            openmm_context.setPositions(trajectory.xyz[frame_index])

            frame_state = openmm_context.getState(getEnergy=True)
            frame_energy = frame_state.getPotentialEnergy()

            potential_energies[frame_index] = frame_energy.value_in_unit(
                simtk_unit.kilojoule_per_mole
            )
            reduced_potentials[frame_index] = openmm_state.reduced_potential(
                openmm_context
            )

        # The kinetic energies are copied from a separate statistics file.
        kinetic_statistics = StatisticsArray.from_pandas_csv(
            self.kinetic_energies_path
        )
        kinetic_energies = kinetic_statistics[ObservableType.KineticEnergy]

        statistics_array = StatisticsArray()

        statistics_array[ObservableType.PotentialEnergy] = (
            potential_energies * unit.kilojoule / unit.mole
        )
        statistics_array[ObservableType.KineticEnergy] = kinetic_energies
        statistics_array[ObservableType.ReducedPotential] = (
            reduced_potentials * unit.dimensionless
        )

        statistics_array[ObservableType.TotalEnergy] = (
            statistics_array[ObservableType.PotentialEnergy]
            + statistics_array[ObservableType.KineticEnergy]
        )

        # The enthalpy must be computed before the reduced potentials are
        # (optionally) modified below.
        statistics_array[ObservableType.Enthalpy] = (
            statistics_array[ObservableType.ReducedPotential]
            * self.thermodynamic_state.inverse_beta
            + kinetic_energies
        )

        if self.use_internal_energy:
            statistics_array[ObservableType.ReducedPotential] += (
                kinetic_energies * self.thermodynamic_state.beta
            )

        self.statistics_file_path = os.path.join(directory, "statistics.csv")
        statistics_array.to_pandas_csv(self.statistics_file_path)