Exemple #1
0
class SubtractValues(Protocol):
    """Protocol computing the difference between two input values.

    The output is defined as `result = value_b - value_a`, i.e. `value_a`
    is the value taken away from `value_b`.
    """

    value_a = InputAttribute(
        docstring="`value_a` in the formula `result` = `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    value_b = InputAttribute(
        docstring="`value_b` in the formula `result` = `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The results of `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store `value_b - value_a` in the `result` output.

        Neither `directory` nor `available_resources` is used.
        """
        minuend = self.value_b
        subtrahend = self.value_a

        self.result = minuend - subtrahend
Exemple #2
0
class ConcatenateTrajectories(Protocol):
    """Protocol that joins several DCD trajectories into one and writes the
    combined trajectory to the working directory.
    """

    input_coordinate_paths = InputAttribute(
        docstring="A list of paths to the starting PDB coordinates for each of the trajectories.",
        type_hint=list,
        default_value=UNDEFINED,
    )
    input_trajectory_paths = InputAttribute(
        docstring="A list of paths to the trajectories to concatenate.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    output_coordinate_path = OutputAttribute(
        docstring="The path the PDB coordinate file which contains the topology "
        "of the concatenated trajectory.",
        type_hint=str,
    )

    output_trajectory_path = OutputAttribute(
        docstring="The path to the concatenated trajectory.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Load every input trajectory, join them and save the result.

        Parameters
        ----------
        directory: str
            The directory in which `output_trajectory.dcd` is written.
        available_resources
            Not used by this protocol.

        Raises
        ------
        ValueError
            If the numbers of coordinate and trajectory paths differ, or if
            no trajectory paths were provided.
        """
        import mdtraj

        coordinate_paths = self.input_coordinate_paths
        trajectory_paths = self.input_trajectory_paths

        if len(coordinate_paths) != len(trajectory_paths):
            raise ValueError(
                "There should be the same number of coordinate and trajectory paths."
            )

        if len(trajectory_paths) == 0:
            raise ValueError("No trajectories were given to concatenate.")

        loaded_trajectories = []
        topology_path = None

        for coordinate_path, trajectory_path in zip(coordinate_paths, trajectory_paths):

            # Keep the first non-empty coordinate path; it is reported as the
            # topology file of the concatenated trajectory.
            if not topology_path:
                topology_path = coordinate_path

            loaded_trajectories.append(
                mdtraj.load_dcd(trajectory_path, coordinate_path)
            )

        self.output_coordinate_path = topology_path

        if len(loaded_trajectories) == 1:
            # Nothing to join when only a single trajectory was provided.
            combined_trajectory = loaded_trajectories[0]
        else:
            # NOTE(review): the two positional `False` flags are presumably
            # `check_topology` and `discard_overlapping_frames` - confirm
            # against the mdtraj documentation.
            combined_trajectory = mdtraj.join(loaded_trajectories, False, False)

        self.output_trajectory_path = path.join(directory, "output_trajectory.dcd")
        combined_trajectory.save_dcd(self.output_trajectory_path)
Exemple #3
0
class AttributeObject(AttributeClass):
    """A small `AttributeClass` subclass which declares one input flagged as
    required, one input flagged as optional, and a single output.
    """

    # Input declared with `optional=False`; `UNDEFINED` is passed as its
    # third positional argument.
    required_input = InputAttribute("", str, UNDEFINED, optional=False)
    # Input declared with `optional=True`; `UNDEFINED` is passed as its
    # third positional argument.
    optional_input = InputAttribute("", int, UNDEFINED, optional=True)

    some_output = OutputAttribute("", int)

    def __init__(self):
        """Create the object with `some_output` set to 5."""
        # NOTE(review): the base class initialiser is not invoked here -
        # confirm that `AttributeClass` does not require it.
        self.some_output = 5
Exemple #4
0
class AveragePropertyProtocol(Protocol, abc.ABC):
    """Abstract base for protocols which evaluate the average of a property
    together with an uncertainty estimated by bootstrapping.
    """

    bootstrap_iterations = InputAttribute(
        docstring="The number of bootstrap iterations to perform.",
        type_hint=int,
        default_value=250,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    bootstrap_sample_size = InputAttribute(
        docstring="The relative sample size to use for bootstrapping.",
        type_hint=float,
        default_value=1.0,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    equilibration_index = OutputAttribute(
        docstring="The index in the data set after which the data is stationary.",
        type_hint=int,
    )
    statistical_inefficiency = OutputAttribute(
        docstring="The statistical inefficiency in the data set.",
        type_hint=float,
    )

    value = OutputAttribute(
        docstring="The average value and its uncertainty.",
        type_hint=pint.Measurement,
    )
    uncorrelated_values = OutputAttribute(
        docstring="The uncorrelated values which the average was calculated from.",
        type_hint=pint.Quantity,
    )

    def _bootstrap_function(self, **sample_kwargs):
        """Evaluate the statistic of interest on one bootstrap sample.

        This implementation returns the mean of the single data array
        which it is given.

        Parameters
        ----------
        sample_kwargs: dict of str and np.ndarray
            The bootstrap sample data passed by keyword. Exactly one entry
            is expected; per the original documentation each entry is a
            numpy array of shape=(num_frames, num_dimensions) with
            dtype=float.

        Returns
        -------
        float
            The mean of the sample data.
        """

        # Only a single data array is supported by the default statistic.
        assert len(sample_kwargs) == 1

        sample_arrays = list(sample_kwargs.values())
        only_sample = sample_arrays[0]

        return only_sample.mean()
class DummyReplicableProtocol(Protocol):
    """A placeholder protocol with two replicated-value inputs and a single
    output, whose execution does nothing.
    """

    replicated_value_a = InputAttribute(
        docstring="",
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )
    replicated_value_b = InputAttribute(
        docstring="",
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )
    final_value = OutputAttribute(
        docstring="",
        type_hint=pint.Measurement,
    )

    def _execute(self, directory, available_resources):
        """Do nothing; `final_value` is never assigned here."""
Exemple #6
0
class AverageTrajectoryProperty(AveragePropertyProtocol, abc.ABC):
    """An abstract base class for protocols which will calculate the
    average of a property from a simulation trajectory.

    On top of what it inherits from `AveragePropertyProtocol`, this class
    only declares the two file path inputs below; it defines no methods
    of its own.
    """

    # Path to the coordinate file associated with the trajectory.
    input_coordinate_file = InputAttribute(
        docstring="The file path to the starting coordinates of a trajectory.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    # Path to the trajectory whose frames are averaged over.
    trajectory_path = InputAttribute(
        docstring="The file path to the trajectory to average over.",
        type_hint=str,
        default_value=UNDEFINED,
    )
Exemple #7
0
class AddValues(Protocol):
    """A protocol to add together a list of values.

    Notes
    -----
    The `values` input must either be a list of pint.Quantity, a ProtocolPath to a list
    of pint.Quantity, or a list of ProtocolPath which each point to a pint.Quantity.
    """

    values = InputAttribute(docstring="The values to add together.",
                            type_hint=list,
                            default_value=UNDEFINED)

    result = OutputAttribute(
        docstring="The sum of the values.",
        type_hint=typing.Union[int, float, pint.Measurement, pint.Quantity,
                               ParameterGradient],
    )

    def _execute(self, directory, available_resources):
        """Sum all of the entries of `values` and store the total in `result`.

        Parameters
        ----------
        directory: str
            Not used by this protocol.
        available_resources
            Not used by this protocol.

        Raises
        ------
        ValueError
            If `values` is empty.
        """

        values = self.values

        if len(values) < 1:
            raise ValueError("There were no values to add together")

        total = values[0]

        for value in values[1:]:
            # Deliberately use binary `+` rather than `+=`: the running total
            # starts out as the very same object as `values[0]`, and types
            # which implement an in-place `__iadd__` (e.g. array backed
            # quantities) would otherwise have that input silently modified.
            total = total + value

        self.result = total
Exemple #8
0
class ExtractUncorrelatedStatisticsData(ExtractUncorrelatedData):
    """Protocol which subsamples the rows of a statistics array, retaining
    only the entries selected from the provided equilibration index and
    statistical inefficiency.
    """

    input_statistics_path = InputAttribute(
        docstring="The file path to the statistics to subsample.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    output_statistics_path = OutputAttribute(
        docstring="The file path to the subsampled statistics.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Subsample the input statistics and write them to
        `uncorrelated_statistics.csv` inside of `directory`.

        Parameters
        ----------
        directory: str
            The directory in which the subsampled statistics are saved.
        available_resources
            Not used by this protocol.
        """

        full_statistics = StatisticsArray.from_pandas_csv(self.input_statistics_path)

        # The indices are computed for the data after the equilibration
        # index only, i.e. relative to the equilibration index.
        number_of_equilibrated_entries = (
            len(full_statistics) - self.equilibration_index
        )
        relative_indices = timeseries.get_uncorrelated_indices(
            number_of_equilibrated_entries,
            self.statistical_inefficiency,
        )

        # Shift the indices back so that they refer to the full array.
        absolute_indices = []

        for relative_index in relative_indices:
            absolute_indices.append(relative_index + self.equilibration_index)

        subsampled_statistics = StatisticsArray.from_existing(
            full_statistics, absolute_indices
        )

        self.output_statistics_path = path.join(
            directory, "uncorrelated_statistics.csv"
        )
        subsampled_statistics.to_pandas_csv(self.output_statistics_path)

        self.number_of_uncorrelated_samples = len(subsampled_statistics)
Exemple #9
0
class ConcatenateStatistics(Protocol):
    """A protocol which concatenates multiple statistics arrays into
    a single one.
    """

    input_statistics_paths = InputAttribute(
        docstring="A list of paths to statistics arrays to concatenate.",
        type_hint=list,
        default_value=UNDEFINED,
    )
    output_statistics_path = OutputAttribute(
        docstring="The path the csv file which contains the concatenated statistics.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Load each statistics file, join them and write the result to
        `output_statistics.csv` inside of `directory`.

        Parameters
        ----------
        directory: str
            The directory in which the joined statistics are saved.
        available_resources
            Not used by this protocol.

        Raises
        ------
        ValueError
            If no statistics paths were provided.
        """

        if len(self.input_statistics_paths) == 0:
            raise ValueError("No statistics arrays were given to concatenate.")

        loaded_arrays = []

        for statistics_path in self.input_statistics_paths:
            loaded_arrays.append(StatisticsArray.from_pandas_csv(statistics_path))

        # A lone array is passed through untouched rather than being joined.
        joined_array = (
            StatisticsArray.join(*loaded_arrays)
            if len(loaded_arrays) > 1
            else loaded_arrays[0]
        )

        self.output_statistics_path = path.join(directory, "output_statistics.csv")
        joined_array.to_pandas_csv(self.output_statistics_path)
class SolvateExistingStructure(BuildCoordinatesPackmol):
    """Protocol which uses the PACKMOL package to surround an existing set
    of 3D coordinates with a specified solvent.
    """

    solute_coordinate_file = InputAttribute(
        docstring="A file path to the solute to solvate.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    center_solute_in_box = InputAttribute(
        docstring="If `True`, the solute to solvate will be centered in the "
        "simulation box.",
        type_hint=bool,
        default_value=True,
    )

    def _execute(self, directory, available_resources):
        """Pack the solvent around the solute and save the coordinates.

        Parameters
        ----------
        directory: str
            The working directory; packmol runs in its `packmol_files`
            sub-directory and the results are saved via `_save_results`.
        available_resources
            Not used by this protocol.

        Raises
        ------
        RuntimeError
            If packmol does not return a trajectory.
        """

        # NOTE(review): the `output_number_of_molecules` and
        # `output_substance` outputs are not assigned by this method -
        # confirm whether that is intentional.

        # The third returned item is not needed here.
        molecules, number_of_molecules, _ = self._build_molecule_arrays()

        working_directory = path.join(directory, "packmol_files")

        # Create packed box
        packed_trajectory, residue_names = packmol.pack_box(
            molecules=molecules,
            number_of_copies=number_of_molecules,
            structure_to_solvate=self.solute_coordinate_file,
            center_solute=self.center_solute_in_box,
            mass_density=self.mass_density,
            verbose=self.verbose_packmol,
            working_directory=working_directory,
            retain_working_files=self.retain_packmol_files,
        )

        if packed_trajectory is None:
            raise RuntimeError("Packmol failed to complete.")

        # Record which residue name was given to each component.
        self.assigned_residue_names = {
            component.identifier: residue_name
            for component, residue_name in zip(self.substance, residue_names)
        }

        self._save_results(directory, packed_trajectory)
Exemple #11
0
class ExtractUncorrelatedData(Protocol, abc.ABC):
    """An abstract base class for protocols which will subsample
    a data set, yielding only equilibrated, uncorrelated data.

    This class only declares the inputs and the output shared by such
    protocols; it defines no methods of its own.
    """

    # NOTE(review): `LargestValue` presumably means the larger of the two
    # values is kept when protocols are merged - confirm against
    # `InequalityMergeBehaviour`.
    equilibration_index = InputAttribute(
        docstring=
        "The index in the data set after which the data is stationary.",
        type_hint=int,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    statistical_inefficiency = InputAttribute(
        docstring="The statistical inefficiency in the data set.",
        type_hint=float,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    # The number of samples which remain after subsampling.
    number_of_uncorrelated_samples = OutputAttribute(
        docstring="The number of uncorrelated samples.", type_hint=int)
class BaseEnergyMinimisation(Protocol, abc.ABC):
    """A base class for protocols which will minimise the potential
    energy of a given system.

    This class only declares the inputs and the output shared by such
    protocols; it defines no methods of its own.
    """

    # --- The system to minimise ---
    input_coordinate_file = InputAttribute(
        docstring="The coordinates to minimise.",
        type_hint=str,
        default_value=UNDEFINED)
    system_path = InputAttribute(
        docstring=
        "The path to the XML system object which defines the forces present "
        "in the system.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    # --- Convergence settings ---
    # Defaults to 10 kJ / mol.
    tolerance = InputAttribute(
        docstring=
        "The energy tolerance to which the system should be minimized.",
        type_hint=pint.Quantity,
        default_value=10 * unit.kilojoules / unit.mole,
    )
    # The default of 0 requests no cap on the number of iterations.
    max_iterations = InputAttribute(
        docstring="The maximum number of iterations to perform. If this is 0, "
        "minimization is continued until the results converge without regard to "
        "how many iterations it takes.",
        type_hint=int,
        default_value=0,
    )

    # Periodic boundary conditions are enabled by default.
    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled.",
        type_hint=bool,
        default_value=True,
    )

    # --- Outputs ---
    output_coordinate_file = OutputAttribute(
        docstring="The file path to the minimised coordinates.", type_hint=str)
Exemple #13
0
class DivideValue(Protocol):
    """Protocol computing `result = value / divisor`."""

    value = InputAttribute(
        docstring="The value to divide.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    divisor = InputAttribute(
        docstring="The scalar to divide by.",
        type_hint=typing.Union[int, float, pint.Quantity],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The result of the division.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store `value / divisor` in the `result` output.

        Neither `directory` nor `available_resources` is used.
        """
        numerator = self.value
        denominator = self.divisor

        self.result = numerator / denominator
Exemple #14
0
class MultiplyValue(Protocol):
    """Protocol computing `result = value * multiplier`."""

    value = InputAttribute(
        docstring="The value to multiply.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    multiplier = InputAttribute(
        docstring="The scalar to multiply by.",
        type_hint=typing.Union[int, float, pint.Quantity],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The result of the multiplication.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store `value * multiplier` in the `result` output.

        Neither `directory` nor `available_resources` is used.
        """
        base_value = self.value
        scale_factor = self.multiplier

        self.result = base_value * scale_factor
class DummyInputOutputProtocol(Protocol):
    """A placeholder protocol which passes its input straight through to
    its output.
    """

    input_value = InputAttribute(
        docstring="A dummy input.",
        type_hint=Union[
            str,
            int,
            float,
            pint.Quantity,
            pint.Measurement,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
        default_value=UNDEFINED,
    )
    output_value = OutputAttribute(
        docstring="A dummy output.",
        type_hint=Union[
            str,
            int,
            float,
            pint.Quantity,
            pint.Measurement,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
    )

    def _execute(self, directory, available_resources):
        """Assign `input_value` to `output_value` without modification."""
        passed_through = self.input_value
        self.output_value = passed_through
class BuildCoordinatesPackmol(Protocol):
    """Creates a set of 3D coordinates with a specified composition
    using the PACKMOL package.
    """

    max_molecules = InputAttribute(
        docstring="The maximum number of molecules to be added to the system.",
        type_hint=int,
        default_value=1000,
    )
    mass_density = InputAttribute(
        docstring="The target density of the created system.",
        type_hint=pint.Quantity,
        default_value=0.95 * unit.grams / unit.milliliters,
    )

    box_aspect_ratio = InputAttribute(
        docstring="The aspect ratio of the simulation box.",
        type_hint=list,
        default_value=[1.0, 1.0, 1.0],
    )

    substance = InputAttribute(
        docstring="The composition of the system to build.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    verbose_packmol = InputAttribute(
        docstring=
        "If True, packmol will print verbose information to the logger",
        type_hint=bool,
        default_value=False,
    )
    retain_packmol_files = InputAttribute(
        docstring="If True, packmol will not delete all of the temporary files "
        "it creates while building the coordinates.",
        type_hint=bool,
        default_value=False,
    )

    output_number_of_molecules = OutputAttribute(
        docstring="The number of molecules in the created system. This "
        "may be less than maximum requested due to rounding of "
        "mole fractions",
        type_hint=int,
    )
    output_substance = OutputAttribute(
        docstring="The substance which was built by packmol. This may differ "
        "from the input substance for system containing two or "
        "more components due to rounding of mole fractions. The "
        "mole fractions provided by this output should always be "
        "used when weighting values by a mole fraction.",
        type_hint=Substance,
    )

    assigned_residue_names = OutputAttribute(
        docstring="The residue names which were assigned to "
        "each of the components. Each key corresponds to a "
        "component identifier.",
        type_hint=dict,
    )

    coordinate_file_path = OutputAttribute(
        docstring="The file path to the created PDB coordinate file.",
        type_hint=str)

    def _build_molecule_arrays(self):
        """Converts the input substance into a list of molecules and a list
        of counts for how many of each there should be as determined by the
        `max_molecules` input and the substances respective mole fractions.

        Returns
        -------
        list of openforcefield.topology.Molecule
            The list of molecules.
        list of int
            The number of each molecule which should be added to the system.
        None
            Always `None`; callers unpack three values, so the slot is
            retained for compatibility.

        Raises
        ------
        ValueError
            If the total number of molecules to create exceeds
            `max_molecules`.
        """
        from openforcefield.topology import Molecule

        molecules = []

        for component in self.substance.components:

            molecule = Molecule.from_smiles(component.smiles)
            molecules.append(molecule)

        # Determine how many molecules of each type will be present in the system.
        molecules_per_component = self.substance.get_molecules_per_component(
            self.max_molecules)
        number_of_molecules = [0] * self.substance.number_of_components

        for index, component in enumerate(self.substance.components):
            number_of_molecules[index] = molecules_per_component[
                component.identifier]

        if sum(number_of_molecules) > self.max_molecules:

            raise ValueError(
                f"The number of molecules to create ({sum(number_of_molecules)}) is "
                f"greater than the maximum number requested ({self.max_molecules})."
            )

        return molecules, number_of_molecules, None

    def _rebuild_substance(self, number_of_molecules):
        """Rebuilds the `Substance` object which this protocol is building
        coordinates for.

        This may not be the same as the input substance due to the finite
        number of molecules to be added causing rounding of mole fractions.

        Parameters
        ----------
        number_of_molecules: list of int
            The number of each component which should be added to the system.

        Returns
        -------
        Substance
            The substance which contains the corrected component amounts.

        Raises
        ------
        ValueError
            If mole fractions were recomputed but do not sum to 1.0.
        """

        new_amounts = defaultdict(list)

        total_number_of_molecules = sum(number_of_molecules)

        # Handle any exact amounts. These are carried over unchanged, and are
        # excluded from the total used to recompute the mole fractions.
        for component in self.substance.components:

            exact_amounts = [
                amount for amount in self.substance.get_amounts(component)
                if isinstance(amount, ExactAmount)
            ]

            if len(exact_amounts) == 0:
                continue

            total_number_of_molecules -= exact_amounts[0].value
            new_amounts[component].append(exact_amounts[0])

        # Recompute the mole fractions.
        total_mole_fraction = 0.0
        number_of_new_mole_fractions = 0

        for index, component in enumerate(self.substance.components):

            mole_fractions = [
                amount for amount in self.substance.get_amounts(component)
                if isinstance(amount, MoleFraction)
            ]

            if len(mole_fractions) == 0:
                continue

            molecule_count = number_of_molecules[index]

            # Don't count the molecules already accounted for by an exact
            # amount of the same component.
            if component in new_amounts:
                molecule_count -= new_amounts[component][0].value

            new_mole_fraction = molecule_count / total_number_of_molecules
            new_amounts[component].append(MoleFraction(new_mole_fraction))

            total_mole_fraction += new_mole_fraction
            number_of_new_mole_fractions += 1

        if (not np.isclose(total_mole_fraction, 1.0)
                and number_of_new_mole_fractions > 0):
            raise ValueError("The new mole fraction does not equal 1.0")

        output_substance = Substance()

        for component, amounts in new_amounts.items():

            for amount in amounts:
                output_substance.add_component(component, amount)

        return output_substance

    def _save_results(self, directory, trajectory):
        """Save the results of running PACKMOL in the working directory

        Parameters
        ----------
        directory: str
            The directory to save the results in.
        trajectory : mdtraj.Trajectory
            The trajectory of the created system.
        """

        self.coordinate_file_path = path.join(directory, "output.pdb")
        trajectory.save_pdb(self.coordinate_file_path)

    def _execute(self, directory, available_resources):
        """Build the molecule lists, pack them into a box with packmol and
        save the resulting coordinates.

        Parameters
        ----------
        directory: str
            The working directory; packmol runs in its `packmol_files`
            sub-directory and the coordinates are saved to `output.pdb`.
        available_resources
            Not used by this protocol.

        Raises
        ------
        RuntimeError
            If packmol does not return a trajectory.
        """

        # The third returned item is always `None` and is not needed here.
        molecules, number_of_molecules, _ = self._build_molecule_arrays()

        self.output_number_of_molecules = sum(number_of_molecules)
        self.output_substance = self._rebuild_substance(number_of_molecules)

        packmol_directory = path.join(directory, "packmol_files")

        # Create packed box
        trajectory, residue_names = packmol.pack_box(
            molecules=molecules,
            number_of_copies=number_of_molecules,
            mass_density=self.mass_density,
            box_aspect_ratio=self.box_aspect_ratio,
            verbose=self.verbose_packmol,
            working_directory=packmol_directory,
            retain_working_files=self.retain_packmol_files,
        )

        # Check for failure *before* touching `residue_names`: when packing
        # fails the residue names may not be usable, and iterating over them
        # first would mask the intended error with an unrelated one.
        if trajectory is None:
            raise RuntimeError("Packmol failed to complete.")

        self.assigned_residue_names = dict()

        for component, residue_name in zip(self.substance, residue_names):
            self.assigned_residue_names[component.identifier] = residue_name

        self._save_results(directory, trajectory)
class SolvationYankProtocol(BaseYankProtocol):
    """Protocol which uses the YANK framework to perform solvation
    alchemical free energy calculations.

    Two use cases are supported: box solvation free energies (set
    `solvent_1` to the solvent of interest and `solvent_2` to an empty
    `Substance`), and transfer free energies (set `solvent_1` and
    `solvent_2` to two different solvents).
    """

    solute = InputAttribute(
        docstring="The substance describing the composition of "
        "the solute. This should include the solute "
        "molecule as well as any counter ions.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    solvent_1 = InputAttribute(
        docstring="The substance describing the composition of the first solvent.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )
    solvent_2 = InputAttribute(
        docstring="The substance describing the composition of the second solvent.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    solvent_1_coordinates = InputAttribute(
        docstring="The file path to the coordinates of the solute embedded in the "
        "first solvent.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    solvent_1_system = InputAttribute(
        docstring="The file path to the system object of the solute embedded in the "
        "first solvent.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    solvent_2_coordinates = InputAttribute(
        docstring="The file path to the coordinates of the solute embedded in the "
        "second solvent.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    solvent_2_system = InputAttribute(
        docstring="The file path to the system object of the solute embedded in the "
        "second solvent.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    electrostatic_lambdas_1 = InputAttribute(
        docstring="The list of electrostatic alchemical states that YANK should sample "
        "at. These values will be passed to the YANK `lambda_electrostatics` option. "
        "If no option is set, YANK will use `trailblaze` algorithm to determine "
        "this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    steric_lambdas_1 = InputAttribute(
        docstring="The list of steric alchemical states that YANK should sample at. "
        "These values will be passed to the YANK `lambda_sterics` option. "
        "If no option is set, YANK will use `trailblaze` algorithm to determine "
        "this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    electrostatic_lambdas_2 = InputAttribute(
        docstring="The list of electrostatic alchemical states that YANK should sample "
        "at. These values will be passed to the YANK `lambda_electrostatics` option. "
        "If no option is set, YANK will use `trailblaze` algorithm to determine "
        "this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    steric_lambdas_2 = InputAttribute(
        docstring="The list of steric alchemical states that YANK should sample at. "
        "These values will be passed to the YANK `lambda_sterics` option. "
        "If no option is set, YANK will use `trailblaze` algorithm to determine "
        "this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )

    solvent_1_trajectory_path = OutputAttribute(
        docstring="The file path to the trajectory of the solute in the "
        "first solvent.",
        type_hint=str,
    )
    solvent_2_trajectory_path = OutputAttribute(
        docstring="The file path to the trajectory of the solute in the "
        "second solvent.",
        type_hint=str,
    )

    def __init__(self, protocol_id):
        """Create the protocol and set the names under which the input
        files are copied into the working directory.

        Parameters
        ----------
        protocol_id
            Forwarded unchanged to the base class initialiser.
        """
        super().__init__(protocol_id)

        # File names (relative to the working directory) of the local
        # copies of the coordinate and system files of each solvent phase.
        self._local_solvent_1_coordinates = "solvent_1.pdb"
        self._local_solvent_1_system = "solvent_1.xml"

        self._local_solvent_2_coordinates = "solvent_2.pdb"
        self._local_solvent_2_system = "solvent_2.xml"

    def _get_system_dictionary(self):
        """Build the `solvation-system` dictionary, which points at the
        local copies of the two phases and carries the solvent DSL.

        Returns
        -------
        dict
            A dictionary with the single key `solvation-system`.
        """

        phases = (
            (self.solvent_1, self.solvent_1_coordinates),
            (self.solvent_2, self.solvent_2_coordinates),
        )

        solvent_dsls = [
            self._get_dsl_from_role(
                [self.solute, solvent], coordinates, Component.Role.Solvent
            )
            for solvent, coordinates in phases
        ]

        # Phases which yield an empty DSL string are left out of the
        # combined selection.
        non_empty_dsls = [dsl for dsl in solvent_dsls if len(dsl) > 0]

        phase_1_files = [
            self._local_solvent_1_system,
            self._local_solvent_1_coordinates,
        ]
        phase_2_files = [
            self._local_solvent_2_system,
            self._local_solvent_2_coordinates,
        ]

        return {
            "solvation-system": {
                "phase1_path": phase_1_files,
                "phase2_path": phase_2_files,
                "solvent_dsl": " or ".join(non_empty_dsls),
            }
        }

    def _get_protocol_dictionary(self):
        """Build the `solvation-protocol` dictionary which holds the
        alchemical path of each solvent phase.

        For each phase the path is `"auto"` when neither set of lambdas
        was provided, and a dictionary of the provided lambdas when both
        were.

        Returns
        -------
        dict
            A dictionary with the single key `solvation-protocol`.

        Raises
        ------
        ValueError
            If only one of the electrostatic and steric lambdas was set
            for a phase.
        """

        # --- First solvent ---
        electrostatics_1_unset = self.electrostatic_lambdas_1 == UNDEFINED
        sterics_1_unset = self.steric_lambdas_1 == UNDEFINED

        if electrostatics_1_unset and sterics_1_unset:
            solvent_1_path = "auto"
        elif electrostatics_1_unset or sterics_1_unset:
            raise ValueError("Either both of `electrostatic_lambdas_1` and "
                             "`steric_lambdas_1` must be set, or neither "
                             "must be set.")
        else:
            solvent_1_path = {
                "lambda_electrostatics": self.electrostatic_lambdas_1,
                "lambda_sterics": self.steric_lambdas_1,
            }

        # --- Second solvent ---
        electrostatics_2_unset = self.electrostatic_lambdas_2 == UNDEFINED
        sterics_2_unset = self.steric_lambdas_2 == UNDEFINED

        if electrostatics_2_unset and sterics_2_unset:
            solvent_2_path = "auto"
        elif electrostatics_2_unset or sterics_2_unset:
            raise ValueError("Either both of `electrostatic_lambdas_2` and "
                             "`steric_lambdas_2` must be set, or neither "
                             "must be set.")
        else:
            solvent_2_path = {
                "lambda_electrostatics": self.electrostatic_lambdas_2,
                "lambda_sterics": self.steric_lambdas_2,
            }

        return {
            "solvation-protocol": {
                "solvent1": {"alchemical_path": solvent_1_path},
                "solvent2": {"alchemical_path": solvent_2_path},
            }
        }

    def _execute(self, directory, available_resources):
        """Stage the input files, run the base YANK protocol and extract
        the trajectory of each solvent phase.

        Parameters
        ----------
        directory: str
            The working directory in which all files are staged / created.
        available_resources
            Forwarded unchanged to the base class `_execute`.

        Raises
        ------
        ValueError
            If the solute does not contain exactly one component with the
            solute role, or if neither solvent contains a component with
            the solvent role.
        """

        from simtk.openmm import XmlSerializer

        def components_with_role(substance, role):
            return [
                component
                for component in substance.components
                if component.role == role
            ]

        solute_components = components_with_role(self.solute, Component.Role.Solute)
        solvent_1_components = components_with_role(
            self.solvent_1, Component.Role.Solvent
        )
        solvent_2_components = components_with_role(
            self.solvent_2, Component.Role.Solvent
        )

        if len(solute_components) != 1:
            raise ValueError(
                "There must only be a single component marked as a solute.")
        if len(solvent_1_components) == 0 and len(solvent_2_components) == 0:
            raise ValueError(
                "At least one of the solvents must not be vacuum.")

        # Because of quirks in where Yank looks for files while doing
        # temporary directory changes, the coordinate and system files are
        # copied into the working directory so that they are correctly found.
        files_to_stage = (
            (self.solvent_1_coordinates, self._local_solvent_1_coordinates),
            (self.solvent_1_system, self._local_solvent_1_system),
            (self.solvent_2_coordinates, self._local_solvent_2_coordinates),
            (self.solvent_2_system, self._local_solvent_2_system),
        )

        for source_path, local_name in files_to_stage:
            shutil.copyfile(source_path, os.path.join(directory, local_name))

        # Disable the pbc of any solvent which should be treated as vacuum.
        # At most one phase can be vacuum given the check above.
        vacuum_system_path = None

        if len(solvent_1_components) == 0:
            vacuum_system_path = self._local_solvent_1_system
        elif len(solvent_2_components) == 0:
            vacuum_system_path = self._local_solvent_2_system

        if vacuum_system_path is not None:

            logger.info(
                f"Disabling the periodic boundary conditions in {vacuum_system_path} "
                f"by setting the cutoff type to NoCutoff")

            local_system_file = os.path.join(directory, vacuum_system_path)

            with open(local_system_file, "r") as system_file:
                vacuum_system = XmlSerializer.deserialize(system_file.read())

            disable_pbc(vacuum_system)

            # Overwrite the local copy with the modified system.
            with open(local_system_file, "w") as system_file:
                system_file.write(XmlSerializer.serialize(vacuum_system))

        # Set up the yank input file.
        super()._execute(directory, available_resources)

        if self.setup_only:
            return

        experiments_directory = os.path.join(directory, "experiments")

        solvent_1_yank_path = os.path.join(experiments_directory, "solvent1.nc")
        solvent_2_yank_path = os.path.join(experiments_directory, "solvent2.nc")

        self.solvent_1_trajectory_path = os.path.join(directory, "solvent1.dcd")
        self.solvent_2_trajectory_path = os.path.join(directory, "solvent2.dcd")

        for yank_path, trajectory_path in (
            (solvent_1_yank_path, self.solvent_1_trajectory_path),
            (solvent_2_yank_path, self.solvent_2_trajectory_path),
        ):
            self._extract_trajectory(yank_path, trajectory_path)
class LigandReceptorYankProtocol(BaseYankProtocol):
    """A protocol for performing ligand-receptor alchemical free energy
    calculations using the YANK framework.

    The solvated ligand and solvated complex inputs are copied into the
    working directory, a YANK yaml input is generated by the base class,
    and (unless `setup_only` is set) the trajectories of the two phases
    are extracted to `.dcd` files.
    """

    class RestraintType(Enum):
        """The types of ligand restraints available within yank.
        """

        Harmonic = "Harmonic"
        FlatBottom = "FlatBottom"

    ligand_residue_name = InputAttribute(
        docstring="The residue name of the ligand.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    receptor_residue_name = InputAttribute(
        docstring="The residue name of the receptor.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    solvated_ligand_coordinates = InputAttribute(
        docstring="The file path to the solvated ligand coordinates.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    solvated_ligand_system = InputAttribute(
        docstring="The file path to the solvated ligand system object.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    solvated_complex_coordinates = InputAttribute(
        docstring="The file path to the solvated complex coordinates.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    solvated_complex_system = InputAttribute(
        docstring="The file path to the solvated complex system object.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    force_field_path = InputAttribute(
        docstring="The path to the force field which defines the charge method "
        "to use for the calculation.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    apply_restraints = InputAttribute(
        docstring=
        "Determines whether the ligand should be explicitly restrained to the "
        "receptor in order to stop the ligand from temporarily unbinding.",
        type_hint=bool,
        default_value=True,
    )
    restraint_type = InputAttribute(
        docstring="The type of ligand restraint applied, provided that "
        "`apply_restraints` is `True`",
        type_hint=RestraintType,
        default_value=RestraintType.Harmonic,
    )

    ligand_electrostatic_lambdas = InputAttribute(
        docstring=
        "The list of electrostatic alchemical states that YANK should sample "
        "at when calculating the free energy of the solvated ligand. If no option is "
        "set, YANK will use `trailblaze` algorithm to determine this option "
        "automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    ligand_steric_lambdas = InputAttribute(
        docstring="The list of steric alchemical states that YANK should sample "
        "at when calculating the free energy of the solvated ligand. If no option is "
        "set, YANK will use `trailblaze` algorithm to determine this option "
        "automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    complex_electrostatic_lambdas = InputAttribute(
        docstring=
        "The list of electrostatic alchemical states that YANK should sample "
        "at when calculating the free energy of the ligand in complex with the "
        "receptor. If no option is set, YANK will use `trailblaze` algorithm to "
        "determine this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )
    complex_steric_lambdas = InputAttribute(
        docstring="The list of steric alchemical states that YANK should sample "
        "at when calculating the free energy of the ligand in complex with the "
        "receptor. If no option is set, YANK will use `trailblaze` algorithm to "
        "determine this option automatically.",
        type_hint=list,
        optional=True,
        default_value=UNDEFINED,
    )

    solvated_ligand_trajectory_path = OutputAttribute(
        docstring="The file path to the generated ligand trajectory.",
        type_hint=str)
    # The original docstring was a copy of the ligand one; this output is
    # assigned the path of the *complex* trajectory in `_execute`.
    solvated_complex_trajectory_path = OutputAttribute(
        docstring="The file path to the generated complex trajectory.",
        type_hint=str)

    def __init__(self, protocol_id):
        """Constructs a new LigandReceptorYankProtocol object."""

        super().__init__(protocol_id)

        # File names (relative to the working directory) which the input
        # files are copied to before YANK is invoked.
        self._local_ligand_coordinates = "ligand.pdb"
        self._local_ligand_system = "ligand.xml"

        self._local_complex_coordinates = "complex.pdb"
        self._local_complex_system = "complex.xml"

    def _get_solvent_dictionary(self):
        """Returns a dictionary of the solvent which will be serialized
        to a yaml file and passed to YANK. In most cases, this should
        just be passing force field settings over, such as PME settings.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK solvents.

        Raises
        ------
        ValueError
            If the electrostatics method of the force field is not PME.
        """

        with open(self.force_field_path, "r") as file:
            force_field_source = SmirnoffForceFieldSource.parse_json(
                file.read())

        force_field = force_field_source.to_force_field()
        charge_method = force_field.get_parameter_handler(
            "Electrostatics").method

        if charge_method.lower() != "pme":
            raise ValueError(
                "Currently only PME electrostatics are supported.")

        # The check above is case-insensitive, so emit the canonical
        # spelling rather than forwarding whatever case the force field
        # happened to use.
        return {"default": {"nonbonded_method": "PME"}}

    def _get_system_dictionary(self):
        """Returns the YANK `systems` section, containing a single
        `host-guest` system built from the locally copied complex
        (phase 1) and ligand (phase 2) files.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK systems.
        """

        # The system refers to its solvent by key, so use the (single) key
        # which `_get_solvent_dictionary` defines.
        solvent_key = next(iter(self._get_solvent_dictionary()))

        host_guest_dictionary = {
            "phase1_path": [
                self._local_complex_system,
                self._local_complex_coordinates,
            ],
            "phase2_path": [
                self._local_ligand_system,
                self._local_ligand_coordinates,
            ],
            "ligand_dsl": f"resname {self.ligand_residue_name}",
            "solvent": solvent_key,
        }

        return {"host-guest": host_guest_dictionary}

    @staticmethod
    def _get_alchemical_path(electrostatic_lambdas, steric_lambdas,
                             phase_name):
        """Builds the `alchemical_path` entry of a single phase.

        Parameters
        ----------
        electrostatic_lambdas: list or UNDEFINED
            The electrostatic lambda schedule of the phase.
        steric_lambdas: list or UNDEFINED
            The steric lambda schedule of the phase.
        phase_name: str
            The attribute prefix of the phase (`ligand` or `complex`),
            used only to build the error message.

        Returns
        -------
        dict or str
            The explicit lambda schedule, or `"auto"` when neither
            schedule has been set.

        Raises
        ------
        ValueError
            If exactly one of the two schedules has been set.
        """

        electrostatics_unset = electrostatic_lambdas == UNDEFINED
        sterics_unset = steric_lambdas == UNDEFINED

        if electrostatics_unset and sterics_unset:
            return "auto"

        if electrostatics_unset or sterics_unset:

            raise ValueError(
                f"Either both of `{phase_name}_electrostatic_lambdas` and "
                f"`{phase_name}_steric_lambdas` must be set, or neither "
                f"must be set.")

        return {
            "lambda_electrostatics": electrostatic_lambdas,
            "lambda_sterics": steric_lambdas,
        }

    def _get_protocol_dictionary(self):
        """Returns the YANK `protocols` section, containing the alchemical
        paths of the complex and of the solvated ligand.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK protocol.

        Raises
        ------
        ValueError
            If only one of the electrostatic / steric lambda schedules
            of a phase has been set.
        """

        # The ligand schedule is validated first so that the order in which
        # errors are raised matches the attribute order.
        ligand_protocol_dictionary = self._get_alchemical_path(
            self.ligand_electrostatic_lambdas,
            self.ligand_steric_lambdas,
            "ligand",
        )
        complex_protocol_dictionary = self._get_alchemical_path(
            self.complex_electrostatic_lambdas,
            self.complex_steric_lambdas,
            "complex",
        )

        absolute_binding_dictionary = {
            "complex": {
                "alchemical_path": complex_protocol_dictionary
            },
            "solvent": {
                "alchemical_path": ligand_protocol_dictionary
            },
        }

        return {"absolute_binding_dictionary": absolute_binding_dictionary}

    def _get_experiments_dictionary(self):
        """Returns the YANK `experiments` section, extending the base
        dictionary with a `restraint` entry when `apply_restraints` is set.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK experiment.
        """

        experiments_dictionary = super()._get_experiments_dictionary()

        if self.apply_restraints:

            experiments_dictionary["restraint"] = {
                "restrained_ligand_atoms":
                f"(resname {self.ligand_residue_name}) and (mass > 1.5)",
                "restrained_receptor_atoms":
                f"(resname {self.receptor_residue_name}) and (mass > 1.5)",
                "type": self.restraint_type.value,
            }

        return experiments_dictionary

    def _get_full_input_dictionary(self, available_resources):
        """Returns the full YANK input dictionary of the base class with
        an additional `solvents` section.

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK input file.
        """

        full_dictionary = super()._get_full_input_dictionary(
            available_resources)
        full_dictionary["solvents"] = self._get_solvent_dictionary()

        return full_dictionary

    def _execute(self, directory, available_resources):
        """Copies the inputs into `directory`, runs YANK through the base
        class and extracts the ligand and complex trajectories.

        Parameters
        ----------
        directory: str
            The working directory in which YANK is run.
        available_resources: ComputeResources
            The resources available to execute on.
        """

        # Because of quirks in where YANK looks for files while doing
        # temporary directory changes, we need to copy the input files
        # locally so they are correctly found.
        files_to_copy = (
            (self.solvated_ligand_coordinates,
             self._local_ligand_coordinates),
            (self.solvated_ligand_system, self._local_ligand_system),
            (self.solvated_complex_coordinates,
             self._local_complex_coordinates),
            (self.solvated_complex_system, self._local_complex_system),
        )

        for source_path, local_name in files_to_copy:
            shutil.copyfile(source_path, os.path.join(directory, local_name))

        super()._execute(directory, available_resources)

        # No simulations are run in setup only mode, so there are no
        # trajectories to extract.
        if self.setup_only:
            return

        ligand_yank_path = os.path.join(directory, "experiments", "solvent.nc")
        complex_yank_path = os.path.join(directory, "experiments",
                                         "complex.nc")

        self.solvated_ligand_trajectory_path = os.path.join(
            directory, "ligand.dcd")
        self.solvated_complex_trajectory_path = os.path.join(
            directory, "complex.dcd")

        self._extract_trajectory(ligand_yank_path,
                                 self.solvated_ligand_trajectory_path)
        self._extract_trajectory(complex_yank_path,
                                 self.solvated_complex_trajectory_path)
class BaseYankProtocol(Protocol, abc.ABC):
    """An abstract base class for protocols which perform a set of
    alchemical free energy simulations using the YANK framework.

    Subclasses provide the `systems` and `protocols` sections of the YANK
    yaml input via `_get_system_dictionary` and `_get_protocol_dictionary`;
    this class assembles the full input file, runs YANK and stores the
    estimated free energy.
    """

    thermodynamic_state = InputAttribute(
        docstring="The state at which to run the calculations.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    number_of_equilibration_iterations = InputAttribute(
        docstring=
        "The number of iterations used for equilibration before production "
        "run. Only post-equilibration iterations are written to file.",
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1,
    )
    number_of_iterations = InputAttribute(
        docstring="The number of YANK iterations to perform.",
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=5000,
    )
    steps_per_iteration = InputAttribute(
        docstring="The number of steps per YANK iteration to perform.",
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=500,
    )
    checkpoint_interval = InputAttribute(
        docstring=
        "The number of iterations between saving YANK checkpoint files.",
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=50,
    )

    timestep = InputAttribute(
        docstring="The length of the timestep to take.",
        type_hint=pint.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=2 * unit.femtosecond,
    )

    verbose = InputAttribute(
        docstring="Controls whether or not to run YANK at high verbosity.",
        type_hint=bool,
        default_value=False,
    )
    setup_only = InputAttribute(
        docstring="If true, YANK will only create and validate the setup files, "
        "but not actually run any simulations. This argument is mainly "
        "only to be used for testing purposes.",
        type_hint=bool,
        default_value=False,
    )

    estimated_free_energy = OutputAttribute(
        docstring="The estimated free energy value and its uncertainty "
        "returned by YANK.",
        type_hint=pint.Measurement,
    )

    @staticmethod
    def _get_residue_names_from_role(substances, coordinate_path, role):
        """Returns the residue names of all of the components which
        have been assigned a given role.

        Parameters
        ----------
        substances: list of Substance
            The substances which contains the components.
        coordinate_path: str
            The path to the coordinates which describe the systems
            topology.
        role: Component.Role, optional
            The role of the component to identify. If `None`, no
            filtering is performed and the string `"all"` is returned.

        Returns
        -------
        set of str or str
            The identified residue names, or the literal string `"all"`
            when `role` is `None`.
        """

        from simtk.openmm import app
        from openforcefield.topology import Molecule, Topology

        if role is None:
            return "all"

        unique_molecules = [
            Molecule.from_smiles(component.smiles) for substance in substances
            for component in substance.components
        ]

        openmm_topology = app.PDBFile(coordinate_path).topology
        topology = Topology.from_openmm(openmm_topology, unique_molecules)

        # Determine the smiles of all molecules in the system which have the
        # requested role. We need to use the toolkit to re-generate the
        # smiles as later we will compare these against more toolkit
        # generated smiles.
        component_smiles = {
            Molecule.from_smiles(component.smiles).to_smiles()
            for substance in substances
            for component in substance.components if component.role == role
        }

        residue_names = set()

        all_openmm_atoms = list(openmm_topology.atoms())

        # Find the residue names of the molecules which have the correct
        # role.
        for topology_molecule in topology.topology_molecules:

            molecule_smiles = topology_molecule.reference_molecule.to_smiles()

            if molecule_smiles not in component_smiles:
                continue

            molecule_residue_names = {
                all_openmm_atoms[
                    topology_atom.topology_atom_index].residue.name
                for topology_atom in topology_molecule.atoms
            }

            # Every atom of a molecule is expected to share a single
            # residue name.
            assert len(molecule_residue_names) == 1
            residue_names.update(molecule_residue_names)

        return residue_names

    @staticmethod
    def _get_dsl_from_role(substances, coordinate_path, role):
        """Returns an MDTraj DSL string which identifies those
        atoms which belong to components flagged with a specific
        role.

        Parameters
        ----------
        substances: list of Substance
            The substances which contains the components.
        coordinate_path: str
            The path to the coordinates which describe the systems
            topology.
        role: Component.Role, optional
            The role of the component to identify. If `None`, the
            DSL string `"all"` is returned.

        Returns
        -------
        str
            The DSL string. This is empty if no residue matches the role.
        """

        residue_names = BaseYankProtocol._get_residue_names_from_role(
            substances, coordinate_path, role)

        # `_get_residue_names_from_role` returns the bare string "all" when
        # no role is given. Joining over it would iterate its characters
        # ("resname a or resname l or ..."), so pass it through untouched.
        if isinstance(residue_names, str):
            return residue_names

        # Sort so that the generated DSL does not depend on set ordering.
        return " or ".join(f"resname {residue_name}"
                           for residue_name in sorted(residue_names))

    def _get_options_dictionary(self, available_resources):
        """Returns a dictionary of options which will be serialized
        to a yaml file and passed to YANK.

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK options.
        """

        from openforcefield.utils import quantity_to_string

        platform_name = "CPU"

        if available_resources.number_of_gpus > 0:

            # A platform which runs on GPUs has been requested.
            from evaluator.backends import ComputeResources

            toolkit_enum = ComputeResources.GPUToolkit(
                available_resources.preferred_gpu_toolkit)

            # The platform must be a plain string so that it can be written
            # to the yaml file; previously the non-CUDA branch yielded the
            # enum member itself rather than its name.
            if toolkit_enum == ComputeResources.GPUToolkit.CUDA:
                platform_name = "CUDA"
            else:
                platform_name = "OpenCL"

        temperature = quantity_to_string(
            pint_quantity_to_openmm(self.thermodynamic_state.temperature))
        pressure = quantity_to_string(
            pint_quantity_to_openmm(self.thermodynamic_state.pressure))
        timestep = quantity_to_string(pint_quantity_to_openmm(self.timestep))

        return {
            "verbose": self.verbose,
            "output_dir": ".",
            "temperature": temperature,
            "pressure": pressure,
            "minimize": True,
            "number_of_equilibration_iterations":
            self.number_of_equilibration_iterations,
            "default_number_of_iterations": self.number_of_iterations,
            "default_nsteps_per_iteration": self.steps_per_iteration,
            "checkpoint_interval": self.checkpoint_interval,
            "default_timestep": timestep,
            "annihilate_electrostatics": True,
            "annihilate_sterics": False,
            "platform": platform_name,
        }

    @abc.abstractmethod
    def _get_system_dictionary(self):
        """Returns a dictionary of the system which will be serialized
        to a yaml file and passed to YANK. Only a single system may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK systems.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def _get_protocol_dictionary(self):
        """Returns a dictionary of the protocol which will be serialized
        to a yaml file and passed to YANK. Only a single protocol may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK protocol.
        """
        raise NotImplementedError()

    def _get_experiments_dictionary(self):
        """Returns a dictionary of the experiments which will be serialized
        to a yaml file and passed to YANK. Only a single experiment may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK experiment.
        """

        # The experiment refers to its system and protocol by key, so take
        # the first (and only expected) key of each section.
        system_key = next(iter(self._get_system_dictionary()))
        protocol_key = next(iter(self._get_protocol_dictionary()))

        return {"system": system_key, "protocol": protocol_key}

    def _get_full_input_dictionary(self, available_resources):
        """Returns a dictionary of the full YANK inputs which will be serialized
        to a yaml file and passed to YANK

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK input file.
        """

        return {
            "options": self._get_options_dictionary(available_resources),
            "systems": self._get_system_dictionary(),
            "protocols": self._get_protocol_dictionary(),
            "experiments": self._get_experiments_dictionary(),
        }

    @staticmethod
    def _extract_trajectory(checkpoint_path, output_trajectory_path):
        """Extracts the stored trajectory of the 'initial' state from a
        yank `.nc` checkpoint file and stores it to disk as a `.dcd` file.

        Parameters
        ----------
        checkpoint_path: str
            The path to the yank `.nc` file
        output_trajectory_path: str
            The path to store the extracted trajectory at.
        """

        from yank.analyze import extract_trajectory

        mdtraj_trajectory = extract_trajectory(checkpoint_path,
                                               state_index=0,
                                               image_molecules=True)
        mdtraj_trajectory.save_dcd(output_trajectory_path)

    @staticmethod
    def _run_yank(directory, available_resources, setup_only):
        """Runs YANK within the specified directory which contains a `yank.yaml`
        input file.

        Parameters
        ----------
        directory: str
            The directory within which to run yank.
        available_resources: ComputeResources
            The compute resources available to yank.
        setup_only: bool
            If true, YANK will only create and validate the setup files,
            but not actually run any simulations. This argument is mainly
            only to be used for testing purposes.

        Returns
        -------
        simtk.unit.Quantity
            The free energy returned by yank. This is zero when
            `setup_only` is true.
        simtk.unit.Quantity
            The uncertainty in the free energy returned by yank. This is
            zero when `setup_only` is true.
        """

        from yank.experiment import ExperimentBuilder
        from yank.analyze import ExperimentAnalyzer

        from simtk import unit as simtk_unit

        with temporarily_change_directory(directory):

            # Set the default properties on the desired platform
            # before calling into yank.
            setup_platform_with_resources(available_resources)

            # Building the experiment is done even in setup only mode.
            exp_builder = ExperimentBuilder("yank.yaml")

            if setup_only is True:
                return (
                    0.0 * simtk_unit.kilojoule_per_mole,
                    0.0 * simtk_unit.kilojoule_per_mole,
                )

            exp_builder.run_experiments()

            analyzer = ExperimentAnalyzer("experiments")
            output = analyzer.auto_analyze()

            free_energy = output["free_energy"]["free_energy_diff_unit"]
            free_energy_uncertainty = output["free_energy"][
                "free_energy_diff_error_unit"]

        return free_energy, free_energy_uncertainty

    @staticmethod
    def _run_yank_as_process(queue, directory, available_resources,
                             setup_only):
        """A wrapper around the `_run_yank` method which takes
        a `multiprocessing.Queue` as input, thereby allowing it
        to be launched from a separate process and still return
        its output back to the main process.

        Nothing is returned directly. Instead a single tuple of
        `(free_energy, free_energy_uncertainty, exception)` is put onto
        `queue`, where the first two entries are `None` if `_run_yank`
        raised, and `exception` is `None` if it did not.

        Parameters
        ----------
        queue: multiprocessing.Queue
            The queue object which will communicate with the
            launched process.
        directory: str
            The directory within which to run yank.
        available_resources: ComputeResources
            The compute resources available to yank.
        setup_only: bool
            If true, YANK will only create and validate the setup files,
            but not actually run any simulations. This argument is mainly
            only to be used for testing purposes.
        """

        free_energy = None
        free_energy_uncertainty = None

        exception = None

        # Any exception must be caught here and forwarded through the queue,
        # otherwise the parent would wait on the queue without ever learning
        # why the run failed.
        try:
            free_energy, free_energy_uncertainty = BaseYankProtocol._run_yank(
                directory, available_resources, setup_only)
        except Exception as e:
            exception = e

        queue.put((free_energy, free_energy_uncertainty, exception))

    def _execute(self, directory, available_resources):
        """Writes the `yank.yaml` input file into `directory`, runs YANK
        and stores the result in `estimated_free_energy`.

        Parameters
        ----------
        directory: str
            The working directory in which YANK is run.
        available_resources: ComputeResources
            The resources available to execute on.

        Raises
        ------
        Exception
            Any exception raised by YANK in the child process is
            re-raised in the calling process.
        """

        yaml_filename = os.path.join(directory, "yank.yaml")

        # Create the yank yaml input file from a dictionary of options.
        with open(yaml_filename, "w") as file:
            yaml.dump(
                self._get_full_input_dictionary(available_resources),
                file,
                sort_keys=False,
            )

        setup_only = self.setup_only

        # Yank is not safe to be called from anything other than the main thread.
        # If the current thread is not detected as the main one, then yank should
        # be spun up in a new process which should itself be safe to run yank in.
        if threading.current_thread() is threading.main_thread():
            logger.info("Launching YANK in the main thread.")
            free_energy, free_energy_uncertainty = self._run_yank(
                directory, available_resources, setup_only)
        else:

            from multiprocessing import Process, Queue

            logger.info("Launching YANK in a new process.")

            # Create a queue to pass the results back to the main process.
            queue = Queue()
            # Create the process within which yank will run.
            process = Process(
                target=BaseYankProtocol._run_yank_as_process,
                args=(queue, directory, available_resources, setup_only),
            )

            # Start the process and gather back the output. The queue is
            # read before joining so that the child is never blocked on
            # flushing its result.
            process.start()
            free_energy, free_energy_uncertainty, exception = queue.get()
            process.join()

            if exception is not None:
                raise exception

        self.estimated_free_energy = openmm_quantity_to_pint(
            free_energy).plus_minus(
                openmm_quantity_to_pint(free_energy_uncertainty))
Exemple #20
0
class BuildTLeapSystem(TemplateBuildSystem):
    """Parametrise a set of molecules with an Amber based force field.
    using the `tleap package <http://ambermd.org/AmberTools.php>`_.

    Notes
    -----
    * This protocol is currently a work in progress and as such has limited
      functionality compared to the more established `BuildSmirnoffSystem` protocol.
    * This protocol requires the optional `ambertools >=19.0` dependency to be installed.
    """
    class ChargeBackend(Enum):
        """The framework to use to assign partial charges.
        """

        OpenEye = "OpenEye"
        AmberTools = "AmberTools"

    charge_backend = InputAttribute(
        docstring="The backend framework to use to assign partial charges.",
        type_hint=ChargeBackend,
        default_value=lambda: BuildTLeapSystem.ChargeBackend.OpenEye
        if has_openeye() else BuildTLeapSystem.ChargeBackend.AmberTools,
    )

    @staticmethod
    def _run_tleap(molecule, force_field_source, directory):
        """Uses tleap to apply parameters to a particular molecule,
        generating a `.prmtop` and a `.rst7` file with the applied parameters.

        Parameters
        ----------
        molecule: openforcefield.topology.Molecule
            The molecule to parameterize.
        force_field_source: TLeapForceFieldSource
            The tleap source which describes which parameters to apply.
        directory: str
            The directory to store and temporary files / the final
            parameters in.

        Returns
        -------
        str
            The file path to the `prmtop` file.
        str
            The file path to the `rst7` file.
        """
        from simtk import unit as simtk_unit

        # Change into the working directory.
        with temporarily_change_directory(directory):

            initial_file_path = "initial.sdf"
            molecule.to_file(initial_file_path, file_format="SDF")

            # Save the molecule charges to a file.
            charges = [
                x.value_in_unit(simtk_unit.elementary_charge)
                for x in molecule.partial_charges
            ]

            with open("charges.txt", "w") as file:
                file.write(" ".join(map(str, charges)))

            if force_field_source.leap_source == "leaprc.gaff2":
                amber_type = "gaff2"
            elif force_field_source.leap_source == "leaprc.gaff":
                amber_type = "gaff"
            else:

                raise ValueError(
                    f"The {force_field_source.leap_source} source is currently "
                    f"unsupported. Only the 'leaprc.gaff2' and 'leaprc.gaff' "
                    f" sources are supported.")

            # Run antechamber to find the correct atom types.
            processed_mol2_path = "antechamber.mol2"

            antechamber_process = subprocess.Popen(
                [
                    "antechamber",
                    "-i",
                    initial_file_path,
                    "-fi",
                    "sdf",
                    "-o",
                    processed_mol2_path,
                    "-fo",
                    "mol2",
                    "-at",
                    amber_type,
                    "-rn",
                    "MOL",
                    "-an",
                    "no",
                    "-pf",
                    "yes",
                    "-c",
                    "rc",
                    "-cf",
                    "charges.txt",
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )

            antechamber_output, antechamber_error = antechamber_process.communicate(
            )
            antechamber_exit_code = antechamber_process.returncode

            with open("antechamber_output.log", "w") as file:
                file.write(f"error code: {antechamber_exit_code}\nstdout:\n\n")
                file.write("stdout:\n\n")
                file.write(antechamber_output.decode())
                file.write("\nstderr:\n\n")
                file.write(antechamber_error.decode())

            if not os.path.isfile(processed_mol2_path):

                raise RuntimeError(
                    f"antechamber failed to assign atom types to the input mol2 file "
                    f"({initial_file_path})")

            frcmod_path = None

            if amber_type == "gaff" or amber_type == "gaff2":

                # Optionally run parmchk to find any missing parameters.
                frcmod_path = "parmck2.frcmod"

                prmchk2_process = subprocess.Popen(
                    [
                        "parmchk2",
                        "-i",
                        processed_mol2_path,
                        "-f",
                        "mol2",
                        "-o",
                        frcmod_path,
                        "-s",
                        amber_type,
                    ],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )

                prmchk2_output, prmchk2_error = prmchk2_process.communicate()
                prmchk2_exit_code = prmchk2_process.returncode

                with open("prmchk2_output.log", "w") as file:
                    file.write(f"error code: {prmchk2_exit_code}\nstdout:\n\n")
                    file.write(prmchk2_output.decode())
                    file.write("\nstderr:\n\n")
                    file.write(prmchk2_error.decode())

                if not os.path.isfile(frcmod_path):

                    raise RuntimeError(
                        f"parmchk2 failed to assign missing {amber_type} parameters "
                        f"to the antechamber created mol2 file ({processed_mol2_path})",
                    )

            # Build the tleap input file.
            template_lines = [f"source {force_field_source.leap_source}"]

            if frcmod_path is not None:
                template_lines.append(f"loadamberparams {frcmod_path}", )

            prmtop_file_name = "structure.prmtop"
            rst7_file_name = "structure.rst7"

            template_lines.extend([
                f"MOL = loadmol2 {processed_mol2_path}",
                f'setBox MOL "centers"',
                "check MOL",
                f"saveamberparm MOL {prmtop_file_name} {rst7_file_name}",
            ])

            input_file_path = "tleap.in"

            with open(input_file_path, "w") as file:
                file.write("\n".join(template_lines))

            # Run tleap.
            tleap_process = subprocess.Popen(
                ["tleap", "-s ", "-f ", input_file_path],
                stdout=subprocess.PIPE)

            tleap_output, _ = tleap_process.communicate()
            tleap_exit_code = tleap_process.returncode

            with open("tleap_output.log", "w") as file:
                file.write(f"error code: {tleap_exit_code}\nstdout:\n\n")
                file.write(tleap_output.decode())

            if not os.path.isfile(prmtop_file_name) or not os.path.isfile(
                    rst7_file_name):
                raise RuntimeError(f"tleap failed to execute.")

            with open("leap.log", "r") as file:

                if re.search(
                        "ERROR|WARNING|Warning|duplicate|FATAL|Could|Fatal|Error",
                        file.read(),
                ):

                    raise RuntimeError(f"tleap failed to execute.")

        return (
            os.path.join(directory, prmtop_file_name),
            os.path.join(directory, rst7_file_name),
        )

    def _generate_charges(self, molecule):
        """Generate conformers and AM1-BCC partial charges for a molecule,
        using the toolkit(s) selected by `charge_backend`.

        Nothing is returned; the conformer and charge generation methods
        are called directly on the passed molecule.

        Parameters
        ----------
        molecule: openforcefield.topology.Molecule
            The molecule to assign charges to.

        Raises
        ------
        ValueError
            If `charge_backend` is neither `ChargeBackend.OpenEye` nor
            `ChargeBackend.AmberTools`.
        """
        backend = self.charge_backend
        backends = BuildTLeapSystem.ChargeBackend

        # The toolkit imports are local to the branch which needs them.
        if backend == backends.OpenEye:

            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            registry = OpenEyeToolkitWrapper()

        elif backend == backends.AmberTools:

            from openforcefield.utils.toolkits import (
                AmberToolsToolkitWrapper,
                RDKitToolkitWrapper,
                ToolkitRegistry,
            )

            registry = ToolkitRegistry(
                toolkit_precedence=[RDKitToolkitWrapper, AmberToolsToolkitWrapper]
            )

        else:
            raise ValueError("Invalid toolkit specification.")

        molecule.generate_conformers(toolkit_registry=registry)
        molecule.compute_partial_charges_am1bcc(toolkit_registry=registry)

    def _parameterize_molecule(self, molecule, force_field_source, cutoff):
        """Charge a single molecule, build its AMBER topology with tleap and
        turn the resulting prmtop file into an OpenMM system.

        The created system is also serialized to a `component.xml` file,
        which is opened relative to the current working directory.

        Parameters
        ----------
        molecule: openforcefield.topology.Molecule
            The molecule to parameterize.
        force_field_source: TLeapForceFieldSource
            The tleap source which describes which parameters to apply.
        cutoff
            The value passed as `nonbondedCutoff` when creating the system.

        Returns
        -------
        simtk.openmm.System
            The parameterized system.
        """
        self._generate_charges(molecule)

        # Only the prmtop path is needed here; the rst7 path is discarded.
        prmtop_path, _rst7_path = BuildTLeapSystem._run_tleap(
            molecule, force_field_source, ""
        )

        system = openmm.app.AmberPrmtopFile(prmtop_path).createSystem(
            nonbondedMethod=app.PME,
            nonbondedCutoff=cutoff,
            constraints=app.HBonds,
            rigidWater=True,
            removeCMMotion=False,
        )

        with open("component.xml", "w") as xml_file:
            xml_file.write(openmm.XmlSerializer.serialize(system))

        return system

    def _execute(self, directory, available_resources):
        """Check that the force field source stored at `force_field_path`
        is a tleap source, and then defer to the parent class' `_execute`.

        Raises
        ------
        ValueError
            If the loaded force field source is not a `TLeapForceFieldSource`.
        """
        loaded_source = ForceFieldSource.from_json(self.force_field_path)
        is_tleap_source = isinstance(loaded_source, TLeapForceFieldSource)

        if not is_tleap_source:
            raise ValueError(
                "Only TLeap force field sources are supported by this protocol."
            )

        super(BuildTLeapSystem, self)._execute(directory, available_resources)
Exemple #21
0
class BuildSmirnoffSystem(BaseBuildSystem):
    """Parametrise a set of molecules with a given smirnoff force field
    using the `OpenFF toolkit <https://github.com/openforcefield/openforcefield>`_.

    The parameterised OpenMM system is serialized to a `system.xml` file
    inside of the working directory, and its path is stored in `system_path`.
    """

    charged_molecule_paths = InputAttribute(
        docstring=(
            "File paths to mol2 files which contain the charges assigned to "
            "molecules in the system. This input is helpful when dealing "
            "with large molecules (such as hosts in host-guest binding "
            "calculations) whose charges may by needed in multiple places,"
            " and hence should only be calculated once."
        ),
        type_hint=list,
        default_value=[],
    )
    apply_known_charges = InputAttribute(
        docstring=(
            "If true, the formal charges of ions and the partial charges of "
            "the selected water model will be automatically applied to any "
            "matching molecules in the system."
        ),
        type_hint=bool,
        default_value=True,
    )

    @staticmethod
    def _generate_known_charged_molecules():
        """Build the molecules (a handful of ions and water) whose partial
        charges are hard-coded rather than computed.

        Notes
        -----
        These are solely to be used as a work around until library charges
        are fully implemented in the openforcefield toolkit.

        Todos
        -----
        Remove this method when library charges are fully implemented in
        the openforcefield toolkit.

        Returns
        -------
        list of openforcefield.topology.Molecule
            The molecules with assigned charges, in the order sodium,
            potassium, calcium, chlorine, water.
        """
        from openforcefield.topology import Molecule
        from simtk import unit as simtk_unit

        # (SMILES pattern, per-atom charges in units of elementary charge).
        # NOTE(review): the water charges presumably follow an O, H, H atom
        # ordering - confirm against the toolkit's SMILES parsing.
        known_charges = [
            ("[Na+]", [1.0]),
            ("[K+]", [1.0]),
            ("[Ca+2]", [2.0]),
            ("[Cl-]", [-1.0]),
            ("O", [-0.834, 0.417, 0.417]),
        ]

        molecules = []

        for smiles, charges in known_charges:

            molecule = Molecule.from_smiles(smiles)
            molecule.partial_charges = (
                np.array(charges) * simtk_unit.elementary_charge
            )

            molecules.append(molecule)

        return molecules

    def _execute(self, directory, available_resources):
        """Apply the SMIRNOFF force field to the system described by the
        coordinate file and substance, and write the result to disk.

        Raises
        ------
        ValueError
            If the force field source is not a `SmirnoffForceFieldSource`,
            or if a component could not be converted to a molecule.
        RuntimeError
            If no system was created from the topology.
        """
        from openforcefield.topology import Molecule, Topology

        pdb_file = app.PDBFile(self.coordinate_file_path)

        force_field_source = ForceFieldSource.from_json(self.force_field_path)

        if not isinstance(force_field_source, SmirnoffForceFieldSource):
            raise ValueError(
                "Only SMIRNOFF force fields are supported by this protocol."
            )

        force_field = force_field_source.to_force_field()

        # Start from the hard-coded charges (if requested) and then add any
        # user specified charged molecules.
        if self.apply_known_charges:
            charged_molecules = self._generate_known_charged_molecules()
        else:
            charged_molecules = []

        for mol2_path in self.charged_molecule_paths:
            charged_molecules.append(Molecule.from_file(mol2_path, "MOL2"))

        unique_molecules = []

        for component in self.substance.components:

            molecule = Molecule.from_smiles(smiles=component.smiles)

            if molecule is None:
                raise ValueError(
                    f"{component} could not be converted to a Molecule"
                )

            unique_molecules.append(molecule)

        topology = Topology.from_openmm(
            pdb_file.topology, unique_molecules=unique_molecules
        )

        # Only pass `charge_from_molecules` when there is something to pass.
        extra_kwargs = {}

        if charged_molecules:
            extra_kwargs["charge_from_molecules"] = charged_molecules

        system = force_field.create_openmm_system(topology, **extra_kwargs)

        if system is None:

            raise RuntimeError(
                "Failed to create a system from the specified topology and molecules."
            )

        system_xml = openmm.XmlSerializer.serialize(system)
        self.system_path = os.path.join(directory, "system.xml")

        with open(self.system_path, "w") as xml_file:
            xml_file.write(system_xml)
Exemple #22
0
class BaseBuildSystem(Protocol, abc.ABC):
    """The base class for any protocol whose role is to apply a set of
    force field parameters to a given system.

    Subclasses are expected to provide their own `_execute`; the version
    defined here only raises `NotImplementedError`.
    """

    class WaterModel(Enum):
        """An enum which describes which water model is being
        used, so that correct charges can be applied.

        Warnings
        --------
        This is only a temporary addition until full water model support
        is introduced.
        """

        TIP3P = "TIP3P"

    force_field_path = InputAttribute(
        docstring=(
            "The file path to the force field parameters to assign to the system."
        ),
        type_hint=str,
        default_value=UNDEFINED,
    )
    coordinate_file_path = InputAttribute(
        docstring=(
            "The file path to the PDB coordinate file which defines the "
            "topology of the system to which the force field parameters "
            "will be assigned."
        ),
        type_hint=str,
        default_value=UNDEFINED,
    )

    substance = InputAttribute(
        docstring="The composition of the system.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )
    water_model = InputAttribute(
        docstring=(
            "The water model to apply, if any water molecules are present."
        ),
        type_hint=WaterModel,
        default_value=WaterModel.TIP3P,
    )

    system_path = OutputAttribute(
        docstring="The path to the assigned system object.",
        type_hint=str,
    )

    @staticmethod
    def _append_system(existing_system, system_to_append, index_map=None):
        """Appends the particles, constraints and forces of one system onto
        the end of an existing system, modifying the existing system in place.

        Parameters
        ----------
        existing_system: simtk.openmm.System
            The base system to extend.
        system_to_append: simtk.openmm.System
            The system to append.
        index_map: dict of int and int, optional
            A map to apply to the indices of atoms in the `system_to_append`.
            This is predominantly to be used when the ordering of the atoms
            in the `system_to_append` does not match the ordering in the full
            topology. When omitted, an identity map is used.

        Raises
        ------
        ValueError
            If either system contains a force which is not a harmonic bond,
            harmonic angle, periodic torsion or nonbonded force, or if not
            all of the forces were appended.
        """
        supported_force_types = [
            openmm.HarmonicBondForce,
            openmm.HarmonicAngleForce,
            openmm.PeriodicTorsionForce,
            openmm.NonbondedForce,
        ]

        # The offset must be read before any particles are appended.
        index_offset = existing_system.getNumParticles()
        n_particles_to_append = system_to_append.getNumParticles()

        if index_map is None:
            index_map = {i: i for i in range(n_particles_to_append)}

        def shifted(*indices):
            """Map each index and shift it past the existing particles."""
            return [index_map[i] + index_offset for i in indices]

        # Append the particles.
        for particle_index in range(n_particles_to_append):

            mass = system_to_append.getParticleMass(index_map[particle_index])
            existing_system.addParticle(mass)

        # Append the constraints.
        for constraint_index in range(system_to_append.getNumConstraints()):

            index_a, index_b, distance = system_to_append.getConstraintParameters(
                constraint_index
            )
            existing_system.addConstraint(*shifted(index_a, index_b), distance)

        # Refuse to append anything if an unsupported force is present.
        for force_to_append in system_to_append.getForces():

            if type(force_to_append) not in supported_force_types:

                raise ValueError(
                    f"The system contains an unsupported type of "
                    f"force: {type(force_to_append)}."
                )

        number_of_appended_forces = 0

        # Append the forces.
        for force_to_append in system_to_append.getForces():

            # Look for a force of the same type to extend.
            existing_force = None

            for candidate_force in existing_system.getForces():

                if type(candidate_force) not in supported_force_types:

                    raise ValueError(
                        f"The existing system contains an unsupported type "
                        f"of force: {type(candidate_force)}."
                    )

                if type(force_to_append) == type(candidate_force):

                    existing_force = candidate_force
                    break

            # Otherwise add a freshly constructed force of that type.
            if existing_force is None:

                existing_force = type(force_to_append)()
                existing_system.addForce(existing_force)

            if isinstance(force_to_append, openmm.HarmonicBondForce):

                for bond_index in range(force_to_append.getNumBonds()):

                    index_a, index_b, *parameters = force_to_append.getBondParameters(
                        bond_index
                    )
                    existing_force.addBond(*shifted(index_a, index_b), *parameters)

            elif isinstance(force_to_append, openmm.HarmonicAngleForce):

                for angle_index in range(force_to_append.getNumAngles()):

                    (
                        index_a,
                        index_b,
                        index_c,
                        *parameters,
                    ) = force_to_append.getAngleParameters(angle_index)

                    existing_force.addAngle(
                        *shifted(index_a, index_b, index_c), *parameters
                    )

            elif isinstance(force_to_append, openmm.PeriodicTorsionForce):

                for torsion_index in range(force_to_append.getNumTorsions()):

                    (
                        index_a,
                        index_b,
                        index_c,
                        index_d,
                        *parameters,
                    ) = force_to_append.getTorsionParameters(torsion_index)

                    existing_force.addTorsion(
                        *shifted(index_a, index_b, index_c, index_d), *parameters
                    )

            elif isinstance(force_to_append, openmm.NonbondedForce):

                # Add the per-particle parameters.
                for particle_index in range(force_to_append.getNumParticles()):

                    particle_parameters = force_to_append.getParticleParameters(
                        index_map[particle_index]
                    )
                    existing_force.addParticle(*particle_parameters)

                # Add the 1-2, 1-3 and 1-4 exceptions.
                for exception_index in range(force_to_append.getNumExceptions()):

                    (
                        index_a,
                        index_b,
                        *parameters,
                    ) = force_to_append.getExceptionParameters(exception_index)

                    existing_force.addException(
                        *shifted(index_a, index_b), *parameters
                    )

            number_of_appended_forces += 1

        if number_of_appended_forces != system_to_append.getNumForces():
            raise ValueError("Not all forces were appended.")

    def _execute(self, directory, available_resources):
        """Not implemented by the base class."""
        raise NotImplementedError()
class BaseSimulation(Protocol, abc.ABC):
    """A base class for protocols which will perform a molecular
    simulation in a given ensemble and at a specified state.

    This class only declares the inputs and outputs which such
    protocols share.
    """

    # --- How long to simulate for, and how often to write output. ---

    steps_per_iteration = InputAttribute(
        docstring=(
            "The number of steps to propogate the system by at "
            "each iteration. The total number of steps performed "
            "by this protocol will be `total_number_of_iterations * "
            "steps_per_iteration`."
        ),
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1000000,
    )
    total_number_of_iterations = InputAttribute(
        docstring=(
            "The number of times to propogate the system forward by the "
            "`steps_per_iteration` number of steps. The total number of "
            "steps performed by this protocol will be `total_number_of_iterations * "
            "steps_per_iteration`."
        ),
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1,
    )

    output_frequency = InputAttribute(
        docstring=(
            "The frequency (in number of steps) with which to write to the "
            "output statistics and trajectory files."
        ),
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=3000,
    )
    checkpoint_frequency = InputAttribute(
        docstring=(
            "The frequency (in multiples of `output_frequency`) with which to "
            "write to a checkpoint file, e.g. if `output_frequency=100` and "
            "`checkpoint_frequency==2`, a checkpoint file would be saved every "
            "200 steps."
        ),
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        optional=True,
        default_value=10,
    )

    # --- The integrator settings and the state to simulate at. ---

    timestep = InputAttribute(
        docstring="The timestep to evolve the system by at each step.",
        type_hint=pint.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=2.0 * unit.femtosecond,
    )

    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic conditions to simulate under",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )
    ensemble = InputAttribute(
        docstring="The thermodynamic ensemble to simulate in.",
        type_hint=Ensemble,
        default_value=Ensemble.NPT,
    )

    thermostat_friction = InputAttribute(
        docstring="The thermostat friction coefficient.",
        type_hint=pint.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=1.0 / unit.picoseconds,
    )

    # --- The system to simulate. ---

    input_coordinate_file = InputAttribute(
        docstring="The file path to the starting coordinates.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    system_path = InputAttribute(
        docstring=(
            "A path to the XML system object which defines the forces present "
            "in the system."
        ),
        type_hint=str,
        default_value=UNDEFINED,
    )

    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled.",
        type_hint=bool,
        default_value=True,
    )

    # --- Platform options. ---

    allow_gpu_platforms = InputAttribute(
        docstring=(
            "If true, the simulation will be performed using a GPU if available, "
            "otherwise it will be constrained to only using CPUs."
        ),
        type_hint=bool,
        default_value=True,
    )
    high_precision = InputAttribute(
        docstring="If true, the simulation will be run using double precision.",
        type_hint=bool,
        default_value=False,
    )

    # --- The files produced by the simulation. ---

    output_coordinate_file = OutputAttribute(
        docstring=(
            "The file path to the coordinates of the final system configuration."
        ),
        type_hint=str,
    )
    trajectory_file_path = OutputAttribute(
        docstring=(
            "The file path to the trajectory sampled during the simulation."
        ),
        type_hint=str,
    )
    statistics_file_path = OutputAttribute(
        docstring=(
            "The file path to the statistics sampled during the simulation."
        ),
        type_hint=str,
    )
Exemple #24
0
class ReweightDielectricConstant(reweighting.BaseMBARProtocol):
    """Reweights a set of dipole moments (`reference_dipole_moments`) and volumes
    (`reference_volumes`) using MBAR, and then combines these to yield the reweighted
    dielectric constant. Uncertainties in the dielectric constant are determined
    by bootstrapping.
    """

    reference_dipole_moments = InputAttribute(
        docstring="A Quantity wrapped np.ndarray of the dipole moments of each "
        "of the reference states.",
        type_hint=list,
        default_value=UNDEFINED,
    )
    reference_volumes = InputAttribute(
        docstring="A Quantity wrapped np.ndarray of the volumes of each of the "
        "reference states.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state at which the trajectory was generated.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    def __init__(self, protocol_id):
        super().__init__(protocol_id)

        # The dielectric constant can only be reweighted with bootstrapping
        # (see `_execute`), so it is enabled by default.
        self.bootstrap_uncertainties = True

    def _bootstrap_function(
        self,
        reference_reduced_potentials,
        target_reduced_potentials,
        **reference_observables,
    ):
        """Reweights the dipoles, squared dipoles and volumes, and combines
        them into the dielectric constant.

        Parameters
        ----------
        reference_reduced_potentials
            The reference reduced potentials, which are transposed before
            being passed to `_reweight_observables`.
        target_reduced_potentials
            The target reduced potentials, which are transposed before being
            passed to `_reweight_observables`.
        reference_observables
            Exactly three observables. The `dipoles`, `dipoles_sqr` and
            `volumes` keys are read from the reweighted values.

        Returns
        -------
            The dielectric constant computed from the reweighted observables.
        """

        assert len(reference_observables) == 3

        transposed_observables = {
            key: np.transpose(observable)
            for key, observable in reference_observables.items()
        }

        values, _, _ = self._reweight_observables(
            np.transpose(reference_reduced_potentials),
            np.transpose(target_reduced_potentials),
            **transposed_observables,
        )

        # The fluctuation of the dipole moment is <mu . mu> - |<mu>|^2. The
        # norm of the average dipole must be squared so that both terms
        # share the same dimensions.
        average_squared_dipole = values["dipoles_sqr"]
        average_dipole_squared = np.linalg.norm(values["dipoles"]) ** 2

        dipole_variance = (average_squared_dipole - average_dipole_squared) * (
            unit.elementary_charge * unit.nanometers
        ) ** 2

        volume = values["volumes"] * unit.nanometer ** 3

        e0 = 8.854187817e-12 * unit.farad / unit.meter  # Taken from QCElemental

        dielectric_constant = 1.0 + dipole_variance / (
            3
            * unit.boltzmann_constant
            * self.thermodynamic_state.temperature
            * volume
            * e0
        )

        return dielectric_constant

    def _execute(self, directory, available_resources):
        """Validates the reference dipoles and volumes, and reweights them
        with bootstrapping.

        Raises
        ------
        ValueError
            If there are no dipoles or volumes, if the first entry of either
            is not a `pint.Quantity`, if the numbers of dipoles and volumes
            do not match, or if `bootstrap_uncertainties` is disabled.
        """

        if len(self.reference_dipole_moments) == 0:
            raise ValueError("There were no dipole moments to reweight.")

        if len(self.reference_volumes) == 0:
            raise ValueError("There were no volumes to reweight.")

        if not isinstance(
            self.reference_dipole_moments[0], pint.Quantity
        ) or not isinstance(self.reference_volumes[0], pint.Quantity):

            raise ValueError(
                "The reference observables should be a list of "
                "pint.Quantity wrapped ndarray's."
            )

        if len(self.reference_dipole_moments) != len(self.reference_volumes):

            raise ValueError(
                "The number of reference dipoles does not match the "
                "number of reference volumes."
            )

        # Each reference state must also provide as many dipoles as volumes.
        for reference_dipoles, reference_volumes in zip(
            self.reference_dipole_moments, self.reference_volumes
        ):

            if len(reference_dipoles) == len(reference_volumes):
                continue

            raise ValueError(
                "The number of reference dipoles does not match the "
                "number of reference volumes."
            )

        self._reference_observables = self.reference_dipole_moments

        dipole_moments = self._prepare_observables_array(self.reference_dipole_moments)

        # One squared magnitude per column of the prepared dipole array.
        dipole_moments_sqr = np.array(
            [[np.dot(dipole, dipole) for dipole in np.transpose(dipole_moments)]]
        )

        volumes = self._prepare_observables_array(self.reference_volumes)

        if self.bootstrap_uncertainties:

            self._execute_with_bootstrapping(
                unit.dimensionless,
                dipoles=dipole_moments,
                dipoles_sqr=dipole_moments_sqr,
                volumes=volumes,
            )
        else:

            raise ValueError(
                "Dielectric constant can only be reweighted in conjunction "
                "with bootstrapped uncertainties."
            )
Exemple #25
0
class FilterSubstanceByRole(Protocol):
    """A protocol which builds a new substance out of only those components
    of an input substance whose role equals `component_role`.

    Any mole fraction amounts of the retained components are divided by
    their total, unless that total is close to zero.
    """

    input_substance = InputAttribute(
        docstring="The substance to filter.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    component_role = InputAttribute(
        docstring="The role to filter substance components against.",
        type_hint=Component.Role,
        default_value=UNDEFINED,
    )

    expected_components = InputAttribute(
        docstring=(
            "The number of components expected to remain after filtering. "
            "An exception is raised if this number is not matched."
        ),
        type_hint=int,
        default_value=UNDEFINED,
        optional=True,
    )

    filtered_substance = OutputAttribute(
        docstring="The filtered substance.", type_hint=Substance
    )

    def _execute(self, directory, available_resources):
        """Filter the input substance and store the result in
        `filtered_substance`.

        Raises
        ------
        ValueError
            If `expected_components` is defined and differs from the number
            of components which remain after filtering.
        """
        substance = self.input_substance

        retained_components = []
        mole_fractions = []

        for component in substance.components:

            if component.role != self.component_role:
                continue

            retained_components.append(component)

            # Only mole fraction amounts contribute to the total.
            mole_fractions.extend(
                amount.value
                for amount in substance.get_amounts(component)
                if isinstance(amount, MoleFraction)
            )

        total_mole_fraction = sum(mole_fractions, 0.0)

        expected_count = self.expected_components

        if expected_count != UNDEFINED and expected_count != len(
            retained_components
        ):

            raise ValueError(
                f"The filtered substance does not contain the expected number of "
                f"components ({expected_count}) - {retained_components}"
            )

        # Avoid dividing by a (close to) zero total.
        if np.isclose(total_mole_fraction, 0.0):
            inverse_mole_fraction = 1.0
        else:
            inverse_mole_fraction = 1.0 / total_mole_fraction

        self.filtered_substance = Substance()

        for component in retained_components:

            for amount in substance.get_amounts(component):

                if isinstance(amount, MoleFraction):
                    amount = MoleFraction(amount.value * inverse_mole_fraction)

                self.filtered_substance.add_component(component, amount)
Exemple #26
0
class ExtractAverageDielectric(analysis.AverageTrajectoryProperty):
    """Extracts the average dielectric constant from a simulation trajectory.

    The dipole moments and volumes are read from the trajectory, the dipoles
    are decorrelated, and the dielectric constant and its uncertainty are
    then estimated by bootstrapping.
    """

    system_path = InputAttribute(
        docstring="The path to the XML system object which defines the forces present in the system.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state at which the trajectory was generated.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    dipole_moments = OutputAttribute(
        docstring="The raw (possibly correlated) dipole moments which were used in "
        "the dielectric calculation.",
        type_hint=pint.Quantity,
    )
    volumes = OutputAttribute(
        docstring="The raw (possibly correlated) volumes which were used in the "
        "dielectric calculation.",
        type_hint=pint.Quantity,
    )

    uncorrelated_volumes = OutputAttribute(
        docstring="The uncorrelated volumes which were used in the dielectric "
        "calculation.",
        type_hint=pint.Quantity,
    )

    def _bootstrap_function(self, **sample_kwargs):
        """Calculates the static dielectric constant from an
        array of dipoles and volumes.

        Notes
        -----
        The static dielectric constant is taken from Equation 7 of [1]

        References
        ----------
        [1] A. Glattli, X. Daura and W. F. van Gunsteren. Derivation of an improved simple point charge
            model for liquid water: SPC/A and SPC/L. J. Chem. Phys. 116(22):9811-9828, 2002

        Parameters
        ----------
        sample_kwargs: dict of str and np.ndarray
            A key words dictionary of the bootstrap sample data, where the
            sample data is a numpy array of shape=(num_frames, num_dimensions)
            with dtype=float. The kwargs must include the dipole moments
            (`dipoles`) and the system volumes (`volumes`).

        Returns
        -------
            The static dielectric constant, as produced by the unit
            arithmetic below.
        """

        dipole_moments = sample_kwargs["dipoles"]
        volumes = sample_kwargs["volumes"]

        temperature = self.thermodynamic_state.temperature

        # The mean over frames of |mu - <mu>|^2.
        dipole_mu = dipole_moments.mean(0)
        shifted_dipoles = dipole_moments - dipole_mu

        dipole_variance = (shifted_dipoles * shifted_dipoles).sum(-1).mean(0) * (
            unit.elementary_charge * unit.nanometers
        ) ** 2

        volume = volumes.mean() * unit.nanometer ** 3

        e0 = 8.854187817e-12 * unit.farad / unit.meter  # Taken from QCElemental

        dielectric_constant = 1.0 + dipole_variance / (
            3 * unit.boltzmann_constant * temperature * volume * e0
        )

        return dielectric_constant

    def _extract_charges(self):
        """Extracts the charge of each particle in each nonbonded force of
        the system stored at `system_path`.

        Returns
        -------
        list of float
            The charges, in units of elementary charge.
        """
        from simtk import unit as simtk_unit

        charge_list = []

        # Read the path through the declared `system_path` input, as is done
        # for every other input, rather than through a private attribute.
        with open(self.system_path, "r") as file:
            system = XmlSerializer.deserialize(file.read())

        for force_index in range(system.getNumForces()):

            force = system.getForce(force_index)

            if not isinstance(force, openmm.NonbondedForce):
                continue

            for atom_index in range(force.getNumParticles()):
                # The charge is the first of the particle parameters.
                charge = force.getParticleParameters(atom_index)[0]
                charge = charge.value_in_unit(simtk_unit.elementary_charge)

                charge_list.append(charge)

        return charge_list

    def _extract_dipoles_and_volumes(self):
        """Extract the systems dipole moments and volumes, reading the
        trajectory in chunks of 50 frames.

        Returns
        -------
        numpy.ndarray
            The dipole moments of the trajectory (one entry per frame).
        numpy.ndarray
            The unit cell volumes of the trajectory (one entry per frame).
        """
        import mdtraj

        dipole_moments = []
        volumes = []
        charge_list = self._extract_charges()

        for chunk in mdtraj.iterload(
            self.trajectory_path, top=self.input_coordinate_file, chunk=50
        ):

            dipole_moments.extend(mdtraj.geometry.dipole_moments(chunk, charge_list))
            volumes.extend(chunk.unitcell_volumes)

        dipole_moments = np.array(dipole_moments)
        volumes = np.array(volumes)

        return dipole_moments, volumes

    def _execute(self, directory, available_resources):
        """Computes the dielectric constant, storing the raw and
        uncorrelated dipoles and volumes along with the bootstrapped
        value and its uncertainty.
        """

        super(ExtractAverageDielectric, self)._execute(directory, available_resources)

        # Extract the dipoles
        dipole_moments, volumes = self._extract_dipoles_and_volumes()
        self.dipole_moments = dipole_moments * unit.dimensionless

        (
            dipole_moments,
            self.equilibration_index,
            self.statistical_inefficiency,
        ) = timeseries.decorrelate_time_series(dipole_moments)

        # Subsample the volumes with the equilibration index and statistical
        # inefficiency which were determined from the dipoles.
        uncorrelated_length = len(volumes) - self.equilibration_index

        sample_indices = timeseries.get_uncorrelated_indices(
            uncorrelated_length, self.statistical_inefficiency
        )
        sample_indices = [index + self.equilibration_index for index in sample_indices]

        self.volumes = volumes * unit.nanometer ** 3
        uncorrelated_volumes = volumes[sample_indices]

        self.uncorrelated_values = dipole_moments * unit.dimensionless
        self.uncorrelated_volumes = uncorrelated_volumes * unit.nanometer ** 3

        value, uncertainty = bootstrap(
            self._bootstrap_function,
            self.bootstrap_iterations,
            self.bootstrap_sample_size,
            dipoles=dipole_moments,
            volumes=uncorrelated_volumes,
        )

        self.value = (value * unit.dimensionless).plus_minus(
            uncertainty * unit.dimensionless
        )
class CentralDifferenceGradient(Protocol):
    """A protocol which employs the central difference method
    to estimate the gradient of an observable A, such that

    grad = (A(x+h) - A(x-h)) / (2h)

    where `A(x+h)` is the `forward_observable_value`, `A(x-h)` is the
    `reverse_observable_value`, and `2h` is the difference between the
    `forward_parameter_value` and the `reverse_parameter_value`.

    Notes
    -----
    The observable values may be either `pint.Quantity` or `pint.Measurement`
    objects. Only the value of a `pint.Measurement` is used; its uncertainty
    is not propagated into the gradient.
    """

    parameter_key = InputAttribute(
        docstring="The key of the parameter to differentiate with respect to.",
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED,
    )

    reverse_observable_value = InputAttribute(
        docstring="The value of the observable evaluated using the parameters "
        "perturbed in the reverse direction.",
        type_hint=typing.Union[pint.Quantity, pint.Measurement],
        default_value=UNDEFINED,
    )
    forward_observable_value = InputAttribute(
        docstring="The value of the observable evaluated using the parameters "
        "perturbed in the forward direction.",
        type_hint=typing.Union[pint.Quantity, pint.Measurement],
        default_value=UNDEFINED,
    )

    reverse_parameter_value = InputAttribute(
        docstring="The value of the parameter perturbed in the reverse direction.",
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
    )
    forward_parameter_value = InputAttribute(
        docstring="The value of the parameter perturbed in the forward direction.",
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
    )

    gradient = OutputAttribute(
        docstring="The estimated gradient", type_hint=ParameterGradient
    )

    def _execute(self, directory, available_resources):
        """Compute the finite difference gradient and store it in `gradient`.

        Raises
        ------
        ValueError
            If the forward parameter value is not strictly larger than the
            reverse parameter value.
        """

        # Equal parameter values would make the denominator below zero, so the
        # forward value must be strictly larger (as the error message states).
        if self.forward_parameter_value <= self.reverse_parameter_value:

            raise ValueError(
                f"The forward parameter value ({self.forward_parameter_value}) must "
                f"be larger than the reverse value ({self.reverse_parameter_value})."
            )

        reverse_value = self.reverse_observable_value
        forward_value = self.forward_observable_value

        # Strip any uncertainties - only the central values are differenced.
        if isinstance(reverse_value, pint.Measurement):
            reverse_value = reverse_value.value

        if isinstance(forward_value, pint.Measurement):
            forward_value = forward_value.value

        gradient = (forward_value - reverse_value) / (
            self.forward_parameter_value - self.reverse_parameter_value
        )

        self.gradient = ParameterGradient(self.parameter_key, gradient)
class BaseGradientPotentials(Protocol, abc.ABC):
    """A base class for protocols which will evaluate the reduced potentials of a
    series of configurations using a set of force field parameters which have been
    slightly increased and slightly decreased. These are mainly useful when
    estimating gradients with respect to force field parameters using the central
    difference method.

    This base class only declares the shared inputs and outputs; it does not
    define an `_execute` method itself.
    """

    # --- Inputs describing the system and where its data lives. ---

    force_field_path = InputAttribute(
        docstring="The path to the force field which contains the parameters to "
        "differentiate the observable with respect to. When reweighting "
        "observables, this should be the `target` force field.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    statistics_path = InputAttribute(
        docstring="The path to a statistics array containing potentials "
        "evaluated at each frame of the trajectory using the input "
        "`force_field_path` and at the input `thermodynamic_state`.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state to estimate the gradients at. When "
        "reweighting observables, this should be the `target` state.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    substance = InputAttribute(
        docstring="The substance which describes the composition of the system.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    coordinate_file_path = InputAttribute(
        docstring="A path to a PDB coordinate file which describes the topology of "
        "the system.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    trajectory_file_path = InputAttribute(
        docstring="A path to the trajectory of configurations",
        type_hint=str,
        default_value=UNDEFINED,
    )

    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled when "
        "re-evaluating the reduced potentials.",
        type_hint=bool,
        default_value=True,
    )

    # --- Inputs controlling which parameter is perturbed, and by how much. ---

    parameter_key = InputAttribute(
        docstring="The key of the parameter to differentiate with respect to.",
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED,
    )
    perturbation_scale = InputAttribute(
        docstring="The amount to perturb the parameter by, such that "
        "p_new = p_old * (1 +/- `perturbation_scale`)",
        type_hint=float,
        default_value=1.0e-4,
    )

    use_subset_of_force_field = InputAttribute(
        docstring="If true, the reduced potentials will be estimated using "
        "a system which only contains the parameters of interest, e.g. if the "
        "gradient of interest is with respect to the VdW epsilon parameter, then "
        "all valence / electrostatic terms will be ignored.",
        type_hint=bool,
        default_value=True,
    )

    effective_sample_indices = InputAttribute(
        docstring="This is a placeholder input which is not currently implemented.",
        type_hint=list,
        default_value=UNDEFINED,
        optional=True,
    )

    # --- Outputs: one potentials file and one parameter value per direction. ---

    reverse_potentials_path = OutputAttribute(
        docstring="A file path to the energies evaluated using the parameters "
        "perturbed in the reverse direction.",
        type_hint=str,
    )
    forward_potentials_path = OutputAttribute(
        docstring="A file path to the energies evaluated using the parameters "
        "perturbed in the forward direction.",
        type_hint=str,
    )
    reverse_parameter_value = OutputAttribute(
        docstring="The value of the parameter perturbed in the reverse direction.",
        type_hint=pint.Quantity,
    )
    forward_parameter_value = OutputAttribute(
        docstring="The value of the parameter perturbed in the forward direction.",
        type_hint=pint.Quantity,
    )
class BuildDockedCoordinates(Protocol):
    """Creates a set of coordinates for a ligand bound to some receptor.

    The ligand is built from the SMILES pattern of the single component of
    `ligand_substance` and docked into the receptor read from
    `receptor_coordinate_file`. The docked ligand and the merged
    ligand-receptor complex are written as PDB files into the working directory.

    Notes
    -----
    This protocol currently only supports docking with the OpenEye OEDocking
    framework.
    """

    class ActivateSiteLocation(Enum):
        """An enum which describes the methods by which a receptor's
        active site(s) are located."""

        ReceptorCenterOfMass = "ReceptorCenterOfMass"

    ligand_substance = InputAttribute(
        docstring="A substance containing only the ligand to dock.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )
    number_of_ligand_conformers = InputAttribute(
        docstring="The number of conformers to try and dock into the "
        "receptor structure.",
        type_hint=int,
        default_value=100,
    )

    receptor_coordinate_file = InputAttribute(
        docstring="The file path to the MOL2 coordinates of the receptor molecule.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    # NOTE(review): this input is not read anywhere in this class -
    # `_create_receptor` always uses the receptor's center of mass.
    activate_site_location = InputAttribute(
        docstring="Defines the method by which the activate site is identified.",
        type_hint=ActivateSiteLocation,
        default_value=ActivateSiteLocation.ReceptorCenterOfMass,
    )

    docked_ligand_coordinate_path = OutputAttribute(
        docstring="The file path to the coordinates of the ligand in "
        "it's docked pose, aligned with the initial "
        "`receptor_coordinate_file`.",
        type_hint=str,
    )
    docked_complex_coordinate_path = OutputAttribute(
        docstring="The file path to the docked ligand-receptor complex.",
        type_hint=str,
    )

    ligand_residue_name = OutputAttribute(
        docstring="The residue name assigned to the docked ligand.",
        type_hint=str,
    )
    receptor_residue_name = OutputAttribute(
        docstring="The residue name assigned to the receptor.", type_hint=str
    )

    def __init__(self, protocol_id):
        super().__init__(protocol_id)

        # The residue names are fixed up front so that they are available as
        # outputs as soon as the protocol has been created.
        self.ligand_residue_name = "LIG"
        self.receptor_residue_name = "REC"

    def _create_receptor(self):
        """Create an OpenEye receptor from a mol2 file.

        The receptor is built around the center of mass of the molecule read
        from `receptor_coordinate_file`.

        Returns
        -------
        openeye.oechem.OEGraphMol
            The molecule populated by `oedocking.OEMakeReceptor`.

        Raises
        ------
        ValueError
            If no molecule could be read from `receptor_coordinate_file`.
        """
        from openeye import oechem, oedocking

        original_receptor_molecule = oechem.OEGraphMol()

        input_stream = oechem.oemolistream(self.receptor_coordinate_file)

        try:
            molecule_was_read = oechem.OEReadMolecule(
                input_stream, original_receptor_molecule
            )
        finally:
            # Close the input stream, mirroring how the output streams are
            # closed in `_execute`.
            input_stream.close()

        if not molecule_was_read:
            # Fail here with a clear message rather than continuing with an
            # empty receptor molecule.
            raise ValueError(
                f"The receptor molecule could not be read from "
                f"{self.receptor_coordinate_file}."
            )

        center_of_mass = oechem.OEFloatArray(3)
        oechem.OEGetCenterOfMass(original_receptor_molecule, center_of_mass)

        receptor = oechem.OEGraphMol()
        oedocking.OEMakeReceptor(
            receptor,
            original_receptor_molecule,
            center_of_mass[0],
            center_of_mass[1],
            center_of_mass[2],
        )

        return receptor

    def _create_ligand(self):
        """Create a multi-conformer OpenEye ligand from the SMILES pattern of
        the first component of `ligand_substance`.

        Returns
        -------
        openeye.oechem.OEMol
            The OpenEye ligand object with multiple conformers.
        """
        from openforcefield.topology import Molecule

        ligand = Molecule.from_smiles(self.ligand_substance.components[0].smiles)
        ligand.generate_conformers(n_conformers=self.number_of_ligand_conformers)

        # Assign AM1-BCC charges to the ligand just as an initial guess
        # for docking. In future, we may want to get the charge model
        # directly from the force field.
        ligand.compute_partial_charges_am1bcc()

        return ligand.to_openeye()

    def _execute(self, directory, available_resources):
        """Dock the ligand into the receptor and write the resulting PDB files.

        Raises
        ------
        ValueError
            If `ligand_substance` does not contain exactly one component with
            the ligand role.
        RuntimeError
            If the docking does not report success.
        """

        import mdtraj
        from openeye import oechem, oedocking
        from simtk import unit as simtk_unit

        if (
            len(self.ligand_substance.components) != 1
            or self.ligand_substance.components[0].role != Component.Role.Ligand
        ):

            raise ValueError(
                "The ligand substance must contain a single ligand component."
            )

        logger.info("Initializing the receptor molecule.")
        receptor_molecule = self._create_receptor()

        logger.info("Initializing the ligand molecule.")
        ligand_molecule = self._create_ligand()

        logger.info("Initializing the docking object.")

        # Dock the ligand to the receptor.
        dock = oedocking.OEDock()
        dock.Initialize(receptor_molecule)

        docked_ligand = oechem.OEGraphMol()

        logger.info("Performing the docking.")

        status = dock.DockMultiConformerMolecule(docked_ligand, ligand_molecule)

        if status != oedocking.OEDockingReturnCode_Success:
            raise RuntimeError("The ligand could not be successfully docked")

        docking_method = oedocking.OEDockMethodGetName(oedocking.OEDockMethod_Default)
        oedocking.OESetSDScore(docked_ligand, dock, docking_method)

        dock.AnnotatePose(docked_ligand)

        # Write the docked ligand and the receptor out as PDB files so that
        # they can be re-loaded (and merged) using mdtraj below.
        self.docked_ligand_coordinate_path = path.join(directory, "ligand.pdb")

        output_stream = oechem.oemolostream(self.docked_ligand_coordinate_path)
        oechem.OEWriteMolecule(output_stream, docked_ligand)
        output_stream.close()

        receptor_pdb_path = path.join(directory, "receptor.pdb")

        output_stream = oechem.oemolostream(receptor_pdb_path)
        oechem.OEWriteMolecule(output_stream, receptor_molecule)
        output_stream.close()

        ligand_trajectory = mdtraj.load(self.docked_ligand_coordinate_path)

        ligand_residue = ligand_trajectory.topology.residue(0)
        ligand_residue.name = self.ligand_residue_name

        # Save the ligand file with the correct residue name.
        ligand_trajectory.save(self.docked_ligand_coordinate_path)

        receptor_trajectory = mdtraj.load(receptor_pdb_path)

        receptor_residue = receptor_trajectory.topology.residue(0)
        receptor_residue.name = self.receptor_residue_name

        # Create a merged ligand-receptor topology.
        complex_topology = ligand_trajectory.topology.copy()

        # Maps each receptor atom onto its copy in the merged topology so that
        # the receptor bonds can be re-created between the copied atoms.
        atom_mapping = {}

        new_residue = complex_topology.add_residue(
            receptor_residue.name, complex_topology.chain(0)
        )

        # NOTE(review): only the atoms of the first receptor residue are copied,
        # while the bonds and positions below cover the whole receptor topology.
        # Presumably the receptor is expected to be a single residue - confirm.
        for receptor_atom in receptor_residue.atoms:

            new_atom = complex_topology.add_atom(
                receptor_atom.name,
                receptor_atom.element,
                new_residue,
                serial=receptor_atom.serial,
            )

            atom_mapping[receptor_atom] = new_atom

        for bond in receptor_trajectory.topology.bonds:

            complex_topology.add_bond(
                atom_mapping[bond[0]],
                atom_mapping[bond[1]],
                type=bond.type,
                order=bond.order,
            )

        # The ligand atoms come first in the merged topology, so the ligand
        # positions must also come first.
        complex_positions = []

        complex_positions.extend(
            ligand_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom)
        )
        complex_positions.extend(
            receptor_trajectory.openmm_positions(0).value_in_unit(simtk_unit.angstrom)
        )

        # Re-attach the units which were stripped while merging the two lists.
        complex_positions *= simtk_unit.angstrom

        self.docked_complex_coordinate_path = path.join(directory, "complex.pdb")

        with open(self.docked_complex_coordinate_path, "w+") as file:
            app.PDBFile.writeFile(
                complex_topology.to_openmm(), complex_positions, file
            )
Exemple #30
0
class WeightByMoleFraction(Protocol):
    """Multiplies a value by the mole fraction of a component
    in a `Substance`.

    The `component` input must be a substance which contains exactly one
    component, and that component must be defined in `full_substance` by a
    single mole fraction amount.
    """

    value = InputAttribute(
        docstring="The value to be weighted.",
        type_hint=typing.Union[
            float, int, pint.Measurement, pint.Quantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )

    component = InputAttribute(
        docstring="The component whose mole fraction to weight by.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )
    full_substance = InputAttribute(
        docstring="The full substance which describes the mole fraction of the "
        "component.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    weighted_value = OutputAttribute(
        docstring="The value weighted by the `component`s mole fraction as "
        "determined from the `full_substance`.",
        type_hint=typing.Union[
            float, int, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _weight_values(self, mole_fraction):
        """Weights a value by a components mole fraction.

        Parameters
        ----------
        mole_fraction: float
            The mole fraction to weight by.

        Returns
        -------
        float, int, pint.Measurement, pint.Quantity, ParameterGradient
            The weighted value.
        """
        return self.value * mole_fraction

    def _execute(self, directory, available_resources):
        """Look up the mole fraction of `component` in `full_substance` and
        store the weighted value in `weighted_value`.

        Raises
        ------
        ValueError
            If `component` does not contain exactly one component, or if that
            component is not defined in `full_substance` by exactly one mole
            fraction amount.
        """

        # Validate explicitly rather than with `assert`, which is stripped when
        # python is run with optimizations enabled.
        if len(self.component.components) != 1:

            raise ValueError(
                "The `component` substance must contain exactly one component."
            )

        main_component = self.component.components[0]
        amounts = self.full_substance.get_amounts(main_component)

        if len(amounts) == 0:

            raise ValueError(
                f"No amounts were defined for component {main_component}. Only a "
                f"single mole fraction must be defined.",
            )

        if len(amounts) != 1:

            raise ValueError(
                f"More than one type of amount was defined for component "
                f"{main_component}. Only a single mole fraction must be defined.",
            )

        amount = next(iter(amounts))

        if not isinstance(amount, MoleFraction):

            raise ValueError(
                f"The component {main_component} was given as an exact amount, and "
                f"not a mole fraction"
            )

        self.weighted_value = self._weight_values(amount.value)