コード例 #1
0
class SubtractValues(BaseProtocol):
    """Protocol computing the difference between two values.

    The output is defined as `result = value_b - value_a`.
    """

    value_a = protocol_input(
        docstring='`value_a` in the formula `result` = `value_b` - `value_a`.',
        type_hint=typing.Union[
            int, float, unit.Quantity, EstimatedQuantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    value_b = protocol_input(
        docstring='`value_b` in the formula `result` = `value_b` - `value_a`.',
        type_hint=typing.Union[
            int, float, unit.Quantity, EstimatedQuantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )

    result = protocol_output(
        docstring='The results of `value_b` - `value_a`.',
        type_hint=typing.Union[
            int, float, EstimatedQuantity, unit.Quantity, ParameterGradient
        ],
    )

    def execute(self, directory, available_resources):
        """Store `value_b - value_a` in `result`.

        Neither `directory` nor `available_resources` is used by this
        protocol. Returns whatever `_get_output_dictionary()` produces.
        """
        minuend, subtrahend = self.value_b, self.value_a
        self.result = minuend - subtrahend

        return self._get_output_dictionary()
コード例 #2
0
class AverageTrajectoryProperty(AveragePropertyProtocol):
    """Abstract base for protocols which average a property over the
    frames of a simulation trajectory.
    """

    input_coordinate_file = protocol_input(
        docstring='The file path to the starting coordinates of a trajectory.',
        type_hint=str,
        default_value=UNDEFINED,
    )
    trajectory_path = protocol_input(
        docstring='The file path to the trajectory to average over.',
        type_hint=str,
        default_value=UNDEFINED,
    )

    def execute(self, directory, available_resources):
        """Check that a trajectory was provided.

        Returns the output dictionary when `trajectory_path` is not
        `None`; otherwise returns (does not raise) a
        `PropertyEstimatorException` describing the missing input.
        """
        if self.trajectory_path is not None:
            return self._get_output_dictionary()

        error_message = ('The AverageTrajectoryProperty protocol '
                         'requires a previously calculated trajectory')

        return PropertyEstimatorException(directory=directory,
                                          message=error_message)
コード例 #3
0
class MultiplyValue(BaseProtocol):
    """Protocol which scales a value by a scalar multiplier."""

    value = protocol_input(
        docstring='The value to multiply.',
        type_hint=typing.Union[
            int, float, unit.Quantity, EstimatedQuantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    multiplier = protocol_input(
        docstring='The scalar to multiply by.',
        type_hint=typing.Union[int, float, unit.Quantity],
        default_value=UNDEFINED,
    )

    result = protocol_output(
        docstring='The result of the multiplication.',
        type_hint=typing.Union[
            int, float, EstimatedQuantity, unit.Quantity, ParameterGradient
        ],
    )

    def execute(self, directory, available_resources):
        """Store `value * multiplier` in `result`.

        An `EstimatedQuantity` input is rebuilt from its scaled value and
        scaled uncertainty, carrying over its sources. Any other input is
        multiplied directly.
        """
        operand, factor = self.value, self.multiplier

        if not isinstance(operand, EstimatedQuantity):
            self.result = operand * factor
        else:
            scaled_value = operand.value * factor
            scaled_uncertainty = operand.uncertainty * factor

            self.result = EstimatedQuantity(scaled_value,
                                            scaled_uncertainty,
                                            *operand.sources)

        return self._get_output_dictionary()
コード例 #4
0
class AveragePropertyProtocol(BaseProtocol):
    """Abstract base for protocols that average a property and estimate
    the uncertainty in that average by bootstrapping.
    """

    bootstrap_iterations = protocol_input(
        docstring='The number of bootstrap iterations to perform.',
        type_hint=int,
        default_value=250,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    bootstrap_sample_size = protocol_input(
        docstring='The relative sample size to use for bootstrapping.',
        type_hint=float,
        default_value=1.0,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    equilibration_index = protocol_output(
        docstring='The index in the data set after which the data is stationary.',
        type_hint=int,
    )
    statistical_inefficiency = protocol_output(
        docstring='The statistical inefficiency in the data set.',
        type_hint=float,
    )

    value = protocol_output(
        docstring='The average value and its uncertainty.',
        type_hint=EstimatedQuantity,
    )
    uncorrelated_values = protocol_output(
        docstring='The uncorrelated values which the average was calculated from.',
        type_hint=unit.Quantity,
    )

    def _bootstrap_function(self, **sample_kwargs):
        """Evaluate the quantity of interest on one bootstrap sample.

        Parameters
        ----------
        sample_kwargs: dict of str and np.ndarray
            Keyword arguments holding the bootstrap sample data. Exactly
            one entry is expected; each entry is a numpy array of
            shape=(num_frames, num_dimensions) with dtype=float.

        Returns
        -------
        float
            The mean of the single sample array.
        """

        # This default implementation only knows how to average one array.
        assert len(sample_kwargs) == 1

        only_sample = next(iter(sample_kwargs.values()))
        return only_sample.mean()

    def execute(self, directory, available_resources):
        """Return the output dictionary without doing any work here."""
        return self._get_output_dictionary()
コード例 #5
0
class ExtractUncorrelatedData(BaseProtocol):
    """Abstract base for protocols which subsample a data set so that
    only equilibrated, uncorrelated data remains.
    """

    equilibration_index = protocol_input(
        docstring='The index in the data set after which the data is stationary.',
        type_hint=int,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    statistical_inefficiency = protocol_input(
        docstring='The statistical inefficiency in the data set.',
        type_hint=float,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    number_of_uncorrelated_samples = protocol_output(
        docstring='The number of uncorrelated samples.',
        type_hint=int,
    )

    def execute(self, directory, available_resources):
        """Not implemented on the base class; always raises
        `NotImplementedError`.
        """
        raise NotImplementedError()
コード例 #6
0
class ConcatenateTrajectories(BaseProtocol):
    """Protocol which joins several trajectories into a single one."""

    input_coordinate_paths = protocol_input(
        docstring='A list of paths to the starting PDB coordinates for each of the trajectories.',
        type_hint=list,
        default_value=UNDEFINED,
    )
    input_trajectory_paths = protocol_input(
        docstring='A list of paths to the trajectories to concatenate.',
        type_hint=list,
        default_value=UNDEFINED,
    )

    output_coordinate_path = protocol_output(
        docstring='The path the PDB coordinate file which contains the topology '
                  'of the concatenated trajectory.',
        type_hint=str,
    )

    output_trajectory_path = protocol_output(
        docstring='The path to the concatenated trajectory.',
        type_hint=str,
    )

    def execute(self, directory, available_resources):
        """Load every input trajectory, join them and save the result.

        The joined trajectory is written to `output_trajectory.dcd` inside
        `directory`, and the first non-empty input coordinate path is
        reported as `output_coordinate_path`.

        Returns a `PropertyEstimatorException` (rather than raising) when
        the two input lists differ in length or when no trajectories were
        provided; otherwise returns the output dictionary.
        """

        import mdtraj

        coordinate_paths = self.input_coordinate_paths
        trajectory_paths = self.input_trajectory_paths

        if len(coordinate_paths) != len(trajectory_paths):

            return PropertyEstimatorException(
                directory=directory,
                message='There should be the same number of '
                        'coordinate and trajectory paths.'
            )

        if len(trajectory_paths) == 0:

            return PropertyEstimatorException(
                directory=directory,
                message='No trajectories were '
                        'given to concatenate.'
            )

        loaded_trajectories = [
            mdtraj.load_dcd(trajectory_file, coordinate_file)
            for coordinate_file, trajectory_file in zip(coordinate_paths,
                                                        trajectory_paths)
        ]

        # Keep the first truthy coordinate path; if none is truthy the last
        # value seen is retained.
        topology_path = None

        for candidate_path in coordinate_paths:

            if not topology_path:
                topology_path = candidate_path

        self.output_coordinate_path = topology_path

        if len(loaded_trajectories) == 1:
            # Nothing to join when only a single trajectory was provided.
            joined_trajectory = loaded_trajectories[0]
        else:
            joined_trajectory = mdtraj.join(loaded_trajectories, False, False)

        self.output_trajectory_path = path.join(directory, 'output_trajectory.dcd')
        joined_trajectory.save_dcd(self.output_trajectory_path)

        return self._get_output_dictionary()
コード例 #7
0
class DummyReplicableProtocol(BaseProtocol):
    """Dummy protocol declaring two replicated inputs and one output.

    The class only declares attributes; it defines no `execute` of its own.
    """

    replicated_value_a = protocol_input(
        docstring='',
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )
    replicated_value_b = protocol_input(
        docstring='',
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )
    final_value = protocol_output(
        docstring='',
        type_hint=EstimatedQuantity,
    )
コード例 #8
0
class ConcatenateStatistics(BaseProtocol):
    """Protocol which joins several statistics arrays, each read from a
    csv file, into a single statistics array.
    """

    input_statistics_paths = protocol_input(
        docstring='A list of paths to statistics arrays to concatenate.',
        type_hint=list,
        default_value=UNDEFINED,
    )
    output_statistics_path = protocol_output(
        docstring='The path the csv file which contains the concatenated statistics.',
        type_hint=str,
    )

    def execute(self, directory, available_resources):
        """Load each statistics file, join them and save the result.

        The joined array is written to `output_statistics.csv` inside
        `directory`. Returns a `PropertyEstimatorException` (rather than
        raising) when no input paths were provided; otherwise returns the
        output dictionary.
        """

        statistics_paths = self.input_statistics_paths

        if len(statistics_paths) == 0:

            return PropertyEstimatorException(
                directory=directory,
                message='No statistics arrays were '
                        'given to concatenate.'
            )

        loaded_arrays = [
            StatisticsArray.from_pandas_csv(statistics_path)
            for statistics_path in statistics_paths
        ]

        # At least one array exists here, so a single array is used as is.
        if len(loaded_arrays) == 1:
            combined_array = loaded_arrays[0]
        else:
            combined_array = StatisticsArray.join(*loaded_arrays)

        self.output_statistics_path = path.join(directory, 'output_statistics.csv')
        combined_array.to_pandas_csv(self.output_statistics_path)

        return self._get_output_dictionary()
コード例 #9
0
class SolvateExistingStructure(BuildCoordinatesPackmol):
    """Solvates a set of 3D coordinates with a specified solvent
    using the PACKMOL package.
    """

    solute_coordinate_file = protocol_input(
        docstring='A file path to the solute to solvate.',
        type_hint=str,
        default_value=UNDEFINED
    )

    def __init__(self, protocol_id):
        """Constructs a new SolvateExistingStructure object."""
        super().__init__(protocol_id)

    def execute(self, directory, available_resources):
        """Pack solvent molecules around the solute structure.

        Parameters
        ----------
        directory: str
            The directory in which the packmol working files
            (`packmol_files`) and the results are stored.
        available_resources
            Not used by this method.

        Returns
        -------
        The output dictionary on success, or a `PropertyEstimatorException`
        (returned, not raised) when a required input is `None`, when the
        molecule arrays could not be built, or when packmol yields no
        topology or positions.
        """

        # Validate the inputs *before* logging: the log message dereferences
        # `self.substance`, so logging first would raise an AttributeError
        # and the friendly error below could never be returned.
        if self.substance is None:
            return PropertyEstimatorException(directory=directory,
                                              message='The substance input is non-optional')

        if self.solute_coordinate_file is None:
            return PropertyEstimatorException(directory=directory,
                                              message='The solute coordinate file input is non-optional')

        logging.info(f'Generating coordinates for {self.substance.identifier}: {self.id}')

        molecules, number_of_molecules, exception = self._build_molecule_arrays(directory)

        if exception is not None:
            return exception

        packmol_directory = path.join(directory, 'packmol_files')

        # Create packed box
        topology, positions = packmol.pack_box(molecules=molecules,
                                               number_of_copies=number_of_molecules,
                                               structure_to_solvate=self.solute_coordinate_file,
                                               mass_density=self.mass_density,
                                               verbose=self.verbose_packmol,
                                               working_directory=packmol_directory,
                                               retain_working_files=self.retain_packmol_files)

        if topology is None or positions is None:
            return PropertyEstimatorException(directory=directory,
                                              message='Packmol failed to complete.')

        self._save_results(directory, topology, positions)

        return self._get_output_dictionary()
コード例 #10
0
class DivideValue(BaseProtocol):
    """Protocol which divides a value by a scalar divisor."""

    value = protocol_input(
        docstring='The value to divide.',
        type_hint=typing.Union[
            int, float, unit.Quantity, EstimatedQuantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    divisor = protocol_input(
        docstring='The scalar to divide by.',
        type_hint=typing.Union[int, float, unit.Quantity],
        default_value=UNDEFINED,
    )

    result = protocol_output(
        docstring='The result of the division.',
        type_hint=typing.Union[
            int, float, EstimatedQuantity, unit.Quantity, ParameterGradient
        ],
    )

    def execute(self, directory, available_resources):
        """Store `value / divisor` in `result`.

        Neither `directory` nor `available_resources` is used by this
        protocol. Returns whatever `_get_output_dictionary()` produces.
        """
        numerator, denominator = self.value, self.divisor
        self.result = numerator / denominator

        return self._get_output_dictionary()
コード例 #11
0
class DummyInputOutputProtocol(BaseProtocol):
    """Dummy protocol which passes its input straight to its output."""

    input_value = protocol_input(
        docstring='A dummy input.',
        type_hint=Union[
            str, int, float, unit.Quantity, EstimatedQuantity,
            list, tuple, dict, set, frozenset
        ],
        default_value=UNDEFINED,
    )
    output_value = protocol_output(
        docstring='A dummy output.',
        type_hint=Union[
            str, int, float, unit.Quantity, EstimatedQuantity,
            list, tuple, dict, set, frozenset
        ],
    )

    def execute(self, directory, available_resources):
        """Copy `input_value` to `output_value` unchanged."""
        passthrough = self.input_value
        self.output_value = passthrough

        return self._get_output_dictionary()
コード例 #12
0
class ExtractUncorrelatedStatisticsData(ExtractUncorrelatedData):
    """Protocol which subsamples a statistics array, keeping only the
    uncorrelated entries determined by the provided equilibration index
    and statistical inefficiency.
    """

    input_statistics_path = protocol_input(
        docstring='The file path to the statistics to subsample.',
        type_hint=str,
        default_value=UNDEFINED,
    )

    output_statistics_path = protocol_output(
        docstring='The file path to the subsampled statistics.',
        type_hint=str,
    )

    def execute(self, directory, available_resources):
        """Subsample the input statistics and save the retained rows.

        The subsampled array is written to `uncorrelated_statistics.csv`
        inside `directory`, and the number of retained rows is stored in
        `number_of_uncorrelated_samples`.

        Returns a `PropertyEstimatorException` (rather than raising) when
        `input_statistics_path` is `None`; otherwise returns the output
        dictionary.
        """

        logging.info('Subsampling statistics: {}'.format(self.id))

        if self.input_statistics_path is None:

            error_message = ('The ExtractUncorrelatedStatisticsData protocol '
                             'requires a previously calculated statisitics file')

            return PropertyEstimatorException(directory=directory,
                                              message=error_message)

        full_statistics = StatisticsArray.from_pandas_csv(self.input_statistics_path)

        # Only the frames after the equilibration index are subsampled.
        production_length = len(full_statistics) - self.equilibration_index

        relative_indices = timeseries.get_uncorrelated_indices(
            production_length, self.statistical_inefficiency
        )

        # Shift the indices back so they refer to rows of the full array.
        absolute_indices = [
            relative_index + self.equilibration_index
            for relative_index in relative_indices
        ]

        subsampled_statistics = StatisticsArray.from_existing(full_statistics,
                                                              absolute_indices)

        self.output_statistics_path = path.join(directory, 'uncorrelated_statistics.csv')
        subsampled_statistics.to_pandas_csv(self.output_statistics_path)

        logging.info('Statistics subsampled: {}'.format(self.id))

        self.number_of_uncorrelated_samples = len(subsampled_statistics)

        return self._get_output_dictionary()
コード例 #13
0
class AddValues(BaseProtocol):
    """A protocol to add together a list of values.

    Notes
    -----
    The `values` input must either be a list of unit.Quantity, a ProtocolPath to a list
    of unit.Quantity, or a list of ProtocolPath which each point to a unit.Quantity.
    """

    values = protocol_input(docstring='The values to add together.',
                            type_hint=list,
                            default_value=UNDEFINED)

    result = protocol_output(docstring='The sum of the values.',
                             type_hint=typing.Union[int, float,
                                                    EstimatedQuantity,
                                                    unit.Quantity,
                                                    ParameterGradient])

    def execute(self, directory, available_resources):
        """Sum the entries of `values` and store the total in `result`.

        Returns a `PropertyEstimatorException` (rather than raising) when
        `values` is empty or when an entry is not an instance of the first
        entry's type; otherwise returns the output dictionary.
        """

        if len(self.values) < 1:
            return PropertyEstimatorException(
                directory, 'There were no values to add together')

        expected_type = type(self.values[0])

        if not all(isinstance(x, expected_type) for x in self.values):

            return PropertyEstimatorException(
                directory, f'All values to add together must be '
                f'the same type ({" ".join(map(str, self.values))}).')

        # Accumulate with `+` rather than `+=`: an in-place add on the first
        # element could mutate the caller's input for types which implement
        # `__iadd__`.
        total = self.values[0]

        for value in self.values[1:]:
            total = total + value

        self.result = total

        return self._get_output_dictionary()
コード例 #14
0
ファイル: yank.py プロジェクト: MSchauperl/propertyestimator
class SolvationYankProtocol(BaseYankProtocol):
    """A protocol for performing solvation alchemical free energy
    calculations using the YANK framework.

    This protocol can be used for box solvation free energies (setting
    the `solvent_1` input to the solvent of interest and setting
    `solvent_2` as an empty `Substance`) or transfer free energies (setting
    both the `solvent_1` and `solvent_2` inputs to different solvents).
    """

    solute = protocol_input(
        docstring='The substance describing the composition of '
        'the solute. This should include the solute '
        'molecule as well as any counter ions.',
        type_hint=Substance,
        default_value=UNDEFINED)

    solvent_1 = protocol_input(
        docstring='The substance describing the composition of '
        'the first solvent.',
        type_hint=Substance,
        default_value=UNDEFINED)
    solvent_2 = protocol_input(
        docstring='The substance describing the composition of '
        'the second solvent.',
        type_hint=Substance,
        default_value=UNDEFINED)

    solvent_1_coordinates = protocol_input(
        docstring=
        'The file path to the coordinates of the solute embedded in the '
        'first solvent.',
        type_hint=str,
        default_value=UNDEFINED)
    solvent_1_system = protocol_input(
        docstring=
        'The file path to the system object of the solute embedded in the '
        'first solvent.',
        type_hint=str,
        default_value=UNDEFINED)

    solvent_2_coordinates = protocol_input(
        docstring=
        'The file path to the coordinates of the solute embedded in the '
        'second solvent.',
        type_hint=str,
        default_value=UNDEFINED)
    solvent_2_system = protocol_input(
        docstring=
        'The file path to the system object of the solute embedded in the '
        'second solvent.',
        type_hint=str,
        default_value=UNDEFINED)

    # The four lambda inputs below are optional. `_get_protocol_dictionary`
    # requires that, per solvent, either both the electrostatic and steric
    # lists are set or neither is.
    electrostatic_lambdas_1 = protocol_input(
        docstring=
        'The list of electrostatic alchemical states that YANK should sample at. '
        'These values will be passed to the YANK `lambda_electrostatics` option. '
        'If no option is set, YANK will use `trailblaze` algorithm to determine '
        'this option automatically.',
        type_hint=list,
        optional=True,
        default_value=UNDEFINED)
    steric_lambdas_1 = protocol_input(
        docstring=
        'The list of steric alchemical states that YANK should sample at. '
        'These values will be passed to the YANK `lambda_sterics` option. '
        'If no option is set, YANK will use `trailblaze` algorithm to determine '
        'this option automatically.',
        type_hint=list,
        optional=True,
        default_value=UNDEFINED)
    electrostatic_lambdas_2 = protocol_input(
        docstring=
        'The list of electrostatic alchemical states that YANK should sample at. '
        'These values will be passed to the YANK `lambda_electrostatics` option. '
        'If no option is set, YANK will use `trailblaze` algorithm to determine '
        'this option automatically.',
        type_hint=list,
        optional=True,
        default_value=UNDEFINED)
    steric_lambdas_2 = protocol_input(
        docstring=
        'The list of steric alchemical states that YANK should sample at. '
        'These values will be passed to the YANK `lambda_sterics` option. '
        'If no option is set, YANK will use `trailblaze` algorithm to determine '
        'this option automatically.',
        type_hint=list,
        optional=True,
        default_value=UNDEFINED)

    solvent_1_trajectory_path = protocol_output(
        docstring='The file path to the trajectory of the solute in the '
        'first solvent.',
        type_hint=str)
    solvent_2_trajectory_path = protocol_output(
        docstring='The file path to the trajectory of the solute in the '
        'second solvent.',
        type_hint=str)

    def __init__(self, protocol_id):
        """Construct the protocol and define the names used for the local
        copies of the coordinate and system files.

        These names are joined onto the working directory in `execute`
        and are referenced by the dictionary built in
        `_get_system_dictionary`.
        """
        super().__init__(protocol_id)

        self._local_solvent_1_coordinates = 'solvent_1.pdb'
        self._local_solvent_1_system = 'solvent_1.xml'

        self._local_solvent_2_coordinates = 'solvent_2.pdb'
        self._local_solvent_2_system = 'solvent_2.xml'

    def _get_system_dictionary(self):
        """Build the `solvation-system` dictionary.

        Returns
        -------
        dict
            A dictionary with the single key `solvation-system`, whose
            value holds the local system / coordinate file names of each
            phase and a `solvent_dsl` string.
        """

        # `_get_dsl_from_role` is not defined in this class (presumably
        # inherited from `BaseYankProtocol` - TODO confirm); it is asked for
        # the selection string of the components with the `Solvent` role.
        solvent_1_dsl = self._get_dsl_from_role(
            [self.solute, self.solvent_1], self.solvent_1_coordinates,
            Substance.ComponentRole.Solvent)

        solvent_2_dsl = self._get_dsl_from_role(
            [self.solute, self.solvent_2], self.solvent_2_coordinates,
            Substance.ComponentRole.Solvent)

        full_solvent_dsl_components = []

        # Skip empty selections so that the ' or ' join below never
        # produces a dangling operand.
        if len(solvent_1_dsl) > 0:
            full_solvent_dsl_components.append(solvent_1_dsl)
        if len(solvent_2_dsl) > 0:
            full_solvent_dsl_components.append(solvent_2_dsl)

        solvation_system_dictionary = {
            'phase1_path':
            [self._local_solvent_1_system, self._local_solvent_1_coordinates],
            'phase2_path':
            [self._local_solvent_2_system, self._local_solvent_2_coordinates],
            'solvent_dsl':
            ' or '.join(full_solvent_dsl_components)
        }

        return {'solvation-system': solvation_system_dictionary}

    def _get_protocol_dictionary(self):
        """Build the `solvation-protocol` dictionary.

        For each solvent the alchemical path is either the string `'auto'`
        (when neither lambda list was set) or a dictionary holding the
        electrostatic and steric lambda lists.

        Returns
        -------
        dict
            A dictionary with the single key `solvation-protocol`.

        Raises
        ------
        ValueError
            If only one of the two lambda lists of a solvent was set.
        """

        solvent_1_protocol_dictionary = {
            'lambda_electrostatics': self.electrostatic_lambdas_1,
            'lambda_sterics': self.steric_lambdas_1
        }

        if self.electrostatic_lambdas_1 == UNDEFINED and self.steric_lambdas_1 == UNDEFINED:

            # Neither list was provided, so the explicit lambdas are
            # replaced by 'auto'.
            solvent_1_protocol_dictionary = 'auto'

        elif ((self.electrostatic_lambdas_1 != UNDEFINED
               and self.steric_lambdas_1 == UNDEFINED)
              or (self.electrostatic_lambdas_1 == UNDEFINED
                  and self.steric_lambdas_1 != UNDEFINED)):

            raise ValueError('Either both of `electrostatic_lambdas_1` and '
                             '`steric_lambdas_1` must be set, or neither '
                             'must be set.')

        # The second solvent is handled in exactly the same way as the first.
        solvent_2_protocol_dictionary = {
            'lambda_electrostatics': self.electrostatic_lambdas_2,
            'lambda_sterics': self.steric_lambdas_2
        }

        if self.electrostatic_lambdas_2 == UNDEFINED and self.steric_lambdas_2 == UNDEFINED:

            solvent_2_protocol_dictionary = 'auto'

        elif ((self.electrostatic_lambdas_2 != UNDEFINED
               and self.steric_lambdas_2 == UNDEFINED)
              or (self.electrostatic_lambdas_2 == UNDEFINED
                  and self.steric_lambdas_2 != UNDEFINED)):

            raise ValueError('Either both of `electrostatic_lambdas_2` and '
                             '`steric_lambdas_2` must be set, or neither '
                             'must be set.')

        protocol_dictionary = {
            'solvent1': {
                'alchemical_path': solvent_1_protocol_dictionary
            },
            'solvent2': {
                'alchemical_path': solvent_2_protocol_dictionary
            }
        }

        return {'solvation-protocol': protocol_dictionary}

    def execute(self, directory, available_resources):
        """Stage the input files, run the base protocol and extract the
        trajectory of each phase.

        Parameters
        ----------
        directory: str
            The working directory which the input files are copied into
            and which the output trajectories are written to.
        available_resources
            Passed through unchanged to the base class `execute`.

        Returns
        -------
        The output dictionary on success, or a `PropertyEstimatorException`
        (returned, not raised) when there is not exactly one solute
        component, when both solvents are empty, or when the base class
        `execute` returns an exception.
        """

        from simtk.openmm import XmlSerializer

        solute_components = [
            component for component in self.solute.components
            if component.role == Substance.ComponentRole.Solute
        ]

        solvent_1_components = [
            component for component in self.solvent_1.components
            if component.role == Substance.ComponentRole.Solvent
        ]

        solvent_2_components = [
            component for component in self.solvent_2.components
            if component.role == Substance.ComponentRole.Solvent
        ]

        if len(solute_components) != 1:
            return PropertyEstimatorException(
                directory,
                'There must only be a single component marked as a solute.')
        if len(solvent_1_components) == 0 and len(solvent_2_components) == 0:
            return PropertyEstimatorException(
                directory, 'At least one of the solvents must not be vacuum.')

        # Because of quirks in where YANK looks for files while it
        # temporarily changes directory, the coordinate and system files are
        # copied into the working directory so that they are correctly found.
        shutil.copyfile(
            self.solvent_1_coordinates,
            os.path.join(directory, self._local_solvent_1_coordinates))
        shutil.copyfile(self.solvent_1_system,
                        os.path.join(directory, self._local_solvent_1_system))

        shutil.copyfile(
            self.solvent_2_coordinates,
            os.path.join(directory, self._local_solvent_2_coordinates))
        shutil.copyfile(self.solvent_2_system,
                        os.path.join(directory, self._local_solvent_2_system))

        # Disable the periodic boundary conditions of any solvent which
        # should be treated as vacuum. The check above already rejected the
        # case where both solvents are empty, so at most one system is
        # selected here.
        vacuum_system_path = None

        if len(solvent_1_components) == 0:
            vacuum_system_path = self._local_solvent_1_system
        elif len(solvent_2_components) == 0:
            vacuum_system_path = self._local_solvent_2_system

        if vacuum_system_path is not None:

            logging.info(
                f'Disabling the periodic boundary conditions in {vacuum_system_path} '
                f'by setting the cutoff type to NoCutoff')

            # Only the local copy of the system file is rewritten; the
            # original input file is left untouched.
            with open(os.path.join(directory, vacuum_system_path),
                      'r') as file:
                vacuum_system = XmlSerializer.deserialize(file.read())

            # `disable_pbc` is defined outside of this class and is assumed
            # to modify the system in place, as its return value is unused.
            disable_pbc(vacuum_system)

            with open(os.path.join(directory, vacuum_system_path),
                      'w') as file:
                file.write(XmlSerializer.serialize(vacuum_system))

        # Set up the yank input file.
        result = super(SolvationYankProtocol,
                       self).execute(directory, available_resources)

        if isinstance(result, PropertyEstimatorException):
            return result

        # `setup_only` is not declared in this class (presumably a base
        # class input - TODO confirm); when it is set no trajectories are
        # extracted.
        if self.setup_only:
            return self._get_output_dictionary()

        solvent_1_yank_path = os.path.join(directory, 'experiments',
                                           'solvent1.nc')
        solvent_2_yank_path = os.path.join(directory, 'experiments',
                                           'solvent2.nc')

        self.solvent_1_trajectory_path = os.path.join(directory,
                                                      'solvent1.dcd')
        self.solvent_2_trajectory_path = os.path.join(directory,
                                                      'solvent2.dcd')

        # `_extract_trajectory` is not defined in this class; it is given
        # the `.nc` file of each phase and the `.dcd` path to write to.
        self._extract_trajectory(solvent_1_yank_path,
                                 self.solvent_1_trajectory_path)
        self._extract_trajectory(solvent_2_yank_path,
                                 self.solvent_2_trajectory_path)

        return self._get_output_dictionary()
コード例 #15
0
ファイル: yank.py プロジェクト: MSchauperl/propertyestimator
class LigandReceptorYankProtocol(BaseYankProtocol):
    """A protocol for performing ligand-receptor alchemical free energy
    calculations using the YANK framework.

    The solvated ligand and solvated complex inputs are copied into the
    working directory, the YANK input file is generated and run by the base
    class, and (unless `setup_only` is set) the trajectory of each phase is
    extracted from the YANK output once the calculation has finished.
    """
    class RestraintType(Enum):
        """The types of ligand restraints available within yank.
        """
        Harmonic = 'Harmonic'
        FlatBottom = 'FlatBottom'

    ligand_residue_name = protocol_input(
        docstring='The residue name of the ligand.',
        type_hint=str,
        default_value=UNDEFINED)
    receptor_residue_name = protocol_input(
        docstring='The residue name of the receptor.',
        type_hint=str,
        default_value=UNDEFINED)

    solvated_ligand_coordinates = protocol_input(
        docstring='The file path to the solvated ligand coordinates.',
        type_hint=str,
        default_value=UNDEFINED)
    solvated_ligand_system = protocol_input(
        docstring='The file path to the solvated ligand system object.',
        type_hint=str,
        default_value=UNDEFINED)

    solvated_complex_coordinates = protocol_input(
        docstring='The file path to the solvated complex coordinates.',
        type_hint=str,
        default_value=UNDEFINED)
    solvated_complex_system = protocol_input(
        docstring='The file path to the solvated complex system object.',
        type_hint=str,
        default_value=UNDEFINED)

    force_field_path = protocol_input(
        docstring='The path to the force field which defines the charge method '
        'to use for the calculation.',
        type_hint=str,
        default_value=UNDEFINED)

    apply_restraints = protocol_input(
        docstring=
        'Determines whether the ligand should be explicitly restrained to the '
        'receptor in order to stop the ligand from temporarily unbinding.',
        type_hint=bool,
        default_value=True)
    restraint_type = protocol_input(
        docstring=
        'The type of ligand restraint applied, provided that `apply_restraints` '
        'is `True`',
        type_hint=RestraintType,
        default_value=RestraintType.Harmonic)

    solvated_ligand_trajectory_path = protocol_output(
        docstring='The file path to the generated ligand trajectory.',
        type_hint=str)
    # This output describes the *complex* phase; the text was previously a
    # copy of the ligand output's description.
    solvated_complex_trajectory_path = protocol_output(
        docstring='The file path to the generated complex trajectory.',
        type_hint=str)

    def __init__(self, protocol_id):
        """Constructs a new LigandReceptorYankProtocol object.

        Parameters
        ----------
        protocol_id
            The id of the protocol, forwarded to the parent constructor.
        """

        super().__init__(protocol_id)

        # File names (relative to the working directory) that the inputs are
        # copied to before YANK is run - see `execute`.
        self._local_ligand_coordinates = 'ligand.pdb'
        self._local_ligand_system = 'ligand.xml'

        self._local_complex_coordinates = 'complex.pdb'
        self._local_complex_system = 'complex.xml'

    def _get_solvent_dictionary(self):
        """Returns a dictionary of the solvent which will be serialized
        to a yaml file and passed to YANK. In most cases, this should
        just be passing force field settings over, such as PME settings.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK solvents.

        Raises
        ------
        ValueError
            If the electrostatics method defined by the force field at
            `force_field_path` is anything other than PME.
        """

        with open(self.force_field_path, 'r') as file:
            force_field_source = SmirnoffForceFieldSource.parse_json(
                file.read())

        force_field = force_field_source.to_force_field()
        charge_method = force_field.get_parameter_handler(
            'Electrostatics').method

        if charge_method.lower() != 'pme':
            raise ValueError(
                'Currently only PME electrostatics are supported.')

        return {
            'default': {
                'nonbonded_method': charge_method,
            }
        }

    def _get_system_dictionary(self):
        """Returns the single `host-guest` system which will be serialized
        to a yaml file and passed to YANK.

        Phase 1 points at the local copies of the complex files, phase 2 at
        the local copies of the ligand files, and the solvent entry refers to
        the first key of the dictionary returned by `_get_solvent_dictionary`.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK systems.
        """

        solvent_dictionary = self._get_solvent_dictionary()
        solvent_key = next(iter(solvent_dictionary))

        host_guest_dictionary = {
            'phase1_path':
            [self._local_complex_system, self._local_complex_coordinates],
            'phase2_path':
            [self._local_ligand_system, self._local_ligand_coordinates],
            'ligand_dsl':
            f'resname {self.ligand_residue_name}',
            'solvent':
            solvent_key
        }

        return {'host-guest': host_guest_dictionary}

    def _get_protocol_dictionary(self):
        """Returns the single protocol which will be serialized to a yaml
        file and passed to YANK, with an automatically determined alchemical
        path for both the `complex` and the `solvent` phase.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK protocol.
        """

        absolute_binding_dictionary = {
            'complex': {
                'alchemical_path': 'auto'
            },
            'solvent': {
                'alchemical_path': 'auto'
            }
        }

        return {'absolute_binding_dictionary': absolute_binding_dictionary}

    def _get_experiments_dictionary(self):
        """Returns the experiment built by the base class, extended with a
        `restraint` section when `apply_restraints` is set.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK experiment.
        """

        experiments_dictionary = super()._get_experiments_dictionary()

        if self.apply_restraints:

            experiments_dictionary['restraint'] = {
                'restrained_ligand_atoms':
                f'(resname {self.ligand_residue_name}) and (mass > 1.5)',
                'restrained_receptor_atoms':
                f'(resname {self.receptor_residue_name}) and (mass > 1.5)',
                'type': self.restraint_type.value
            }

        return experiments_dictionary

    def _get_full_input_dictionary(self, available_resources):
        """Returns the full YANK input built by the base class with an
        additional `solvents` section.

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK input file.
        """

        full_dictionary = super()._get_full_input_dictionary(
            available_resources)
        full_dictionary['solvents'] = self._get_solvent_dictionary()

        return full_dictionary

    def execute(self, directory, available_resources):
        """Copies the inputs into `directory`, runs YANK through the base
        class and extracts the trajectory of each phase.

        Parameters
        ----------
        directory: str
            The directory in which to run the calculation.
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict or PropertyEstimatorException
            The output dictionary of this protocol, or the exception object
            returned by the base class `execute` if it reported a failure.
        """

        # Because of quirks in where Yank looks for files while doing
        # temporary directory changes, we need to copy the coordinate files
        # locally so they are correctly found.
        shutil.copyfile(
            self.solvated_ligand_coordinates,
            os.path.join(directory, self._local_ligand_coordinates))
        shutil.copyfile(self.solvated_ligand_system,
                        os.path.join(directory, self._local_ligand_system))

        shutil.copyfile(
            self.solvated_complex_coordinates,
            os.path.join(directory, self._local_complex_coordinates))
        shutil.copyfile(self.solvated_complex_system,
                        os.path.join(directory, self._local_complex_system))

        result = super().execute(directory, available_resources)

        # Failures are reported by returning (not raising) an exception
        # object, so hand it straight back to the caller.
        if isinstance(result, PropertyEstimatorException):
            return result

        # No simulations were run, so there are no trajectories to extract.
        if self.setup_only:
            return self._get_output_dictionary()

        ligand_yank_path = os.path.join(directory, 'experiments', 'solvent.nc')
        complex_yank_path = os.path.join(directory, 'experiments',
                                         'complex.nc')

        self.solvated_ligand_trajectory_path = os.path.join(
            directory, 'ligand.dcd')
        self.solvated_complex_trajectory_path = os.path.join(
            directory, 'complex.dcd')

        self._extract_trajectory(ligand_yank_path,
                                 self.solvated_ligand_trajectory_path)
        self._extract_trajectory(complex_yank_path,
                                 self.solvated_complex_trajectory_path)

        return self._get_output_dictionary()
コード例 #16
0
ファイル: yank.py プロジェクト: MSchauperl/propertyestimator
class BaseYankProtocol(BaseProtocol):
    """An abstract base class for protocols which will perform a set of
    alchemical free energy simulations using the YANK framework.

    Protocols which inherit from this base must implement the abstract
    `_get_system_dictionary` and `_get_protocol_dictionary` methods.
    """

    thermodynamic_state = protocol_input(
        docstring='The state at which to run the calculations.',
        type_hint=ThermodynamicState,
        default_value=UNDEFINED)

    number_of_equilibration_iterations = protocol_input(
        docstring=
        'The number of iterations used for equilibration before production run. '
        'Only post-equilibration iterations are written to file.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1)
    number_of_iterations = protocol_input(
        docstring='The number of YANK iterations to perform.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=5000)
    steps_per_iteration = protocol_input(
        docstring='The number of steps per YANK iteration to perform.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=500)
    checkpoint_interval = protocol_input(
        docstring=
        'The number of iterations between saving YANK checkpoint files.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=50)

    timestep = protocol_input(
        docstring='The length of the timestep to take.',
        type_hint=unit.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=2 * unit.femtosecond)

    verbose = protocol_input(
        docstring='Controls whether or not to run YANK at high verbosity.',
        type_hint=bool,
        default_value=False)
    setup_only = protocol_input(
        docstring='If true, YANK will only create and validate the setup files, '
        'but not actually run any simulations. This argument is mainly '
        'only to be used for testing purposes.',
        type_hint=bool,
        default_value=False)

    estimated_free_energy = protocol_output(
        docstring='The estimated free energy value and its uncertainty '
        'returned by YANK.',
        type_hint=EstimatedQuantity)

    @staticmethod
    def _get_residue_names_from_role(substances, coordinate_path, role):
        """Returns the residue names of all components which have been
        assigned a given role.

        Parameters
        ----------
        substances: list of Substance
            The substances which contains the components.
        coordinate_path: str
            The path to the coordinates which describe the systems
            topology.
        role: Substance.ComponentRole
            The role of the component to identify.

        Returns
        -------
        set of str or str
            The identified residue names, or the plain string `'all'` when
            `role` is `Substance.ComponentRole.Undefined`.
        """

        from simtk.openmm import app
        from openforcefield.topology import Molecule, Topology

        if role == Substance.ComponentRole.Undefined:
            return 'all'

        unique_molecules = [
            Molecule.from_smiles(component.smiles) for substance in substances
            for component in substance.components
        ]

        openmm_topology = app.PDBFile(coordinate_path).topology
        topology = Topology.from_openmm(openmm_topology, unique_molecules)

        # Determine the smiles of all molecules in the system. We need to use
        # the toolkit to re-generate the smiles as later we will compare these
        # against more toolkit generated smiles.
        component_smiles = {
            Molecule.from_smiles(component.smiles).to_smiles()
            for substance in substances
            for component in substance.components if component.role == role
        }

        residue_names = set()

        all_openmm_atoms = list(openmm_topology.atoms())

        # Find the residue names of the molecules which have the correct
        # role.
        for topology_molecule in topology.topology_molecules:

            molecule_smiles = topology_molecule.reference_molecule.to_smiles()

            if molecule_smiles not in component_smiles:
                continue

            molecule_residue_names = {
                all_openmm_atoms[
                    topology_atom.topology_atom_index].residue.name
                for topology_atom in topology_molecule.atoms
            }

            # Every atom of a single molecule is expected to live in one
            # and the same residue.
            assert len(molecule_residue_names) == 1
            residue_names.update(molecule_residue_names)

        return residue_names

    @staticmethod
    def _get_dsl_from_role(substances, coordinate_path, role):
        """Returns an MDTraj DSL string which identifies those
        atoms which belong to components flagged with a specific
        role.

        Parameters
        ----------
        substances: list of Substance
            The substances which contains the components.
        coordinate_path: str
            The path to the coordinates which describe the systems
            topology.
        role: Substance.ComponentRole
            The role of the component to identify.

        Returns
        -------
        str
            The DSL string. For an undefined role this is the `'all'`
            keyword; otherwise it is an ` or ` separated list of
            `resname <name>` terms (in sorted order).
        """

        residue_names = BaseYankProtocol._get_residue_names_from_role(
            substances, coordinate_path, role)

        # An undefined role yields the plain string 'all' rather than a set
        # of names. Joining over it would iterate its characters and build
        # 'resname a or resname l or resname l', so pass it straight through.
        if isinstance(residue_names, str):
            return residue_names

        # Sort so that the generated selection does not depend on the
        # arbitrary iteration order of a set.
        return ' or '.join(
            f'resname {residue_name}'
            for residue_name in sorted(residue_names))

    def _get_options_dictionary(self, available_resources):
        """Returns a dictionary of options which will be serialized
        to a yaml file and passed to YANK.

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK options.
        """

        from openforcefield.utils import quantity_to_string

        platform_name = 'CPU'

        if available_resources.number_of_gpus > 0:

            # A platform which runs on GPUs has been requested.
            from propertyestimator.backends import ComputeResources
            toolkit_enum = ComputeResources.GPUToolkit(
                available_resources.preferred_gpu_toolkit)

            # The options are dumped to yaml, so the platform must be given
            # by its plain string name - never by the toolkit enum member.
            if toolkit_enum == ComputeResources.GPUToolkit.CUDA:
                platform_name = 'CUDA'
            else:
                platform_name = 'OpenCL'

        return {
            'verbose':
            self.verbose,
            'output_dir':
            '.',
            'temperature':
            quantity_to_string(
                pint_quantity_to_openmm(self.thermodynamic_state.temperature)),
            'pressure':
            quantity_to_string(
                pint_quantity_to_openmm(self.thermodynamic_state.pressure)),
            'minimize':
            True,
            'number_of_equilibration_iterations':
            self.number_of_equilibration_iterations,
            'default_number_of_iterations':
            self.number_of_iterations,
            'default_nsteps_per_iteration':
            self.steps_per_iteration,
            'checkpoint_interval':
            self.checkpoint_interval,
            'default_timestep':
            quantity_to_string(pint_quantity_to_openmm(self.timestep)),
            'annihilate_electrostatics':
            True,
            'annihilate_sterics':
            False,
            'platform':
            platform_name
        }

    def _get_system_dictionary(self):
        """Returns a dictionary of the system which will be serialized
        to a yaml file and passed to YANK. Only a single system may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of YANK systems.
        """
        raise NotImplementedError()

    def _get_protocol_dictionary(self):
        """Returns a dictionary of the protocol which will be serialized
        to a yaml file and passed to YANK. Only a single protocol may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK protocol.
        """
        raise NotImplementedError()

    def _get_experiments_dictionary(self):
        """Returns a dictionary of the experiments which will be serialized
        to a yaml file and passed to YANK. Only a single experiment may be
        specified.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK experiment.
        """

        # Only the first key of each dictionary is referenced, which is why
        # a single system and a single protocol are supported.
        system_dictionary = self._get_system_dictionary()
        system_key = next(iter(system_dictionary))

        protocol_dictionary = self._get_protocol_dictionary()
        protocol_key = next(iter(protocol_dictionary))

        return {'system': system_key, 'protocol': protocol_key}

    def _get_full_input_dictionary(self, available_resources):
        """Returns a dictionary of the full YANK inputs which will be serialized
        to a yaml file and passed to YANK

        Parameters
        ----------
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict of str and Any
            A yaml compatible dictionary of a YANK input file.
        """

        return {
            'options': self._get_options_dictionary(available_resources),
            'systems': self._get_system_dictionary(),
            'protocols': self._get_protocol_dictionary(),
            'experiments': self._get_experiments_dictionary()
        }

    @staticmethod
    def _extract_trajectory(checkpoint_path, output_trajectory_path):
        """Extracts the stored trajectory of the 'initial' state from a
        yank `.nc` checkpoint file and stores it to disk as a `.dcd` file.

        Parameters
        ----------
        checkpoint_path: str
            The path to the yank `.nc` file
        output_trajectory_path: str
            The path to store the extracted trajectory at.
        """

        from yank.analyze import extract_trajectory

        mdtraj_trajectory = extract_trajectory(checkpoint_path,
                                               state_index=0,
                                               image_molecules=True)
        mdtraj_trajectory.save_dcd(output_trajectory_path)

    @staticmethod
    def _run_yank(directory, available_resources, setup_only):
        """Runs YANK within the specified directory which contains a `yank.yaml`
        input file.

        Parameters
        ----------
        directory: str
            The directory within which to run yank.
        available_resources: ComputeResources
            The compute resources available to yank.
        setup_only: bool
            If true, YANK will only create and validate the setup files,
            but not actually run any simulations. This argument is mainly
            only to be used for testing purposes.

        Returns
        -------
        simtk.unit.Quantity
            The free energy returned by yank (zero when `setup_only` is set).
        simtk.unit.Quantity
            The uncertainty in the free energy returned by yank (zero when
            `setup_only` is set).
        """

        from yank.experiment import ExperimentBuilder
        from yank.analyze import ExperimentAnalyzer

        from simtk import unit as simtk_unit

        with temporarily_change_directory(directory):

            # Set the default properties on the desired platform
            # before calling into yank.
            setup_platform_with_resources(available_resources)

            exp_builder = ExperimentBuilder('yank.yaml')

            # Building the experiment is all that is wanted here; report
            # placeholder zero values instead of running anything.
            if setup_only is True:
                return 0.0 * simtk_unit.kilojoule_per_mole, 0.0 * simtk_unit.kilojoule_per_mole

            exp_builder.run_experiments()

            analyzer = ExperimentAnalyzer('experiments')
            output = analyzer.auto_analyze()

            free_energy = output['free_energy']['free_energy_diff_unit']
            free_energy_uncertainty = output['free_energy'][
                'free_energy_diff_error_unit']

        return free_energy, free_energy_uncertainty

    @staticmethod
    def _run_yank_as_process(queue, directory, available_resources,
                             setup_only):
        """A wrapper around the `_run_yank` method which takes
        a `multiprocessing.Queue` as input, thereby allowing it
        to be launched from a separate process and still return
        its output back to the main process.

        Nothing is returned directly; instead a single
        `(free_energy, free_energy_uncertainty, error)` tuple is put on
        `queue`. The first two entries are `None` if `_run_yank` raised,
        and `error` is the formatted traceback as one string, or `None`
        if no exception was raised.

        Parameters
        ----------
        queue: multiprocessing.Queue
            The queue object which will communicate with the
            launched process.
        directory: str
            The directory within which to run yank.
        available_resources: ComputeResources
            The compute resources available to yank.
        setup_only: bool
            If true, YANK will only create and validate the setup files,
            but not actually run any simulations. This argument is mainly
            only to be used for testing purposes.
        """

        free_energy = None
        free_energy_uncertainty = None

        error = None

        # Any failure must be reported back through the queue, otherwise the
        # parent would wait on `queue.get()` without ever getting a result.
        try:
            free_energy, free_energy_uncertainty = BaseYankProtocol._run_yank(
                directory, available_resources, setup_only)
        except Exception as e:
            # `format_exception` yields a list of lines; join them so that
            # the error really is a single string as documented.
            error = ''.join(
                traceback.format_exception(None, e, e.__traceback__))

        queue.put((free_energy, free_energy_uncertainty, error))

    def execute(self, directory, available_resources):
        """Writes the `yank.yaml` input file into `directory`, runs YANK and
        stores the resulting free energy in `estimated_free_energy`.

        Parameters
        ----------
        directory: str
            The directory in which to run the calculation.
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        dict or PropertyEstimatorException
            The output dictionary of this protocol, or an exception object
            carrying the error reported by the YANK child process.
        """

        yaml_filename = os.path.join(directory, 'yank.yaml')

        # Create the yank yaml input file from a dictionary of options.
        with open(yaml_filename, 'w') as file:
            yaml.dump(self._get_full_input_dictionary(available_resources),
                      file,
                      sort_keys=False)

        setup_only = self.setup_only

        # Yank is not safe to be called from anything other than the main thread.
        # If the current thread is not detected as the main one, then yank should
        # be spun up in a new process which should itself be safe to run yank in.
        if threading.current_thread() is threading.main_thread():
            logging.info('Launching YANK in the main thread.')
            free_energy, free_energy_uncertainty = self._run_yank(
                directory, available_resources, setup_only)
        else:

            from multiprocessing import Process, Queue

            logging.info('Launching YANK in a new process.')

            # Create a queue to pass the results back to the main process.
            queue = Queue()
            # Create the process within which yank will run.
            process = Process(
                target=BaseYankProtocol._run_yank_as_process,
                args=[queue, directory, available_resources, setup_only])

            # Start the process and gather back the output. The queue is
            # read before joining so the child can hand over its result.
            process.start()
            free_energy, free_energy_uncertainty, error = queue.get()
            process.join()

            if error is not None:
                return PropertyEstimatorException(directory, error)

        self.estimated_free_energy = EstimatedQuantity(
            openmm_quantity_to_pint(free_energy),
            openmm_quantity_to_pint(free_energy_uncertainty), self._id)

        return self._get_output_dictionary()
コード例 #17
0
class CentralDifferenceGradient(BaseProtocol):
    """A protocol which employs the central difference method
    to estimate the gradient of an observable A, such that

    grad = (A(x+h) - A(x-h)) / (2h)

    where `A(x+h)` and `A(x-h)` are the forward and reverse observable
    values, and `2h` is the difference between the forward and reverse
    parameter values.

    Notes
    -----
    The observable inputs may either be a unit.Quantity or an
    EstimatedQuantity. In the latter case only the `value` of the quantity
    is used.
    """

    parameter_key = protocol_input(
        docstring='The key of the parameter to differentiate with respect to.',
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED
    )

    reverse_observable_value = protocol_input(
        docstring='The value of the observable evaluated using the parameters '
                  'perturbed in the reverse direction.',
        type_hint=typing.Union[unit.Quantity, EstimatedQuantity],
        default_value=UNDEFINED
    )
    forward_observable_value = protocol_input(
        docstring='The value of the observable evaluated using the parameters '
                  'perturbed in the forward direction.',
        type_hint=typing.Union[unit.Quantity, EstimatedQuantity],
        default_value=UNDEFINED
    )

    reverse_parameter_value = protocol_input(
        docstring='The value of the parameter perturbed in the reverse '
                  'direction.',
        type_hint=unit.Quantity,
        default_value=UNDEFINED
    )
    forward_parameter_value = protocol_input(
        docstring='The value of the parameter perturbed in the forward '
                  'direction.',
        type_hint=unit.Quantity,
        default_value=UNDEFINED
    )

    gradient = protocol_output(
        docstring='The estimated gradient',
        type_hint=ParameterGradient
    )

    def execute(self, directory, available_resources):
        """Estimates the gradient and stores it in the `gradient` output.

        Parameters
        ----------
        directory: str
            The directory the protocol is executed in. It is only used
            here to label a returned exception.
        available_resources: ComputeResources
            The resources available to execute on (unused here).

        Returns
        -------
        dict or PropertyEstimatorException
            The output dictionary of this protocol, or an exception object
            if the forward parameter value is not larger than the reverse
            one.
        """

        # Equal values are rejected as well: they would make the denominator
        # below zero, and the message already demands a strictly larger
        # forward value.
        if self.forward_parameter_value <= self.reverse_parameter_value:

            # The first positional argument of the exception is the
            # directory, so the message has to be passed by keyword.
            return PropertyEstimatorException(
                directory=directory,
                message=f'The forward parameter value ({self.forward_parameter_value}) must '
                        f'be larger than the reverse value ({self.reverse_parameter_value}).')

        reverse_value = self.reverse_observable_value
        forward_value = self.forward_observable_value

        # Only the central values enter the finite difference; any
        # uncertainties attached to the observables are dropped.
        if isinstance(reverse_value, EstimatedQuantity):
            reverse_value = reverse_value.value

        if isinstance(forward_value, EstimatedQuantity):
            forward_value = forward_value.value

        gradient = ((forward_value - reverse_value) /
                    (self.forward_parameter_value - self.reverse_parameter_value))

        self.gradient = ParameterGradient(self.parameter_key, gradient)

        return self._get_output_dictionary()
コード例 #18
0
class GradientReducedPotentials(BaseProtocol):
    """A protocol which estimates the reduced potentials of the configurations
    of a trajectory using reverse and forward perturbed simulation parameters,
    for use when estimating reweighted gradients with the central difference
    method.
    """

    # --- Force field inputs ---
    reference_force_field_paths = protocol_input(
        docstring='A list of paths to the force field files which were '
                  'originally used to generate the configurations.',
        type_hint=list,
        default_value=UNDEFINED
    )
    force_field_path = protocol_input(
        docstring='The path to the force field which contains the parameters to '
                  'differentiate the observable with respect to.',
        type_hint=str,
        default_value=UNDEFINED
    )

    reference_statistics_path = protocol_input(
        docstring='An optional path to the statistics array which was '
                  'generated alongside the observable of interest, which will '
                  'be used to correct the potential energies at the reverse '
                  'and forward states. This is only really needed when the '
                  'observable of interest is an energy.',
        type_hint=str,
        default_value=UNDEFINED,
        optional=True
    )

    enable_pbc = protocol_input(
        docstring='If true, periodic boundary conditions will be enabled when '
                  're-evaluating the reduced potentials.',
        type_hint=bool,
        default_value=True
    )

    # --- System description ---
    substance = protocol_input(
        docstring='The substance which describes the composition of the system.',
        type_hint=Substance,
        default_value=UNDEFINED
    )
    thermodynamic_state = protocol_input(
        docstring='The thermodynamic state to estimate the gradients at.',
        type_hint=ThermodynamicState,
        default_value=UNDEFINED
    )

    coordinate_file_path = protocol_input(
        docstring='A path to a PDB coordinate file which describes the topology of '
                  'the system.',
        type_hint=str,
        default_value=UNDEFINED
    )
    trajectory_file_path = protocol_input(
        docstring='A path to the trajectory of configurations',
        type_hint=str,
        default_value=UNDEFINED
    )

    # --- Perturbation settings ---
    parameter_key = protocol_input(
        docstring='The key of the parameter to differentiate with respect to.',
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED
    )

    perturbation_scale = protocol_input(
        docstring='The amount to perturb the parameter by, such that '
                  'p_new = p_old * (1 +/- `perturbation_scale`)',
        type_hint=float,
        default_value=1.0e-4
    )

    use_subset_of_force_field = protocol_input(
        docstring='If true, the reduced potential will be estimated using '
                  'an OpenMM system which only contains the parameter of '
                  'interest',
        type_hint=bool,
        default_value=True
    )

    effective_sample_indices = protocol_input(
        docstring='This a placeholder input which is not currently implemented.',
        type_hint=list,
        default_value=UNDEFINED,
        optional=True
    )

    # --- Outputs ---
    reference_potential_paths = protocol_output(
        docstring='File paths to the reduced potentials evaluated using each '
                  'of the reference force fields.',
        type_hint=list
    )
    # The trailing space after 'parameters' keeps the two adjacent literals
    # from being joined into 'parametersperturbed'.
    reverse_potentials_path = protocol_output(
        docstring='A file path to the energies evaluated using the parameters '
                  'perturbed in the reverse direction.',
        type_hint=str
    )
    forward_potentials_path = protocol_output(
        docstring='A file path to the energies evaluated using the parameters '
                  'perturbed in the forward direction.',
        type_hint=str
    )

    reverse_parameter_value = protocol_output(
        docstring='The value of the parameter perturbed in the reverse '
                  'direction.',
        type_hint=unit.Quantity
    )
    forward_parameter_value = protocol_output(
        docstring='The value of the parameter perturbed in the forward '
                  'direction.',
        type_hint=unit.Quantity
    )

    def _build_reduced_system(self, original_force_field, topology, scale_amount=None):
        """Produces an OpenMM system from the force field, optionally perturbing
        the parameter identified by `self.parameter_key` by `scale_amount`.

        When `self.use_subset_of_force_field` is true the system is built from a
        force field which only contains the handler of the parameter of interest,
        otherwise from a deep copy of the full force field. The force field passed
        in is never modified.

        Parameters
        ----------
        original_force_field: openforcefield.typing.engines.smirnoff.ForceField
            The force field to create the system from (and optionally perturb).
        topology: openforcefield.topology.Topology
            The topology of the system to apply the force field to.
        scale_amount: float, optional
            The optional amount to perturb the parameter by. A non-zero parameter
            is multiplied by `1 + scale_amount`; a parameter which is (close to)
            zero is instead set to `max(scale_amount, 0)` in its own unit.

        Returns
        -------
        simtk.openmm.System
            The created system.
        simtk.unit.Quantity
            The (possibly perturbed) value of the parameter.

        Raises
        ------
        ValueError
            If the attribute named by the parameter key is neither a direct
            attribute of the parameter nor an indexed attribute of the form
            `<name><index>` (e.g. `k1`) whose `<name>` exists on the parameter.
        """
        # As this method deals mainly with the toolkit, we stick to
        # simtk units here.
        from openforcefield.typing.engines.smirnoff import ForceField

        parameter_tag = self.parameter_key.tag
        parameter_smirks = self.parameter_key.smirks
        parameter_attribute = self.parameter_key.attribute

        original_handler = original_force_field.get_parameter_handler(parameter_tag)
        original_parameter = original_handler.parameters[parameter_smirks]

        if self.use_subset_of_force_field:

            force_field = ForceField()
            handler = copy.deepcopy(original_force_field.get_parameter_handler(parameter_tag))
            force_field.register_parameter_handler(handler)

        else:

            force_field = copy.deepcopy(original_force_field)
            handler = force_field.get_parameter_handler(parameter_tag)

        parameter_index = None
        value_list = None

        if hasattr(original_parameter, parameter_attribute):
            parameter_value = getattr(original_parameter, parameter_attribute)
        else:
            # Indexed attributes (e.g. `k1`, `phase2`) are stored as a list under
            # the un-indexed name, with a one-based index in the attribute name.
            attribute_match = re.fullmatch(r'(\D+)(\d+)', parameter_attribute)

            if attribute_match is None or not hasattr(original_parameter, attribute_match.group(1)):

                raise ValueError(f'The parameter with smirks {parameter_smirks} does not '
                                 f'have a {parameter_attribute} attribute.')

            parameter_attribute = attribute_match.group(1)
            parameter_index = int(attribute_match.group(2)) - 1

            # Work on a shallow copy: the attribute may be the live list held by
            # the original force field, and writing the perturbed value into it
            # would corrupt every later call which starts from that force field.
            value_list = copy.copy(getattr(original_parameter, parameter_attribute))
            parameter_value = value_list[parameter_index]

        if scale_amount is not None:

            existing_parameter = handler.parameters[parameter_smirks]

            if np.isclose(parameter_value.value_in_unit(parameter_value.unit), 0.0):
                # Careful thought needs to be given to this. Consider cases such as
                # epsilon or sigma where negative values are not allowed.
                parameter_value = (scale_amount if scale_amount > 0.0 else 0.0) * parameter_value.unit
            else:
                # Build a new quantity rather than scaling in place so that the
                # value owned by the original force field is left untouched.
                parameter_value = parameter_value * (1.0 + scale_amount)

            if value_list is None:
                setattr(existing_parameter, parameter_attribute, parameter_value)
            else:
                value_list[parameter_index] = parameter_value
                setattr(existing_parameter, parameter_attribute, value_list)

        system = force_field.create_openmm_system(topology)

        if not self.enable_pbc:
            disable_pbc(system)

        return system, parameter_value

    def _evaluate_reduced_potential(self, system, trajectory, file_path,
                                    compute_resources, subset_energy_corrections=None):
        """Re-evaluates the potential and reduced potential energy of every
        frame of a trajectory and stores them in a statistics csv file.

        Nothing is returned; the results are only written to `file_path`.

        Parameters
        ----------
        system: simtk.openmm.System
            The system used to evaluate the energy of each frame.
        trajectory: mdtraj.Trajectory
            A trajectory of configurations to evaluate.
        file_path: str
            The path to save the potentials and reduced potentials to.
        compute_resources: ComputeResources
            The compute resources available to execute on.
        subset_energy_corrections: unit.Quantity, optional
            A unit.Quantity wrapped numpy.ndarray which contains a set
            of energies to add to the re-evaluated potential energies.
            This is mainly used to correct the potential energies evaluated
            using a subset of the force field back to energies as if evaluated
            using the full thing. Only the potential energies are corrected,
            the reduced potentials are stored as evaluated.
        """
        from simtk import unit as simtk_unit

        # The integrator is only required to construct a context - no
        # dynamics are run by this method.
        integrator = openmm.VerletIntegrator(0.1 * simtk_unit.femtoseconds)

        platform = setup_platform_with_resources(compute_resources, True)
        context = openmm.Context(system, integrator, platform)

        n_frames = trajectory.n_frames

        potentials = np.zeros(n_frames, dtype=np.float64)
        reduced_potentials = np.zeros(n_frames, dtype=np.float64)

        openmm_temperature = pint_quantity_to_openmm(self.thermodynamic_state.temperature)
        beta = 1.0 / (simtk_unit.BOLTZMANN_CONSTANT_kB * openmm_temperature)

        openmm_pressure = pint_quantity_to_openmm(self.thermodynamic_state.pressure)

        # The pressure-volume term is only added for periodic systems which
        # have a pressure defined.
        include_pv_term = openmm_pressure is not None and self.enable_pbc

        for frame_index in range(n_frames):

            frame_positions = trajectory.xyz[frame_index]
            frame_box_vectors = trajectory.openmm_boxes(frame_index)

            if self.enable_pbc:
                context.setPeriodicBoxVectors(*frame_box_vectors)

            context.setPositions(frame_positions)

            state = context.getState(getEnergy=True)
            potential_energy = state.getPotentialEnergy()

            # Convert from a molar energy to an energy so that it can be
            # combined with the PV term and multiplied by beta.
            unreduced_potential = potential_energy / simtk_unit.AVOGADRO_CONSTANT_NA

            if include_pv_term:
                unreduced_potential += openmm_pressure * state.getPeriodicBoxVolume()

            potentials[frame_index] = potential_energy.value_in_unit(simtk_unit.kilojoule_per_mole)
            reduced_potentials[frame_index] = unreduced_potential * beta

        # Attach units to the raw arrays.
        potentials *= unit.kilojoule / unit.mole
        reduced_potentials *= unit.dimensionless

        if subset_energy_corrections is not None:
            potentials += subset_energy_corrections

        statistics = StatisticsArray()
        statistics[ObservableType.ReducedPotential] = reduced_potentials
        statistics[ObservableType.PotentialEnergy] = potentials
        statistics.to_pandas_csv(file_path)

    def execute(self, directory, available_resources):
        """Evaluates the reduced potentials of the trajectory using each
        reference force field, and using the target force field with the
        parameter of interest perturbed in the reverse and forward directions.

        Parameters
        ----------
        directory: str
            The directory in which the `reference_<index>.csv`, `reverse.csv`
            and `forward.csv` files are written.
        available_resources: ComputeResources
            The compute resources available to execute on.

        Returns
        -------
        dict or PropertyEstimatorException
            The result of `self._get_output_dictionary()` on success, or an
            exception object if the inputs are inconsistent or a force field
            is not a SMIRNOFF force field.
        """

        import mdtraj

        from openforcefield.topology import Molecule, Topology

        logging.info(f'Calculating the reduced gradient potentials for {self.parameter_key}: {self._id}')

        if len(self.reference_force_field_paths) != 1 and self.use_subset_of_force_field:

            return PropertyEstimatorException(directory, 'A single reference force field must be '
                                                         'provided when calculating the reduced '
                                                         'potentials using a subset of the full force')

        # `reference_statistics_path` is optional and defaults to a non-string
        # sentinel, so only inspect it when it is actually required, and make
        # sure it is a string before measuring its length.
        if self.use_subset_of_force_field and (
                not isinstance(self.reference_statistics_path, str) or
                len(self.reference_statistics_path) == 0):

            return PropertyEstimatorException(directory, 'The path to the statistics evaluated using '
                                                         'the full force field must be provided.')

        with open(self.force_field_path) as file:
            target_force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(target_force_field_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                         'this protocol.')

        target_force_field = target_force_field_source.to_force_field()

        trajectory = mdtraj.load_dcd(self.trajectory_file_path,
                                     self.coordinate_file_path)

        unique_molecules = [Molecule.from_smiles(smiles=component.smiles)
                            for component in self.substance.components]

        pdb_file = app.PDBFile(self.coordinate_file_path)
        topology = Topology.from_openmm(pdb_file.topology, unique_molecules=unique_molecules)

        # If we are using only a subset of the system object, load in the reference
        # statistics containing the full system energies to correct the output
        # forward and reverse potential energies.
        reference_statistics = None
        subset_energy_corrections = None

        if self.use_subset_of_force_field:
            reference_statistics = StatisticsArray.from_pandas_csv(self.reference_statistics_path)

        # Compute the reduced reference energy if any reference force field files
        # have been provided.
        self.reference_potential_paths = []

        for index, reference_force_field_path in enumerate(self.reference_force_field_paths):

            with open(reference_force_field_path) as file:
                reference_force_field_source = ForceFieldSource.parse_json(file.read())

            if not isinstance(reference_force_field_source, SmirnoffForceFieldSource):
                return PropertyEstimatorException(directory, 'Only SMIRNOFF force fields are supported by '
                                                             'this protocol.')

            reference_force_field = reference_force_field_source.to_force_field()
            reference_system, _ = self._build_reduced_system(reference_force_field, topology)

            reference_potentials_path = path.join(directory, f'reference_{index}.csv')

            self._evaluate_reduced_potential(reference_system, trajectory,
                                             reference_potentials_path,
                                             available_resources)

            self.reference_potential_paths.append(reference_potentials_path)

            if reference_statistics is not None:

                # The difference between the full and the subset energies is
                # later added to the perturbed subset energies.
                subset_energies = StatisticsArray.from_pandas_csv(reference_potentials_path)
                subset_energy_corrections = (reference_statistics[ObservableType.PotentialEnergy] -
                                             subset_energies[ObservableType.PotentialEnergy])

                # Store the full system energies alongside the subset reduced potentials.
                subset_energies[ObservableType.PotentialEnergy] = reference_statistics[ObservableType.PotentialEnergy]
                subset_energies.to_pandas_csv(reference_potentials_path)

        # Build the slightly perturbed system.
        reverse_system, reverse_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             -self.perturbation_scale)

        forward_system, forward_parameter_value = self._build_reduced_system(target_force_field,
                                                                             topology,
                                                                             self.perturbation_scale)

        self.reverse_parameter_value = openmm_quantity_to_pint(reverse_parameter_value)
        self.forward_parameter_value = openmm_quantity_to_pint(forward_parameter_value)

        # Calculate the reduced potentials.
        self.reverse_potentials_path = path.join(directory, 'reverse.csv')
        self.forward_potentials_path = path.join(directory, 'forward.csv')

        self._evaluate_reduced_potential(reverse_system, trajectory, self.reverse_potentials_path,
                                         available_resources, subset_energy_corrections)
        self._evaluate_reduced_potential(forward_system, trajectory, self.forward_potentials_path,
                                         available_resources, subset_energy_corrections)

        logging.info('Finished calculating the reduced gradient potentials.')

        return self._get_output_dictionary()
コード例 #19
0
class RunEnergyMinimisation(BaseProtocol):
    """Minimises the potential energy of a system using OpenMM and stores
    the minimised coordinates in a PDB file.
    """

    input_coordinate_file = protocol_input(
        docstring='The coordinates to minimise.',
        type_hint=str,
        default_value=UNDEFINED
    )
    system_path = protocol_input(
        docstring='The path to the XML system object which defines the forces present '
                  'in the system.',
        type_hint=str,
        default_value=UNDEFINED
    )

    tolerance = protocol_input(
        docstring='The energy tolerance to which the system should be minimized.',
        type_hint=unit.Quantity,
        default_value=10 * unit.kilojoules / unit.mole
    )
    max_iterations = protocol_input(
        docstring='The maximum number of iterations to perform. If this is 0, '
                  'minimization is continued until the results converge without regard to '
                  'how many iterations it takes.',
        type_hint=int,
        default_value=0
    )

    enable_pbc = protocol_input(
        docstring='If true, periodic boundary conditions will be enabled.',
        type_hint=bool,
        default_value=True
    )

    output_coordinate_file = protocol_output(
        docstring='The file path to the minimised coordinates.',
        type_hint=str
    )

    def execute(self, directory, available_resources):
        """Minimise the input coordinates and write them to
        `minimised.pdb` inside of `directory`.

        Parameters
        ----------
        directory: str
            The directory to store the minimised coordinates in.
        available_resources: ComputeResources
            The resources used to set up the OpenMM platform.

        Returns
        -------
        dict
            The result of `self._get_output_dictionary()`.
        """

        logging.info('Minimising energy: ' + self.id)

        platform = setup_platform_with_resources(available_resources)

        pdb_file = app.PDBFile(self.input_coordinate_file)

        with open(self.system_path, 'rb') as system_file:
            serialized_system = system_file.read().decode()

        system = openmm.XmlSerializer.deserialize(serialized_system)

        if not self.enable_pbc:

            # Switch every nonbonded force over to the non-periodic,
            # no cutoff method (NoCutoff = 0).
            all_forces = (system.getForce(index) for index in range(system.getNumForces()))

            for force in all_forces:

                if isinstance(force, openmm.NonbondedForce):
                    force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff)

        # TODO: Expose the constraint tolerance
        integrator = openmm.VerletIntegrator(0.002 * simtk_unit.picoseconds)
        simulation = app.Simulation(pdb_file.topology, system, integrator, platform)

        # Prefer the box vectors stored with the coordinates, falling
        # back to the defaults of the system.
        box_vectors = pdb_file.topology.getPeriodicBoxVectors()

        if box_vectors is None:
            box_vectors = simulation.system.getDefaultPeriodicBoxVectors()

        simulation.context.setPeriodicBoxVectors(*box_vectors)
        simulation.context.setPositions(pdb_file.positions)

        openmm_tolerance = pint_quantity_to_openmm(self.tolerance)
        simulation.minimizeEnergy(openmm_tolerance, self.max_iterations)

        final_state = simulation.context.getState(getPositions=True)
        minimised_positions = final_state.getPositions()

        self.output_coordinate_file = os.path.join(directory, 'minimised.pdb')

        with open(self.output_coordinate_file, 'w+') as output_file:
            app.PDBFile.writeFile(simulation.topology, minimised_positions, output_file)

        logging.info('Energy minimised: ' + self.id)

        return self._get_output_dictionary()
コード例 #20
0
class RunOpenMMSimulation(BaseProtocol):
    """Performs a molecular dynamics simulation in a given ensemble using
    an OpenMM backend.
    """
    class _Checkpoint:
        """A temporary checkpoint file which keeps track
        of the parts of the simulation state not stored in
        the checkpoint state xml file.
        """
        def __init__(self,
                     output_frequency=-1,
                     checkpoint_frequency=-1,
                     steps_per_iteration=-1,
                     current_step_number=0):

            self.output_frequency = output_frequency
            self.checkpoint_frequency = checkpoint_frequency
            self.steps_per_iteration = steps_per_iteration
            self.current_step_number = current_step_number

        def __getstate__(self):
            return {
                'output_frequency': self.output_frequency,
                'checkpoint_frequency': self.checkpoint_frequency,
                'steps_per_iteration': self.steps_per_iteration,
                'current_step_number': self.current_step_number
            }

        def __setstate__(self, state):
            self.output_frequency = state['output_frequency']
            self.checkpoint_frequency = state['checkpoint_frequency']
            self.steps_per_iteration = state['steps_per_iteration']
            self.current_step_number = state['current_step_number']

    class _Simulation:
        """A fake simulation class to use with the
        openmm file reporters.
        """
        def __init__(self, integrator, topology, system, current_step):
            self.integrator = integrator
            self.topology = topology
            self.system = system
            self.currentStep = current_step

    # --- Simulation length ---
    # The total number of steps is
    # `total_number_of_iterations * steps_per_iteration`.
    steps_per_iteration = protocol_input(
        docstring='The number of steps to propogate the system by at '
        'each iteration. The total number of steps performed '
        'by this protocol will be `total_number_of_iterations * '
        'steps_per_iteration`.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1000000)
    total_number_of_iterations = protocol_input(
        docstring='The number of times to propogate the system forward by the '
        '`steps_per_iteration` number of steps. The total number of '
        'steps performed by this protocol will be `total_number_of_iterations * '
        'steps_per_iteration`.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
        default_value=1)

    # --- Output and checkpoint cadence ---
    # `checkpoint_frequency` is expressed in multiples of `output_frequency`,
    # not in steps.
    output_frequency = protocol_input(
        docstring=
        'The frequency (in number of steps) with which to write to the '
        'output statistics and trajectory files.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=3000)
    checkpoint_frequency = protocol_input(
        docstring=
        'The frequency (in multiples of `output_frequency`) with which to '
        'write to a checkpoint file, e.g. if `output_frequency=100` and '
        '`checkpoint_frequency==2`, a checkpoint file would be saved every '
        '200 steps.',
        type_hint=int,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        optional=True,
        default_value=10)

    # --- Integration and thermodynamic settings ---
    timestep = protocol_input(
        docstring='The timestep to evolve the system by at each step.',
        type_hint=unit.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=2.0 * unit.femtosecond)

    thermodynamic_state = protocol_input(
        docstring='The thermodynamic conditions to simulate under',
        type_hint=ThermodynamicState,
        default_value=UNDEFINED)
    ensemble = protocol_input(
        docstring='The thermodynamic ensemble to simulate in.',
        type_hint=Ensemble,
        default_value=Ensemble.NPT)

    thermostat_friction = protocol_input(
        docstring='The thermostat friction coefficient.',
        type_hint=unit.Quantity,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        default_value=1.0 / unit.picoseconds)

    # --- System definition ---
    input_coordinate_file = protocol_input(
        docstring='The file path to the starting coordinates.',
        type_hint=str,
        default_value=UNDEFINED)
    system_path = protocol_input(
        docstring=
        'A path to the XML system object which defines the forces present '
        'in the system.',
        type_hint=str,
        default_value=UNDEFINED)

    enable_pbc = protocol_input(
        docstring='If true, periodic boundary conditions will be enabled.',
        type_hint=bool,
        default_value=True)

    # --- Platform selection ---
    allow_gpu_platforms = protocol_input(
        docstring=
        'If true, OpenMM will be allowed to run using a GPU if available, '
        'otherwise it will be constrained to only using CPUs.',
        type_hint=bool,
        default_value=True)
    high_precision = protocol_input(
        docstring=
        'If true, OpenMM will be run using a platform with high precision '
        'settings. This will be the Reference platform when only a CPU is '
        'available, or double precision mode when a GPU is available.',
        type_hint=bool,
        default_value=False)

    # --- Outputs ---
    output_coordinate_file = protocol_output(
        docstring=
        'The file path to the coordinates of the final system configuration.',
        type_hint=str)
    trajectory_file_path = protocol_output(
        docstring=
        'The file path to the trajectory sampled during the simulation.',
        type_hint=str)
    statistics_file_path = protocol_output(
        docstring=
        'The file path to the statistics sampled during the simulation.',
        type_hint=str)

    def __init__(self, protocol_id):
        """Creates the protocol with all of its internal file paths and
        OpenMM objects unset.

        Parameters
        ----------
        protocol_id: str
            The id passed on to the base protocol constructor.
        """

        super().__init__(protocol_id)

        # Internal file paths - these are assigned by `execute`.
        self._checkpoint_path = self._state_path = None
        self._local_trajectory_path = self._local_statistics_path = None

        # The OpenMM objects are created by `execute` when either is missing.
        self._context = self._integrator = None

    def execute(self, directory, available_resources):
        """Validates the inputs, sets up the OpenMM objects if they do not
        yet exist, runs the simulation and stores the final statistics.

        Parameters
        ----------
        directory: str
            The directory to store the checkpoint, trajectory and
            statistics files in.
        available_resources: ComputeResources
            The resources available to run on.

        Returns
        -------
        dict or PropertyEstimatorException
            The result of `self._get_output_dictionary()` on success, or an
            exception object if the inputs are invalid or the simulation
            returned an exception.
        """

        # Normalise the ensemble once so that every check below compares like
        # with like, whether the input holds an `Ensemble` member or its value.
        ensemble = Ensemble(self.ensemble)

        # We handle most things in OMM units here.
        temperature = self.thermodynamic_state.temperature
        openmm_temperature = pint_quantity_to_openmm(temperature)

        pressure = None if ensemble == Ensemble.NVT else self.thermodynamic_state.pressure
        openmm_pressure = pint_quantity_to_openmm(pressure)

        if openmm_temperature is None:

            return PropertyEstimatorException(
                directory=directory,
                message='A temperature must be set to perform '
                'a simulation in any ensemble')

        if ensemble == Ensemble.NPT and openmm_pressure is None:

            return PropertyEstimatorException(
                directory=directory,
                message='A pressure must be set to perform an NPT simulation')

        if ensemble == Ensemble.NPT and self.enable_pbc is False:

            return PropertyEstimatorException(
                directory=directory,
                message='PBC must be enabled when running in the NPT ensemble.'
            )

        logging.info('Performing a simulation in the ' + str(self.ensemble) +
                     ' ensemble: ' + self.id)

        # Set up the internal file paths
        self._checkpoint_path = os.path.join(directory, 'checkpoint.json')
        self._state_path = os.path.join(directory, 'checkpoint_state.xml')

        self._local_trajectory_path = os.path.join(directory, 'trajectory.dcd')
        self._local_statistics_path = os.path.join(directory,
                                                   'openmm_statistics.csv')

        # Set up the simulation objects, reusing any which already exist.
        if self._context is None or self._integrator is None:

            self._context, self._integrator = self._setup_simulation_objects(
                openmm_temperature, openmm_pressure, available_resources)

        # Save a copy of the starting configuration if it doesn't already exist
        local_input_coordinate_path = os.path.join(directory, 'input.pdb')

        if not os.path.isfile(local_input_coordinate_path):

            input_pdb_file = app.PDBFile(self.input_coordinate_file)

            with open(local_input_coordinate_path, 'w+') as configuration_file:
                app.PDBFile.writeFile(input_pdb_file.topology,
                                      input_pdb_file.positions,
                                      configuration_file)

        # Run the simulation.
        result = self._simulate(directory, self._context, self._integrator)

        if isinstance(result, PropertyEstimatorException):
            return result

        # Set the output paths.
        self.trajectory_file_path = self._local_trajectory_path
        self.statistics_file_path = os.path.join(directory, 'statistics.csv')

        # Save out the final statistics in the property estimator format
        self._save_final_statistics(self.statistics_file_path, temperature,
                                    pressure)

        return self._get_output_dictionary()

    def _setup_simulation_objects(self, temperature, pressure,
                                  available_resources):
        """Builds the OpenMM context and integrator which will be used to
        perform the simulation.

        Parameters
        ----------
        temperature: simtk.unit.Quantity
            The temperature to run the simulation at.
        pressure: simtk.unit.Quantity
            The pressure to run the simulation at. This is ignored
            when periodic boundary conditions are disabled.
        available_resources: ComputeResources
            The resources available to run on.

        Returns
        -------
        simtk.openmm.Context
            The created openmm context, initialised with the positions
            (and, when PBC are enabled, box vectors) of the input
            coordinate file and with velocities set to the temperature.
        openmmtools.integrators.LangevinIntegrator
            The Langevin integrator which will propagate
            the simulation.

        Raises
        ------
        ValueError
            If PBC are enabled but the input coordinate file does not
            contain box vectors.
        """

        import openmmtools
        from simtk.openmm import XmlSerializer

        # When GPUs are not allowed, only pass on the number of threads.
        if not self.allow_gpu_platforms:

            from propertyestimator.backends import ComputeResources
            available_resources = ComputeResources(available_resources.number_of_threads)

        platform = setup_platform_with_resources(available_resources, self.high_precision)

        # Read the system object back in from its xml representation.
        with open(self.system_path, 'r') as system_file:
            system = XmlSerializer.deserialize(system_file.read())

        # Without periodic boundaries no pressure is applied.
        if not self.enable_pbc:

            disable_pbc(system)
            pressure = None

        # Use the openmmtools ThermodynamicState object to help
        # set up a system which contains the correct barostat if
        # one should be present.
        thermodynamic_state = openmmtools.states.ThermodynamicState(system=system,
                                                                    temperature=temperature,
                                                                    pressure=pressure)

        system = thermodynamic_state.get_system(remove_thermostat=True)

        integrator = openmmtools.integrators.LangevinIntegrator(
            temperature=temperature,
            collision_rate=pint_quantity_to_openmm(self.thermostat_friction),
            timestep=pint_quantity_to_openmm(self.timestep)
        )

        context = openmm.Context(system, integrator, platform)

        # Initialise the context from the input coordinates.
        pdb_file = app.PDBFile(self.input_coordinate_file)

        if self.enable_pbc:

            periodic_box_vectors = pdb_file.topology.getPeriodicBoxVectors()

            if periodic_box_vectors is None:

                raise ValueError('The input file must contain box vectors '
                                 'when running with PBC.')

            context.setPeriodicBoxVectors(*periodic_box_vectors)

        context.setPositions(pdb_file.positions)
        context.setVelocitiesToTemperature(temperature)

        return context, integrator

    def _write_checkpoint_file(self, current_step_number, context):
        """Stores the current state of the simulation on disk, as a
        serialized OpenMM state (at `self._state_path`) and a JSON
        checkpoint (at `self._checkpoint_path`).

        Parameters
        ----------
        current_step_number: int
            The total number of steps which have been taken so
            far.
        context: simtk.openmm.Context
            The current OpenMM context.
        """

        # Everything which is requested from the context for the state file.
        state_options = {
            'getPositions': True,
            'getEnergy': True,
            'getVelocities': True,
            'getForces': True,
            'getParameters': True,
            'enforcePeriodicBox': self.enable_pbc
        }

        current_state = context.getState(**state_options)

        with open(self._state_path, 'w') as state_file:
            state_file.write(openmm.XmlSerializer.serialize(current_state))

        # Record the settings and progress which the state file does not hold.
        checkpoint = self._Checkpoint(output_frequency=self.output_frequency,
                                      checkpoint_frequency=self.checkpoint_frequency,
                                      steps_per_iteration=self.steps_per_iteration,
                                      current_step_number=current_step_number)

        with open(self._checkpoint_path, 'w') as checkpoint_file:
            json.dump(checkpoint, checkpoint_file, cls=TypedJSONEncoder)

    def _truncate_statistics_file(self, number_of_frames):
        """Cuts the statistics file at `self._local_statistics_path`
        down to its first `number_of_frames` rows of data.

        The first line of the file is preserved verbatim. Any other text
        running from a `#` to the end of its line is discarded before
        the remaining rows are parsed.

        Parameters
        ----------
        number_of_frames: int
            The number of frames to truncate to.

        Raises
        ------
        ValueError
            If the file contains fewer rows than `number_of_frames`.
        """
        with open(self._local_statistics_path) as statistics_file:

            first_line = statistics_file.readline()
            remaining_text = re.sub('#.*\n', '', statistics_file.read())

        with io.StringIO(remaining_text) as text_buffer:

            stored_rows = pd.read_csv(text_buffer,
                                      header=None,
                                      index_col=False)

        statistics_length = len(stored_rows)

        if statistics_length < number_of_frames:

            raise ValueError(
                f'The saved number of statistics frames ({statistics_length}) '
                f'is less than expected ({number_of_frames}).')

        if statistics_length == number_of_frames:
            # The file already has exactly the requested length.
            return

        retained_rows = stored_rows[0:number_of_frames]

        # Rewrite the file in place: the original first line followed
        # by the retained rows only.
        with open(self._local_statistics_path, 'w') as statistics_file:

            statistics_file.write(first_line)
            retained_rows.to_csv(statistics_file, header=False, index=False)

    def _truncate_trajectory_file(self, number_of_frames):
        """Truncates the trajectory file to the specified number
        of frames.

        The frames to keep are streamed one at a time into a temporary
        file (the trajectory path with a `.tmp` suffix) which then
        replaces the original trajectory, so the full trajectory is
        never held in memory.

        Parameters
        ----------
        number_of_frames: int
            The number of frames to truncate to.

        Raises
        ------
        ValueError
            If the trajectory contains fewer frames than
            `number_of_frames`, or if the rewritten trajectory does not
            contain exactly `number_of_frames` frames.
        """
        import mdtraj
        from mdtraj.formats.dcd import DCDTrajectoryFile
        from mdtraj.utils import in_units_of

        # Load in the required topology object.
        topology = mdtraj.load_topology(self.input_coordinate_file)

        # Parse the internal mdtraj distance unit. While private access is
        # undesirable, this is never publicly defined and I believe this
        # route to be preferable over hard coding this unit here.
        base_distance_unit = mdtraj.Trajectory._distance_unit

        def count_frames():
            """Counts the stored frames by streaming the trajectory
            in chunks rather than reading it into memory."""
            return sum(
                len(chunk)
                for chunk in mdtraj.iterload(self._local_trajectory_path,
                                             top=topology))

        trajectory_length = count_frames()

        # Make sure there is at least the expected number of frames.
        if trajectory_length < number_of_frames:

            raise ValueError(
                f'The saved number of trajectory frames ({trajectory_length}) '
                f'is less than expected ({number_of_frames}).')

        elif trajectory_length == number_of_frames:
            return

        # Truncate the trajectory by streaming one frame of the trajectory at
        # a time.
        temporary_trajectory_path = f'{self._local_trajectory_path}.tmp'

        with DCDTrajectoryFile(self._local_trajectory_path, 'r') as input_file:

            with DCDTrajectoryFile(temporary_trajectory_path,
                                   'w') as output_file:

                for _ in range(number_of_frames):

                    frame = input_file.read_as_traj(topology,
                                                    n_frames=1,
                                                    stride=1)

                    # A trajectory without unit cell information exposes
                    # `None` for its cell attributes; indexing into that
                    # would raise a TypeError, so write such frames
                    # without any cell data instead.
                    if (frame.unitcell_lengths is None
                            or frame.unitcell_angles is None):

                        cell_lengths = None
                        cell_angles = None

                    else:

                        cell_lengths = in_units_of(frame.unitcell_lengths,
                                                   base_distance_unit,
                                                   output_file.distance_unit)
                        cell_angles = frame.unitcell_angles[0]

                    output_file.write(
                        xyz=in_units_of(frame.xyz, base_distance_unit,
                                        output_file.distance_unit),
                        cell_lengths=cell_lengths,
                        cell_angles=cell_angles)

        os.replace(temporary_trajectory_path, self._local_trajectory_path)

        # Do a sanity check to make sure the trajectory was successfully truncated.
        if count_frames() != number_of_frames:
            raise ValueError('The trajectory was incorrectly truncated.')

    def _resume_from_checkpoint(self, context):
        """Restores the simulation from any checkpoint files found
        on disk.

        When a usable checkpoint exists and the simulation has not
        already finished, the stored state is applied to `context` and
        the statistics and trajectory files are truncated to the number
        of frames implied by the stored step number.

        Parameters
        ----------
        context: simtk.openmm.Context
            The current OpenMM context.

        Returns
        -------
        int
            The current step number. This is zero when no checkpoint
            files were found.

        Raises
        ------
        ValueError
            If the checkpoint files exist but the trajectory or
            statistics files do not, or if the output frequency,
            checkpoint frequency or steps per iteration differ from
            those stored in the checkpoint.
        """

        # Both the checkpoint record and the state file are required.
        checkpoint_available = (os.path.isfile(self._checkpoint_path)
                                and os.path.isfile(self._state_path))

        if not checkpoint_available:

            logging.info('No checkpoint files were found.')
            return 0

        outputs_available = (os.path.isfile(self._local_statistics_path)
                             and os.path.isfile(self._local_trajectory_path))

        if not outputs_available:

            raise ValueError(
                'Checkpoint files were correctly found, but the trajectory '
                'or statistics files seem to be missing. This should not happen.'
            )

        logging.info('Restoring the system state from checkpoint files.')

        with open(self._state_path, 'r') as state_file:
            stored_state = openmm.XmlSerializer.deserialize(state_file.read())

        with open(self._checkpoint_path, 'r') as checkpoint_file:
            checkpoint = json.load(checkpoint_file, cls=TypedJSONDecoder)

        # None of these settings may differ from the values which were
        # in use when the checkpoint was written.
        for setting_name in ('output_frequency', 'checkpoint_frequency',
                             'steps_per_iteration'):

            if getattr(self, setting_name) != getattr(checkpoint,
                                                      setting_name):

                raise ValueError('Neither the output frequency, the checkpoint '
                                 'frequency, nor the steps per iteration can '
                                 'currently be changed during the course of the '
                                 'simulation. Only the number of iterations is '
                                 'allowed to change.')

        # A simulation which already reached its final step needs no
        # state restoring or file truncation.
        total_expected_number_of_steps = self.total_number_of_iterations * self.steps_per_iteration
        stored_step_number = checkpoint.current_step_number

        if stored_step_number == total_expected_number_of_steps:
            return stored_step_number

        context.setState(stored_state)

        # The trajectory / statistics files may hold more frames than
        # the checkpoint accounts for (e.g. when the protocol was killed
        # after writing a frame but before writing the checkpoint), so
        # trim both back to the number of frames the checkpoint implies.
        expected_number_of_frames = int(checkpoint.current_step_number /
                                        self.output_frequency)

        self._truncate_statistics_file(expected_number_of_frames)
        self._truncate_trajectory_file(expected_number_of_frames)

        logging.info('System state restored from checkpoint files.')

        return checkpoint.current_step_number

    def _save_final_statistics(self, path, temperature, pressure):
        """Builds a `StatisticsArray` from the OpenMM statistics csv file,
        adds a reduced potential column to it, and saves the result.

        The reduced potential is computed as the potential energy divided
        by the Avogadro number, plus the pressure multiplied by the volume
        when a pressure is provided, all multiplied by
        `1 / (boltzmann_constant * temperature)`.

        Parameters
        ----------
        path: str
            The path to save the statistics to.
        temperature: unit.Quantity
            The temperature that the simulation is being run at.
        pressure: unit.Quantity
            The pressure that the simulation is being run at.
        """
        statistics = StatisticsArray.from_openmm_csv(
            self._local_statistics_path, pressure)

        potential_energies = statistics[ObservableType.PotentialEnergy]
        energy_terms = potential_energies / unit.avogadro_number

        # The pressure-volume contribution only exists when a pressure
        # was provided.
        if pressure is not None:
            energy_terms += pressure * statistics[ObservableType.Volume]

        beta = 1.0 / (unit.boltzmann_constant * temperature)
        reduced_energy_terms = beta * energy_terms

        statistics[ObservableType.ReducedPotential] = reduced_energy_terms.to(
            unit.dimensionless)

        statistics.to_pandas_csv(path)

    def _simulate(self, directory, context, integrator):
        """Performs the simulation using a given context
        and integrator.

        The simulation is advanced in blocks of at most
        `output_frequency` steps. After each block the trajectory and
        statistics reporters are called, and a checkpoint is written
        whenever the checkpoint counter has reached
        `checkpoint_frequency`. Once all steps have been taken a final
        checkpoint and the final coordinates (`output.pdb` in
        `directory`) are written.

        Parameters
        ----------
        directory: str
            The directory the trajectory is being run in.
        context: simtk.openmm.Context
            The OpenMM context to run with.
        integrator: simtk.openmm.Integrator
            The integrator to evolve the simulation with.

        Returns
        -------
        PropertyEstimatorException, optional
            An exception object describing the failure if an exception
            was raised while stepping or reporting, otherwise `None`.
        """

        # Define how many steps should be taken.
        total_number_of_steps = self.total_number_of_iterations * self.steps_per_iteration

        # Try to load the current state from any available checkpoint information
        current_step = self._resume_from_checkpoint(context)

        if current_step == total_number_of_steps:
            # The simulation already ran to completion.
            return None

        # Build the reporters which we will use to report the state
        # of the simulation. They are driven manually through their
        # `report` methods below.
        append_trajectory = os.path.isfile(self._local_trajectory_path)
        dcd_reporter = app.DCDReporter(self._local_trajectory_path, 0,
                                       append_trajectory)

        # The statistics file is opened here, so it is also closed here:
        # the `with` block guarantees the handle is released whether the
        # simulation finishes, fails, or the set up below raises.
        with open(self._local_statistics_path, 'a+') as statistics_file:

            statistics_reporter = app.StateDataReporter(statistics_file,
                                                        0,
                                                        step=True,
                                                        potentialEnergy=True,
                                                        kineticEnergy=True,
                                                        totalEnergy=True,
                                                        temperature=True,
                                                        volume=True,
                                                        density=True)

            # Create the object which will transfer simulation output to the
            # reporters.
            topology = app.PDBFile(self.input_coordinate_file).topology

            with open(self.system_path, 'r') as file:
                system = openmm.XmlSerializer.deserialize(file.read())

            simulation = self._Simulation(integrator, topology, system,
                                          current_step)

            # Perform the simulation.
            checkpoint_counter = 0

            try:

                while current_step < total_number_of_steps:

                    # Never step past the requested total.
                    steps_to_take = min(self.output_frequency,
                                        total_number_of_steps - current_step)
                    integrator.step(steps_to_take)

                    current_step += steps_to_take

                    state = context.getState(
                        getPositions=True,
                        getEnergy=True,
                        getVelocities=False,
                        getForces=False,
                        getParameters=False,
                        enforcePeriodicBox=self.enable_pbc)

                    simulation.currentStep = current_step

                    # Write out the current state using the reporters.
                    dcd_reporter.report(simulation, state)
                    statistics_reporter.report(simulation, state)

                    if checkpoint_counter >= self.checkpoint_frequency:
                        # Save to the checkpoint file if needed.
                        self._write_checkpoint_file(current_step, context)
                        checkpoint_counter = 0

                    checkpoint_counter += 1

            except Exception as e:

                formatted_exception = f'{traceback.format_exception(None, e, e.__traceback__)}'

                return PropertyEstimatorException(
                    directory=directory,
                    message=f'The simulation failed unexpectedly: '
                    f'{formatted_exception}')

        # Save out the final positions.
        self._write_checkpoint_file(current_step, context)

        final_state = context.getState(getPositions=True)

        positions = final_state.getPositions()
        topology.setPeriodicBoxVectors(final_state.getPeriodicBoxVectors())

        self.output_coordinate_file = os.path.join(directory, 'output.pdb')

        with open(self.output_coordinate_file, 'w+') as configuration_file:
            app.PDBFile.writeFile(topology, positions, configuration_file)

        logging.info(
            f'Simulation performed in the {str(self.ensemble)} ensemble: {self._id}'
        )
        return None
コード例 #21
0
class BaseBuildSystemProtocol(BaseProtocol):
    """A common base for protocols which assign a set of force field
    parameters to a given system.

    It declares the shared inputs / outputs and provides helpers for
    building a TIP3P water system and for merging one OpenMM system
    into another. `execute` must be provided by subclasses.
    """
    class WaterModel(Enum):
        """The water models which may be requested, so that the
        correct charges can be applied.

        Warnings
        --------
        This is only a temporary addition until full water model support
        is introduced.
        """
        TIP3P = 'TIP3P'

    force_field_path = protocol_input(
        docstring='The file path to the force field parameters to assign to the system.',
        type_hint=str,
        default_value=UNDEFINED)
    coordinate_file_path = protocol_input(
        docstring='The file path to the PDB coordinate file which defines the '
        'topology of the system to which the force field parameters '
        'will be assigned.',
        type_hint=str,
        default_value=UNDEFINED)

    substance = protocol_input(
        docstring='The composition of the system.',
        type_hint=Substance,
        default_value=UNDEFINED)
    water_model = protocol_input(
        docstring='The water model to apply, if any water molecules are present.',
        type_hint=WaterModel,
        default_value=WaterModel.TIP3P)

    system_path = protocol_output(
        docstring='The path to the assigned system object.',
        type_hint=str)

    @staticmethod
    def _build_tip3p_system(topology_molecule, cutoff, cell_vectors):
        """Creates a `simtk.openmm.System` object for a single TIP3P
        water molecule, using the `forcefield/tip3p.xml` and
        `forcefield/tip3p.pdb` data files.

        Parameters
        ----------
        topology_molecule: openforcefield.topology.TopologyMolecule
            The topology molecule which represents the water molecule
            in the full system. Its atoms must be ordered O, H, H.
        cutoff: simtk.unit.Quantity
            The non-bonded cutoff.
        cell_vectors: simtk.unit.Quantity
            The full system's cell vectors.

        Returns
        -------
        simtk.openmm.System
            The created system.
        """

        water_atoms = list(topology_molecule.atoms)

        # The molecule must be a three atom water in O, H, H order.
        assert len(water_atoms) == 3

        for water_atom, expected_symbol in zip(water_atoms, ('O', 'H', 'H')):
            assert water_atom.atom.element.symbol == expected_symbol

        force_field_path = get_data_filename('forcefield/tip3p.xml')
        water_pdb_path = get_data_filename('forcefield/tip3p.pdb')

        water_topology = app.PDBFile(water_pdb_path).topology
        water_topology.setUnitCellDimensions(cell_vectors)

        # Parameterize the single water molecule.
        water_force_field = app.ForceField(force_field_path)

        return water_force_field.createSystem(topology=water_topology,
                                              nonbondedMethod=app.PME,
                                              nonbondedCutoff=cutoff,
                                              constraints=app.HBonds,
                                              rigidWater=True,
                                              removeCMMotion=False)

    @staticmethod
    def _append_system(existing_system, system_to_append):
        """Merges the particles, constraints and forces of one system
        into another, modifying `existing_system` in place.

        Every particle index taken from `system_to_append` is shifted by
        the number of particles which `existing_system` held before
        anything was added.

        Parameters
        ----------
        existing_system: simtk.openmm.System
            The base system to extend.
        system_to_append: simtk.openmm.System
            The system to append.

        Raises
        ------
        ValueError
            If a force of an unsupported type is encountered in either
            system.
        """
        supported_force_types = [
            openmm.HarmonicBondForce,
            openmm.HarmonicAngleForce,
            openmm.PeriodicTorsionForce,
            openmm.NonbondedForce,
        ]

        # This must be read before any new particles are added.
        index_offset = existing_system.getNumParticles()

        def shifted(particle_indices):
            """Maps particle indices of the appended system onto the
            combined system."""
            return [index + index_offset for index in particle_indices]

        # Append the particles.
        for particle_index in range(system_to_append.getNumParticles()):

            mass = system_to_append.getParticleMass(particle_index)
            existing_system.addParticle(mass)

        # Append the constraints.
        for constraint_index in range(system_to_append.getNumConstraints()):

            values = list(
                system_to_append.getConstraintParameters(constraint_index))
            existing_system.addConstraint(*shifted(values[:2]), values[2])

        # Reject any force to append which cannot be handled, before
        # any force is modified.
        for force_to_append in system_to_append.getForces():

            if type(force_to_append) not in supported_force_types:

                raise ValueError(f'The system contains an unsupported type of '
                                 f'force: {type(force_to_append)}.')

        number_of_appended_forces = 0

        # Append the forces.
        for force_to_append in system_to_append.getForces():

            # Look for a force of the same type to extend, creating a
            # new one only if the search finishes without a match.
            for force in existing_system.getForces():

                if type(force) not in supported_force_types:

                    raise ValueError(
                        f'The existing system contains an unsupported type '
                        f'of force: {type(force)}.')

                if type(force_to_append) == type(force):

                    existing_force = force
                    break

            else:

                existing_force = type(force_to_append)()
                existing_system.addForce(existing_force)

            if isinstance(force_to_append, openmm.HarmonicBondForce):

                # Add the bonds.
                for term_index in range(force_to_append.getNumBonds()):

                    values = list(force_to_append.getBondParameters(term_index))
                    existing_force.addBond(*shifted(values[:2]), *values[2:])

            elif isinstance(force_to_append, openmm.HarmonicAngleForce):

                # Add the angles.
                for term_index in range(force_to_append.getNumAngles()):

                    values = list(
                        force_to_append.getAngleParameters(term_index))
                    existing_force.addAngle(*shifted(values[:3]), *values[3:])

            elif isinstance(force_to_append, openmm.PeriodicTorsionForce):

                # Add the torsions.
                for term_index in range(force_to_append.getNumTorsions()):

                    values = list(
                        force_to_append.getTorsionParameters(term_index))
                    existing_force.addTorsion(*shifted(values[:4]),
                                              *values[4:])

            elif isinstance(force_to_append, openmm.NonbondedForce):

                # Add the per particle parameters, which carry no
                # particle indices and so need no shifting.
                for particle_index in range(force_to_append.getNumParticles()):

                    particle_parameters = force_to_append.getParticleParameters(
                        particle_index)
                    existing_force.addParticle(*particle_parameters)

                # Add the 1-2, 1-3 and 1-4 exceptions.
                for term_index in range(force_to_append.getNumExceptions()):

                    values = list(
                        force_to_append.getExceptionParameters(term_index))
                    existing_force.addException(*shifted(values[:2]),
                                                *values[2:])

            number_of_appended_forces += 1

        if number_of_appended_forces != system_to_append.getNumForces():
            raise ValueError('Not all forces were appended.')

    def execute(self, directory, available_resources):
        raise NotImplementedError()
コード例 #22
0
class UnpackStoredSimulationData(BaseProtocol):
    """Loads a `StoredSimulationData` object from disk,
    and makes its attributes easily accessible to other protocols.
    """

    simulation_data_path = protocol_input(
        docstring=
        'A list / tuple which contains both the path to the simulation data '
        'object, it\'s ancillary data directory, and the force field which '
        'was used to generate the stored data.',
        type_hint=Union[list, tuple],
        default_value=UNDEFINED)

    substance = protocol_output(docstring='The substance which was stored.',
                                type_hint=Substance)

    total_number_of_molecules = protocol_output(
        docstring='The total number of molecules in the stored system.',
        type_hint=int)

    thermodynamic_state = protocol_output(
        docstring='The thermodynamic state which was stored.',
        type_hint=ThermodynamicState)

    statistical_inefficiency = protocol_output(
        docstring='The statistical inefficiency of the stored data.',
        type_hint=float)

    coordinate_file_path = protocol_output(
        docstring='A path to the stored simulation output coordinates.',
        type_hint=str)
    trajectory_file_path = protocol_output(
        docstring='A path to the stored simulation trajectory.', type_hint=str)
    statistics_file_path = protocol_output(
        docstring='A path to the stored simulation statistics array.',
        type_hint=str)

    force_field_path = protocol_output(
        docstring=
        'A path to the force field parameters used to generate the stored data.',
        type_hint=str)

    def execute(self, directory, available_resources):
        """Validates the three provided paths, loads the stored data
        object, and copies its contents onto this protocol's outputs.

        Parameters
        ----------
        directory: str
            The directory which is reported in any returned exception.
        available_resources
            Not used by this protocol.

        Returns
        -------
        dict or PropertyEstimatorException
            The output dictionary of this protocol on success, otherwise
            an exception object describing which input was invalid.
        """

        if len(self.simulation_data_path) != 3:

            return PropertyEstimatorException(
                directory=directory,
                message='The simulation data path should be a tuple '
                'of a path to the data object, directory, and a path '
                'to the force field used to generate it.')

        data_object_path, data_directory, force_field_path = self.simulation_data_path

        # Report a missing data object in the same way as the other
        # two paths, rather than letting `open` raise further down.
        if not path.isfile(data_object_path):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The path to the data object '
                f'is invalid: {data_object_path}')

        if not path.isdir(data_directory):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The path to the data directory '
                f'is invalid: {data_directory}')

        if not path.isfile(force_field_path):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The path to the force field '
                f'is invalid: {force_field_path}')

        with open(data_object_path, 'r') as file:
            data_object = json.load(file, cls=TypedJSONDecoder)

        self.substance = data_object.substance
        self.total_number_of_molecules = data_object.total_number_of_molecules

        self.thermodynamic_state = data_object.thermodynamic_state

        self.statistical_inefficiency = data_object.statistical_inefficiency

        # The stored object only records file names, which are
        # relative to the ancillary data directory.
        self.coordinate_file_path = path.join(data_directory,
                                              data_object.coordinate_file_name)
        self.trajectory_file_path = path.join(data_directory,
                                              data_object.trajectory_file_name)

        self.statistics_file_path = path.join(data_directory,
                                              data_object.statistics_file_name)

        self.force_field_path = force_field_path

        return self._get_output_dictionary()
コード例 #23
0
class FilterSubstanceByRole(BaseProtocol):
    """Builds a new substance from only those components of an input
    substance whose role matches the requested one.

    Any mole fractions of the retained components are rescaled by the
    inverse of their sum; other kinds of amount are passed through
    unchanged.
    """

    input_substance = protocol_input(
        docstring='The substance to filter.',
        type_hint=Substance,
        default_value=UNDEFINED)

    component_role = protocol_input(
        docstring='The role to filter substance components against.',
        type_hint=Substance.ComponentRole,
        default_value=UNDEFINED)

    expected_components = protocol_input(
        docstring='The number of components expected to remain after filtering. '
        'An exception is raised if this number is not matched.',
        type_hint=int,
        default_value=UNDEFINED,
        optional=True)

    filtered_substance = protocol_output(
        docstring='The filtered substance.',
        type_hint=Substance)

    def execute(self, directory, available_resources):

        filtered_components = []
        mole_fraction_sum = 0.0

        # Collect the matching components, and total up their mole
        # fractions.
        for component in self.input_substance.components:

            if component.role == self.component_role:

                filtered_components.append(component)

                for amount in self.input_substance.get_amounts(component):

                    if isinstance(amount, Substance.MoleFraction):
                        mole_fraction_sum += amount.value

        if (self.expected_components != UNDEFINED
                and self.expected_components != len(filtered_components)):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The filtered substance does not contain the expected '
                f'number of components ({self.expected_components}) - '
                f'{filtered_components}')

        # Leave the amounts unscaled when there are no mole fractions
        # to normalize by.
        if np.isclose(mole_fraction_sum, 0.0):
            scale_factor = 1.0
        else:
            scale_factor = 1.0 / mole_fraction_sum

        self.filtered_substance = Substance()

        for component in filtered_components:

            for amount in self.input_substance.get_amounts(component):

                if isinstance(amount, Substance.MoleFraction):
                    amount = Substance.MoleFraction(amount.value *
                                                    scale_factor)

                self.filtered_substance.add_component(component, amount)

        return self._get_output_dictionary()
コード例 #24
0
class WeightByMoleFraction(BaseProtocol):
    """Scales a value by the mole fraction which a single component
    has within a `Substance`.
    """

    value = protocol_input(
        docstring='The value to be weighted.',
        type_hint=typing.Union[float, int, EstimatedQuantity, unit.Quantity,
                               ParameterGradient],
        default_value=UNDEFINED)

    component = protocol_input(
        docstring='The component whose mole fraction to weight by.',
        type_hint=Substance,
        default_value=UNDEFINED)
    full_substance = protocol_input(
        docstring='The full substance which describes the mole fraction of the component.',
        type_hint=Substance,
        default_value=UNDEFINED)

    weighted_value = protocol_output(
        'The value weighted by the `component`s mole fraction as determined from the '
        '`full_substance`.',
        type_hint=typing.Union[float, int, EstimatedQuantity, unit.Quantity,
                               ParameterGradient])

    def _weight_values(self, mole_fraction):
        """Multiplies the `value` input by a mole fraction.

        Parameters
        ----------
        mole_fraction: float
            The mole fraction to weight by.

        Returns
        -------
        float, int, EstimatedQuantity, unit.Quantity, ParameterGradient
            The weighted value.
        """
        weighted = self.value * mole_fraction
        return weighted

    def execute(self, directory, available_resources):

        # The `component` input must describe exactly one component.
        assert len(self.component.components) == 1

        main_component = self.component.components[0]
        amounts = self.full_substance.get_amounts(main_component)

        if len(amounts) != 1:

            return PropertyEstimatorException(
                directory=directory,
                message=
                f'More than one type of amount was defined for component '
                f'{main_component}. Only a single mole fraction must be '
                f'defined.')

        # Exactly one amount is present at this point.
        (amount,) = amounts

        if isinstance(amount, Substance.MoleFraction):

            self.weighted_value = self._weight_values(amount.value)
            return self._get_output_dictionary()

        return PropertyEstimatorException(
            directory=directory,
            message=f'The component {main_component} was given as an '
            f'exact amount, and not a mole fraction')
コード例 #25
0
class ConditionalGroup(ProtocolGroup):
    """A collection of protocols which are to execute until
    a given condition is met.
    """
    @unique
    class ConditionType(Enum):
        """The comparisons which a condition placed on the group may apply."""
        LessThan = 'lessthan'
        GreaterThan = 'greaterthan'

        @classmethod
        def has_value(cls, value):
            """Checks whether any of the enum items matches a given value.

            Parameters
            ----------
            value: str
                The value to check for.

            Returns
            ---------
            bool
                True if the enum contains the value.
            """
            for item in cls:

                if value == item.value:
                    return True

            return False

    class Condition:
        """A single comparison between a left and a right hand value.

        Either value may be a literal or a `ProtocolPath`; paths are
        resolved by `ConditionalGroup._evaluate_condition` when the
        condition is evaluated.
        """

        def __init__(self):

            self.type = ConditionalGroup.ConditionType.LessThan

            self.left_hand_value = None
            self.right_hand_value = None

        def __getstate__(self):
            """Returns a dictionary state in which the condition type is
            stored as its string value."""

            return {
                'type': self.type.value,
                'left_hand_value': self.left_hand_value,
                'right_hand_value': self.right_hand_value
            }

        def __setstate__(self, state):
            """Restores the condition from a dictionary produced by
            `__getstate__`."""

            self.type = ConditionalGroup.ConditionType(state['type'])

            self.left_hand_value = state['left_hand_value']
            self.right_hand_value = state['right_hand_value']

        def __eq__(self, other):
            """Two conditions are equal when both values and the type match."""

            # Defer to Python's fallback comparison for foreign types instead
            # of failing with an AttributeError on a missing attribute.
            if not isinstance(other, type(self)):
                return NotImplemented

            return (self.left_hand_value == other.left_hand_value
                    and self.right_hand_value == other.right_hand_value
                    and self.type == other.type)

        def __ne__(self, other):

            result = self.__eq__(other)

            # `not NotImplemented` would be wrong, so pass it through.
            if result is NotImplemented:
                return result

            return not result

        def __str__(self):
            return f'{self.left_hand_value} {self.type} {self.right_hand_value}'

    @property
    def conditions(self):
        """list: The conditions stored on this group. The internal list
        itself is returned, not a copy."""
        return self._conditions

    # Input: upper bound on the iteration count. `execute` returns a
    # PropertyEstimatorException once `current_iteration` reaches this value
    # without all of the conditions being met.
    max_iterations = protocol_input(
        docstring=
        'The maximum number of iterations to run for to try and satisfy the '
        'groups conditions.',
        type_hint=int,
        default_value=100,
        merge_behavior=InequalityMergeBehaviour.LargestValue)

    # Output: initialised by `execute` from the checkpoint file and then
    # incremented at the start of every iteration.
    current_iteration = protocol_output(
        docstring=
        'The current number of iterations this group has performed while '
        'attempting to satisfy the specified conditions. This value starts '
        'from one.',
        type_hint=int)

    def __init__(self, protocol_id):
        """Constructs a new ConditionalGroup with an empty list of conditions.

        Parameters
        ----------
        protocol_id
            The id of this group; forwarded to the base class initializer.
        """
        # The condition list is created before the base initializer runs.
        # NOTE(review): presumably the base initializer may already touch the
        # conditions (e.g. via `_initialize`) - confirm before reordering.
        self._conditions = []
        super().__init__(protocol_id)

    def _initialize(self):
        """Initializes the protocol, then registers `conditions` as one
        of its required inputs."""

        super()._initialize()

        conditions_path = ProtocolPath('conditions')
        self.required_inputs.append(conditions_path)

    def _set_schema(self, schema_value):
        """Sets this group's state from a schema.

        Any `.conditions` entry in the schema inputs is removed while the
        base implementation runs, deep copies of the conditions are added to
        this group, and the entry is put back afterwards so that the caller's
        schema object is left as it was passed in.

        Parameters
        ----------
        schema_value
            The schema to apply. Its `inputs` mapping is temporarily modified.
        """

        has_conditions = '.conditions' in schema_value.inputs
        conditions = None

        if has_conditions:
            conditions = schema_value.inputs.pop('.conditions')

        try:

            if has_conditions:

                for condition in conditions:
                    self.add_condition(copy.deepcopy(condition))

            super()._set_schema(schema_value)

        finally:

            # Restore the entry even if something above raised, so that the
            # caller's schema is never left with the conditions missing.
            if has_conditions:
                schema_value.inputs['.conditions'] = conditions

    def _evaluate_condition(self, condition):
        """Evaluates whether a condition has been successfully met.

        Parameters
        ----------
        condition: ConditionalGroup.Condition
            The condition to evaluate. Values which are `ProtocolPath`
            objects are resolved through `get_value`; anything else is used
            as is.

        Returns
        -------
        bool
            True if the condition has been met. False is returned if
            either side of the condition is `None`.

        Raises
        ------
        NotImplementedError
            If the condition type is neither `LessThan` nor `GreaterThan`.
        """

        if not isinstance(condition.left_hand_value, ProtocolPath):
            left_hand_value = condition.left_hand_value
        else:
            left_hand_value = self.get_value(condition.left_hand_value)

        if not isinstance(condition.right_hand_value, ProtocolPath):
            right_hand_value = condition.right_hand_value
        else:
            right_hand_value = self.get_value(condition.right_hand_value)

        # A value which is not yet available can never satisfy the condition.
        if left_hand_value is None or right_hand_value is None:
            return False

        right_hand_value_correct_units = right_hand_value

        # When both sides are quantities, express the right hand side in the
        # units of the left so that the logged and compared values agree.
        if isinstance(right_hand_value, unit.Quantity) and isinstance(
                left_hand_value, unit.Quantity):
            right_hand_value_correct_units = right_hand_value.to(
                left_hand_value.units)

        logging.info(
            f'Evaluating condition for protocol {self.id}: '
            f'{left_hand_value} {condition.type} {right_hand_value_correct_units}'
        )

        # Compare against the converted value, i.e. exactly what was logged.
        if condition.type == self.ConditionType.LessThan:
            return left_hand_value < right_hand_value_correct_units
        elif condition.type == self.ConditionType.GreaterThan:
            return left_hand_value > right_hand_value_correct_units

        raise NotImplementedError()

    @staticmethod
    def _write_checkpoint(directory, current_iteration):
        """Creates a checkpoint file for this group so that it can continue
        executing where it left off if it was killed for some reason (e.g the
        worker it was running on was killed).

        Parameters
        ----------
        directory: str
            The path to the working directory of this protocol
        current_iteration: int
            The number of iterations this group has performed so far.
        """

        checkpoint_path = path.join(directory, 'checkpoint.json')

        with open(checkpoint_path, 'w') as file:
            json.dump({'current_iteration': current_iteration}, file)

    @staticmethod
    def _read_checkpoint(directory):
        """Creates a checkpoint file for this group so that it can continue
        executing where it left off if it was killed for some reason (e.g the
        worker it was running on was killed).

        Parameters
        ----------
        directory: str
            The path to the working directory of this protocol

        Returns
        -------
        int
            The number of iterations this group has performed so far.
        """

        current_iteration = 0
        checkpoint_path = path.join(directory, 'checkpoint.json')

        if not path.isfile(checkpoint_path):
            return current_iteration

        with open(checkpoint_path, 'r') as file:

            checkpoint_dictionary = json.load(file)
            current_iteration = checkpoint_dictionary['current_iteration']

        return current_iteration

    def execute(self, directory, available_resources):
        """Repeatedly executes the protocols within this group until every
        condition is met or `max_iterations` is reached.

        Parameters
        ----------
        directory : str
            The root directory in which to run the protocols
        available_resources: ComputeResources
            The resources available to execute on.

        Returns
        -------
        PropertyEstimatorException or object
            The value returned by the base class `execute` of the iteration
            on which all conditions were met. A `PropertyEstimatorException`
            is returned instead if the base class returned one, or if the
            conditions were still unmet once `current_iteration` reached
            `max_iterations`.
        """

        logging.info('Starting conditional while loop: {}'.format(self.id))

        # Resume from the iteration count of a previous, interrupted run.
        self.current_iteration = self._read_checkpoint(directory)

        while True:

            # Record the completed iteration count before starting the next
            # iteration so that we can pick up where we left off if this
            # execution fails (due to time constraints for e.g.).
            self._write_checkpoint(directory, self.current_iteration)
            self.current_iteration += 1

            return_value = super().execute(directory, available_resources)

            if isinstance(return_value, PropertyEstimatorException):
                # Exit on exceptions.
                return return_value

            # Every condition is evaluated (no short-circuiting), so each
            # one is logged on every iteration.
            evaluations = [
                self._evaluate_condition(condition)
                for condition in self._conditions
            ]

            if all(evaluations):

                logging.info(
                    f'Conditional while loop finished after {self.current_iteration} iterations: {self.id}'
                )
                return return_value

            if self.current_iteration >= self.max_iterations:

                return PropertyEstimatorException(
                    directory=directory,
                    message=
                    f'Conditional while loop failed to converge: {self.id}')

            logging.info(
                f'Conditional criteria not yet met after {self.current_iteration} iterations'
            )

    def can_merge(self, other, path_replacements=None):
        """Returns the base class's verdict on whether `other` can be merged
        into this group; no additional checks are made here."""
        return super().can_merge(other, path_replacements)

    def merge(self, other):
        """Merges another ProtocolGroup with this one. The id
        of this protocol will remain unchanged.

        It is assumed that can_merge has already returned that
        these protocol groups are compatible to be merged together.

        The `ProtocolPath` values of the conditions of `other` are
        redirected in place, and the very same condition objects are then
        added to this group.

        Parameters
        ----------
        other: ConditionalGroup
            The protocol to merge into this one.

        Returns
        -------
        object
            The mapping of merged ids returned by the base class `merge`.
        """
        merged_ids = super().merge(other)

        for condition in other.conditions:

            # Only the sides which reference other protocols need redirecting.
            path_operands = [
                operand for operand in (condition.left_hand_value,
                                        condition.right_hand_value)
                if isinstance(operand, ProtocolPath)
            ]

            # First point references to the other group at this group...
            for operand in path_operands:
                operand.replace_protocol(other.id, self.id)

            # ...then apply the id changes made while merging the children.
            for merged_id in merged_ids:

                for operand in path_operands:
                    operand.replace_protocol(merged_id, merged_ids[merged_id])

            self.add_condition(condition)

        return merged_ids

    def add_condition(self, condition_to_add):
        """Appends a condition to this group's list of conditions, unless
        an equal condition is already present.

        Parameters
        ----------
        condition_to_add: :obj:`ConditionalGroup.Condition`
            The condition to add.
        """

        already_present = any(existing == condition_to_add
                              for existing in self._conditions)

        if not already_present:
            self._conditions.append(condition_to_add)

    def set_uuid(self, value):
        """Stores the uuid of the calculation this protocol belongs to, and
        appends it to every `ProtocolPath` used by this group's conditions.

        Parameters
        ----------
        value : str
            The uuid of the parent calculation.
        """
        super().set_uuid(value)

        for condition in self._conditions:

            for operand in (condition.left_hand_value,
                            condition.right_hand_value):

                # Literal values carry no protocol ids to update.
                if isinstance(operand, ProtocolPath):
                    operand.append_uuid(value)

    def replace_protocol(self, old_id, new_id):
        """Finds each input which came from a given protocol and redirects
        it to instead take input from a different one. The `ProtocolPath`
        values of this group's conditions are redirected in the same way.

        Parameters
        ----------
        old_id : str
            The id of the old input protocol.
        new_id : str
            The id of the new input protocol.
        """
        super().replace_protocol(old_id, new_id)

        for condition in self._conditions:

            for operand in (condition.left_hand_value,
                            condition.right_hand_value):

                # Literal values do not reference any protocol.
                if isinstance(operand, ProtocolPath):
                    operand.replace_protocol(old_id, new_id)

    def get_class_attribute(self, reference_path):
        """Returns the class attribute which a path points to.

        Parameters
        ----------
        reference_path: ProtocolPath
            The path pointing to the attribute of interest.

        Returns
        -------
        object
            `None` when the path targets this group itself and its property
            name is `conditions` or contains `condition_`; otherwise
            whatever the base class implementation returns.
        """

        targets_this_group = reference_path.start_protocol is None or (
            reference_path.start_protocol == self.id
            and reference_path.last_protocol == self.id)

        if targets_this_group:

            property_name = reference_path.property_name

            if property_name == 'conditions' or 'condition_' in property_name:
                return None

        return super().get_class_attribute(reference_path)

    def get_value(self, reference_path):
        """Returns the value of one of this protocols parameters / inputs.

        Parameters
        ----------
        reference_path: ProtocolPath
            The path pointing to the value to return.

        Returns
        ----------
        object:
            This group's list of conditions when the path targets the
            group itself and names `conditions`; otherwise the value
            returned by the base class implementation.
        """

        targets_this_group = reference_path.start_protocol is None or (
            reference_path.start_protocol == self.id
            and reference_path.last_protocol == self.id)

        if targets_this_group and reference_path.property_name == 'conditions':
            return self._conditions

        return super().get_value(reference_path)

    def set_value(self, reference_path, value):
        """Sets the value of one of this protocols parameters / inputs.

        When the path targets the group itself and names `conditions`,
        the group's list of conditions is replaced by `value`; every other
        path is handled by the base class implementation.

        Parameters
        ----------
        reference_path: ProtocolPath
            The path pointing to the value to set.
        value: Any
            The value to set.
        """

        targets_this_group = reference_path.start_protocol is None or (
            reference_path.start_protocol == self.id
            and reference_path.last_protocol == self.id)

        if targets_this_group and reference_path.property_name == 'conditions':

            self._conditions = value
            return

        super().set_value(reference_path, value)

    def get_value_references(self, input_path):
        """Returns the protocol paths which an input of this group refers to.

        Parameters
        ----------
        input_path: ProtocolPath
            The input of interest.

        Returns
        -------
        dict
            For the `conditions` input, a dictionary which maps a path of
            the form `conditions[i].left_hand_value` (or
            `conditions[i].right_hand_value`) to the `ProtocolPath` stored on
            that side of condition `i`. Any other input is handled by the
            base class implementation.
        """

        if input_path.property_name != 'conditions':
            return super().get_value_references(input_path)

        value_references = {}

        for index, condition in enumerate(self.conditions):

            operands = (
                ('conditions[{}].left_hand_value', condition.left_hand_value),
                ('conditions[{}].right_hand_value', condition.right_hand_value),
            )

            for path_template, operand in operands:

                # Sides which hold literal values reference nothing.
                if not isinstance(operand, ProtocolPath):
                    continue

                source_path = ProtocolPath(path_template.format(index))
                value_references[source_path] = operand

        return value_references
コード例 #26
0
class ExtractAverageDielectric(analysis.AverageTrajectoryProperty):
    """Extracts the average dielectric constant from a simulation trajectory.
    """

    # Input: path of the serialized system file from which
    # `_extract_charges` reads the particle charges.
    system_path = protocol_input(
        docstring=
        'The path to the XML system object which defines the forces present in the system.',
        type_hint=str,
        default_value=UNDEFINED)
    # Input: supplies the temperature used by `_bootstrap_function`.
    thermodynamic_state = protocol_input(
        docstring=
        'The thermodynamic state at which the trajectory was generated.',
        type_hint=ThermodynamicState,
        default_value=UNDEFINED)

    # Output: set by `execute` to the dipole moments of every frame, before
    # any decorrelation is applied.
    dipole_moments = protocol_output(
        docstring=
        'The raw (possibly correlated) dipole moments which were used in '
        'the dielectric calculation.',
        type_hint=unit.Quantity)
    # Output: set by `execute` to the volumes of every frame.
    # NOTE(review): the description below appears to be missing the word
    # "volumes" after "(possibly correlated)".
    volumes = protocol_output(
        docstring=
        'The raw (possibly correlated) which were used in the dielectric calculation.',
        type_hint=unit.Quantity)

    # Output: set by `execute` to the volumes at the uncorrelated sample
    # indices.
    uncorrelated_volumes = protocol_output(
        docstring='The uncorrelated volumes which were used in the dielectric '
        'calculation.',
        type_hint=unit.Quantity)

    def _bootstrap_function(self, **sample_kwargs):
        """Calculates the static dielectric constant from an
        array of dipoles and volumes.

        Notes
        -----
        The static dielectric constant is taken from Equation 7 of [1]

        References
        ----------
        [1] A. Glattli, X. Daura and W. F. van Gunsteren. Derivation of an improved simple point charge
            model for liquid water: SPC/A and SPC/L. J. Chem. Phys. 116(22):9811-9828, 2002

        Parameters
        ----------
        sample_kwargs: dict of str and np.ndarray
            A key words dictionary of the bootstrap sample data, where the
            sample data is a numpy array of shape=(num_frames, num_dimensions)
            with dtype=float. The kwargs must include the dipole moments
            (key `dipoles`) and the system volumes (key `volumes`).

        Returns
        -------
        float
            The unitless static dielectric constant
        """

        dipoles = sample_kwargs['dipoles']
        cell_volumes = sample_kwargs['volumes']

        # Fluctuations of the dipole moments about their mean.
        fluctuations = dipoles - dipoles.mean(0)
        squared_fluctuations = (fluctuations * fluctuations).sum(-1)

        dipole_variance = squared_fluctuations.mean(0) * \
            (unit.elementary_charge * unit.nanometers) ** 2

        average_volume = cell_volumes.mean() * unit.nanometer**3

        e0 = 8.854187817E-12 * unit.farad / unit.meter  # Taken from QCElemental

        denominator = (3 * unit.boltzmann_constant *
                       self.thermodynamic_state.temperature * average_volume *
                       e0)

        return 1.0 + dipole_variance / denominator

    def _extract_charges(self):
        """Extracts all of the charges from the system object stored at
        `system_path`.

        Returns
        -------
        list of float
            The charge of every particle of every `NonbondedForce` found in
            the system, expressed in units of the elementary charge.
        """
        from simtk import unit as simtk_unit

        charge_list = []

        # Read the path through the declared `system_path` input, in the same
        # way as every other input of this protocol is accessed.
        with open(self.system_path, 'r') as file:
            system = XmlSerializer.deserialize(file.read())

        for force_index in range(system.getNumForces()):

            force = system.getForce(force_index)

            # Only nonbonded forces are queried for particle charges.
            if not isinstance(force, openmm.NonbondedForce):
                continue

            for atom_index in range(force.getNumParticles()):

                # The charge is the first of the returned particle parameters.
                charge = force.getParticleParameters(atom_index)[0]
                charge = charge.value_in_unit(simtk_unit.elementary_charge)

                charge_list.append(charge)

        return charge_list

    def _extract_dipoles_and_volumes(self):
        """Extracts the dipole moment and the unit cell volume of every
        frame of the trajectory, loading it in chunks of 50 frames.

        Returns
        -------
        numpy.ndarray
            The dipole moments of the trajectory (shape=(n_frames, 3), dtype=float)
        numpy.ndarray
            The volumes of the trajectory (presumably one entry per frame -
            TODO confirm the exact shape)
        """
        import mdtraj

        charges = self._extract_charges()

        frame_dipoles, frame_volumes = [], []

        trajectory_chunks = mdtraj.iterload(self.trajectory_path,
                                            top=self.input_coordinate_file,
                                            chunk=50)

        for chunk in trajectory_chunks:

            frame_dipoles.extend(
                mdtraj.geometry.dipole_moments(chunk, charges))
            frame_volumes.extend(chunk.unitcell_volumes)

        return np.array(frame_dipoles), np.array(frame_volumes)

    def execute(self, directory, available_resources):
        """Extracts the dipole moments and volumes of the trajectory,
        decorrelates them, and estimates the dielectric constant and its
        uncertainty by bootstrapping.

        Parameters
        ----------
        directory: str
            The working directory, forwarded to the base class `execute`.
        available_resources
            Forwarded to the base class `execute`.

        Returns
        -------
        PropertyEstimatorException or object
            The exception returned by the base class `execute` if it failed,
            otherwise the result of `_get_output_dictionary()`.
        """

        logging.info('Extracting dielectrics: ' + self.id)

        base_exception = super(ExtractAverageDielectric,
                               self).execute(directory, available_resources)

        # The base class reports failures by returning an exception object,
        # which must be passed on rather than ignored.
        if isinstance(base_exception, PropertyEstimatorException):
            return base_exception

        # Extract the dipoles
        dipole_moments, volumes = self._extract_dipoles_and_volumes()
        self.dipole_moments = dipole_moments * unit.dimensionless

        dipole_moments, self.equilibration_index, self.statistical_inefficiency = \
            timeseries.decorrelate_time_series(dipole_moments)

        # Sub-sample the volumes using the equilibration index and the
        # statistical inefficiency which were determined from the dipoles.
        uncorrelated_length = len(volumes) - self.equilibration_index

        sample_indices = timeseries.get_uncorrelated_indices(
            uncorrelated_length, self.statistical_inefficiency)
        sample_indices = [
            index + self.equilibration_index for index in sample_indices
        ]

        self.volumes = volumes * unit.nanometer**3
        uncorrelated_volumes = volumes[sample_indices]

        self.uncorrelated_values = dipole_moments * unit.dimensionless
        self.uncorrelated_volumes = uncorrelated_volumes * unit.nanometer**3

        value, uncertainty = bootstrap(self._bootstrap_function,
                                       self.bootstrap_iterations,
                                       self.bootstrap_sample_size,
                                       dipoles=dipole_moments,
                                       volumes=uncorrelated_volumes)

        self.value = EstimatedQuantity(value * unit.dimensionless,
                                       uncertainty * unit.dimensionless,
                                       self.id)

        logging.info('Extracted dielectrics: ' + self.id)

        return self._get_output_dictionary()
コード例 #27
0
class ReweightDielectricConstant(reweighting.BaseMBARProtocol):
    """Reweights a set of dipole moments (`reference_dipole_moments`) and
    volumes (`reference_volumes`) using MBAR, and then combines these to yield
    the reweighted dielectric constant. Uncertainties in the dielectric
    constant are determined by bootstrapping.
    """

    # Input: one entry per reference state. `execute` requires the list to be
    # non-empty, the same length as `reference_volumes`, and checks that its
    # first element is a `unit.Quantity`.
    reference_dipole_moments = protocol_input(
        docstring='A Quantity wrapped np.ndarray of the dipole moments of each '
        'of the reference states.',
        type_hint=list,
        default_value=UNDEFINED)
    # Input: one entry per reference state; each entry must have the same
    # length as the matching entry of `reference_dipole_moments`.
    reference_volumes = protocol_input(
        docstring='A Quantity wrapped np.ndarray of the volumes of each of the '
        'reference states.',
        type_hint=list,
        default_value=UNDEFINED)

    # Input: supplies the temperature used by `_bootstrap_function`.
    thermodynamic_state = protocol_input(
        docstring=
        'The thermodynamic state at which the trajectory was generated.',
        type_hint=ThermodynamicState,
        default_value=UNDEFINED)

    def __init__(self, protocol_id):
        """Constructs the protocol with bootstrapped uncertainties enabled.

        Parameters
        ----------
        protocol_id
            The id of this protocol; forwarded to the base class initializer.
        """
        super().__init__(protocol_id)
        # `execute` returns an exception unless this flag is set.
        self.bootstrap_uncertainties = True

    def _bootstrap_function(self, reference_reduced_potentials,
                            target_reduced_potentials,
                            **reference_observables):
        """Reweights the sampled dipole moments, squared dipole moments and
        volumes, and combines them into the dielectric constant.

        Parameters
        ----------
        reference_reduced_potentials
            The reduced potentials of the reference states; transposed before
            being passed to `_reweight_observables`.
        target_reduced_potentials
            The reduced potentials of the target state; transposed before
            being passed to `_reweight_observables`.
        reference_observables
            Exactly three observables, stored under the keys `dipoles`,
            `dipoles_sqr` and `volumes`; each is transposed before being
            reweighted.

        Returns
        -------
        object
            The reweighted dielectric constant.
        """

        assert len(reference_observables) == 3

        transposed_observables = {
            key: np.transpose(observable)
            for key, observable in reference_observables.items()
        }

        values, _, _ = self._reweight_observables(
            np.transpose(reference_reduced_potentials),
            np.transpose(target_reduced_potentials), **transposed_observables)

        average_squared_dipole = values['dipoles_sqr']

        # The variance is <M.M> - <M>.<M>, so the norm of the average dipole
        # has to be squared before it is subtracted.
        average_dipole_squared = np.linalg.norm(values['dipoles']) ** 2

        dipole_variance = (average_squared_dipole - average_dipole_squared) * \
                          (unit.elementary_charge * unit.nanometers) ** 2

        volume = values['volumes'] * unit.nanometer**3

        e0 = 8.854187817E-12 * unit.farad / unit.meter  # Taken from QCElemental

        dielectric_constant = 1.0 + dipole_variance / (
            3 * unit.boltzmann_constant *
            self.thermodynamic_state.temperature * volume * e0)

        return dielectric_constant

    def execute(self, directory, available_resources):
        """Validates the reference dipole moments and volumes, then
        reweights them with bootstrapped uncertainties.

        Parameters
        ----------
        directory: str
            The working directory, attached to any returned exception.
        available_resources
            Not used by this method.

        Returns
        -------
        PropertyEstimatorException or object
            A `PropertyEstimatorException` if the inputs are empty, are not
            `unit.Quantity` objects, differ in length, if bootstrapping is
            disabled, or if the reweighting itself reported an error.
            Otherwise the result of `_get_output_dictionary()`.
        """

        logging.info('Reweighting dielectric: {}'.format(self.id))

        def failure(message):
            # Every validation failure is reported in the same way.
            return PropertyEstimatorException(directory=directory,
                                              message=message)

        if len(self.reference_dipole_moments) == 0:
            return failure('There were no dipole moments to reweight.')

        if len(self.reference_volumes) == 0:
            return failure('There were no volumes to reweight.')

        # Only the first entry of each list is type checked.
        if not (isinstance(self.reference_dipole_moments[0], unit.Quantity)
                and isinstance(self.reference_volumes[0], unit.Quantity)):

            return failure('The reference observables should be '
                           'a list of unit.Quantity wrapped ndarray\'s.')

        if len(self.reference_dipole_moments) != len(self.reference_volumes):

            return failure('The number of reference dipoles does '
                           'not match the number of reference volumes.')

        # Each reference state must provide as many dipoles as volumes.
        for state_dipoles, state_volumes in zip(self.reference_dipole_moments,
                                                self.reference_volumes):

            if len(state_dipoles) != len(state_volumes):

                return failure('The number of reference dipoles does '
                               'not match the number of reference volumes.')

        self._reference_observables = self.reference_dipole_moments

        dipole_moments = self._prepare_observables_array(
            self.reference_dipole_moments)

        squared_dipoles = [
            np.dot(dipole, dipole) for dipole in np.transpose(dipole_moments)
        ]
        dipole_moments_sqr = np.array([squared_dipoles])

        volumes = self._prepare_observables_array(self.reference_volumes)

        if not self.bootstrap_uncertainties:

            return failure(
                'Dielectric constant can only be reweighted in conjunction '
                'with bootstrapped uncertainties.')

        error = self._execute_with_bootstrapping(
            unit.dimensionless,
            dipoles=dipole_moments,
            dipoles_sqr=dipole_moments_sqr,
            volumes=volumes)

        if error is not None:

            error.directory = directory
            return error

        return self._get_output_dictionary()
コード例 #28
0
class BuildTLeapSystem(BaseBuildSystemProtocol):
    """Parametrise a set of molecules with an Amber based force field.
    using the `tleap package <http://ambermd.org/AmberTools.php>`_.

    Notes
    -----
    * This protocol is currently a work in progress and as such has limited
      functionality compared to the more established `BuildSmirnoffSystem` protocol.
    * This protocol requires the optional `ambertools ==19.0` dependency to be installed.
    """
    class ChargeBackend(Enum):
        """The framework to use when generating conformers and assigning
        AM1BCC partial charges to the molecules being parameterised.
        """

        # Conformers and charges are generated using an `OpenEyeToolkitWrapper`.
        OpenEye = 'OpenEye'
        # Conformers and charges are generated using a toolkit registry built
        # from the `RDKitToolkitWrapper` and the `AmberToolsToolkitWrapper`.
        AmberTools = 'AmberTools'

    charge_backend = protocol_input(
        docstring='The backend framework to use to assign partial charges.',
        type_hint=ChargeBackend,
        default_value=ChargeBackend.OpenEye)

    @staticmethod
    def _topology_molecule_to_mol2(topology_molecule, file_name,
                                   charge_backend):
        """Writes a single topology molecule to disk as a mol2 file.

        A conformer and a set of AM1BCC partial charges are generated for a
        copy of the reference molecule, and are then re-ordered so that the
        atoms of the written molecule follow the ordering of the topology
        molecule rather than that of the reference molecule.

        .. todo :: This function uses non-public methods from the Open Force Field toolkit
                   and should be refactored when public methods become available

        Parameters
        ----------
        topology_molecule: openforcefield.topology.TopologyMolecule
            The `TopologyMolecule` to write out as a mol2 file. The atom
            ordering in this mol2 will be consistent with the topology
            ordering.
        file_name: str
            The path of the mol2 file to create.
        charge_backend: BuildTLeapSystem.ChargeBackend
            The backend to use for conformer generation and partial charge
            calculation.

        Raises
        ------
        ValueError
            If `charge_backend` is not one of the supported backends.
        """
        from openforcefield.topology import Molecule
        from simtk import unit as simtk_unit

        # Work on a copy so that generating conformers / charges does not
        # modify the reference molecule which the topology holds on to.
        charged_molecule = copy.deepcopy(topology_molecule.reference_molecule)

        backends = BuildTLeapSystem.ChargeBackend

        # Select the toolkit(s) which will do the work...
        if charge_backend == backends.OpenEye:

            from openforcefield.utils.toolkits import OpenEyeToolkitWrapper

            toolkit_registry = OpenEyeToolkitWrapper()

        elif charge_backend == backends.AmberTools:

            from openforcefield.utils.toolkits import RDKitToolkitWrapper, AmberToolsToolkitWrapper, ToolkitRegistry

            toolkit_registry = ToolkitRegistry(
                toolkit_precedence=[RDKitToolkitWrapper,
                                    AmberToolsToolkitWrapper])

        else:
            raise ValueError('Invalid toolkit specification.')

        # ...and then generate the conformer and the charges with them.
        charged_molecule.generate_conformers(toolkit_registry=toolkit_registry)
        charged_molecule.compute_partial_charges_am1bcc(
            toolkit_registry=toolkit_registry)

        # The parent topology is needed to look up the topology atom indices.
        parent_topology = topology_molecule.topology

        # Build up a fresh molecule whose atoms are in topology order.
        ordered_molecule = Molecule()
        ordered_molecule.name = charged_molecule.name

        for topology_atom in topology_molecule.atoms:

            # Force the topology to cache the topology molecule start indices
            parent_topology.atom(topology_atom.topology_atom_index)

            atom = topology_atom.atom

            ordered_molecule.add_atom(atom.atomic_number,
                                      atom.formal_charge,
                                      atom.is_aromatic,
                                      atom.stereochemistry,
                                      atom.name)

        for topology_bond in topology_molecule.bonds:

            # Temporary workaround: the new molecule indexes its atoms from
            # zero, whereas the topology atom indices (which are what we know)
            # count from the start of the whole topology. Subtracting the index
            # at which this molecule starts in the topology yields the "local"
            # index, i.e. the index the atom would have if this were the only
            # molecule in the topology.
            start_index = topology_molecule._atom_start_topology_index

            # `.atoms` is a generator, so expand it to allow indexing.
            bonded_atoms = list(topology_bond.atoms)
            bond = topology_bond.bond

            ordered_molecule.add_bond(
                bonded_atoms[0].topology_atom_index - start_index,
                bonded_atoms[1].topology_atom_index - start_index,
                bond.bond_order,
                bond.is_aromatic,
                bond.stereochemistry,
            )

        # Copy across the generated conformer and charges, moving each value
        # from its reference atom index to the matching topology atom index.
        n_atoms = charged_molecule.n_atoms

        coordinates = np.zeros((n_atoms, 3))
        charges = np.zeros(n_atoms)

        for reference_index in range(n_atoms):

            # No offset is needed here as `_ref_to_top_index` is already
            # "locally" indexed for this topology molecule.
            topology_index = topology_molecule._ref_to_top_index[
                reference_index]

            position = charged_molecule.conformers[0][reference_index]
            charge = charged_molecule.partial_charges[reference_index]

            coordinates[topology_index, :] = position.value_in_unit(
                simtk_unit.angstrom)
            charges[topology_index] = charge.value_in_unit(
                simtk_unit.elementary_charge)

        # Re-attach the units which were stripped above.
        ordered_molecule.add_conformer(coordinates * simtk_unit.angstrom)
        ordered_molecule.partial_charges = charges * simtk_unit.elementary_charge

        # Finally, write the re-ordered molecule to disk.
        ordered_molecule.to_file(file_name, file_format='mol2')

    @staticmethod
    def _run_tleap(force_field_source, initial_mol2_file_path, directory):
        """Uses tleap to apply parameters to a particular molecule,
        generating a `.prmtop` and a `.rst7` file with the applied parameters.

        The molecule is first passed through `antechamber` to assign atom
        types, then through `parmchk2` to generate any missing parameters,
        and finally through `tleap` itself. The captured output of each tool
        is written to a log file (`antechamber_output.log`,
        `prmchk2_output.log` and `tleap_output.log` respectively).

        Parameters
        ----------
        force_field_source: TLeapForceFieldSource
            The tleap source which describes which parameters to apply.
            Only the 'leaprc.gaff' and 'leaprc.gaff2' leap sources are
            supported.
        initial_mol2_file_path: str
            The path to the MOL2 representation of the molecule to parameterize.
            All of the tools are run inside of the
            `temporarily_change_directory(directory)` context, so a relative
            path is presumably resolved relative to `directory`.
        directory: str
            The directory to store any temporary files / the final
            parameters in.

        Returns
        -------
        str
            The file path to the `prmtop` file.
        str
            The file path to the `rst7` file.
        PropertyEstimatorException, optional
            Any errors which were raised. When this is not `None`, both
            of the returned paths are `None`.
        """

        # Change into the working directory.
        with temporarily_change_directory(directory):

            if force_field_source.leap_source == 'leaprc.gaff2':
                amber_type = 'gaff2'
            elif force_field_source.leap_source == 'leaprc.gaff':
                amber_type = 'gaff'
            else:
                return None, None, PropertyEstimatorException(
                    directory, f'The {force_field_source.leap_source} source '
                    f'is currently unsupported. Only the '
                    f'\'leaprc.gaff2\' and \'leaprc.gaff\' '
                    f' sources are supported.')

            # Run antechamber to find the correct atom types.
            processed_mol2_path = 'antechamber.mol2'

            antechamber_process = subprocess.Popen(
                [
                    'antechamber', '-i', initial_mol2_file_path, '-fi', 'mol2',
                    '-o', processed_mol2_path, '-fo', 'mol2', '-at',
                    amber_type, '-rn', 'MOL', '-an', 'no', '-pf', 'yes'
                ],
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)

            antechamber_output, antechamber_error = (
                antechamber_process.communicate())
            antechamber_exit_code = antechamber_process.returncode

            with open('antechamber_output.log', 'w') as file:
                # The `stdout:` header is part of the first line written -
                # it must not be written a second time.
                file.write(f'error code: {antechamber_exit_code}\nstdout:\n\n')
                file.write(antechamber_output.decode())
                file.write('\nstderr:\n\n')
                file.write(antechamber_error.decode())

            # Success is judged by whether the expected output file was
            # produced, not by the exit code.
            if not os.path.isfile(processed_mol2_path):

                return None, None, PropertyEstimatorException(
                    directory, f'antechamber failed to assign atom types to '
                    f'the input mol2 file '
                    f'({initial_mol2_file_path})')

            frcmod_path = None

            if amber_type in ('gaff', 'gaff2'):

                # Optionally run parmchk to find any missing parameters.
                frcmod_path = 'parmck2.frcmod'

                prmchk2_process = subprocess.Popen(
                    [
                        'parmchk2', '-i', processed_mol2_path, '-f', 'mol2',
                        '-o', frcmod_path, '-s', amber_type
                    ],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)

                prmchk2_output, prmchk2_error = prmchk2_process.communicate()
                prmchk2_exit_code = prmchk2_process.returncode

                with open('prmchk2_output.log', 'w') as file:
                    file.write(f'error code: {prmchk2_exit_code}\nstdout:\n\n')
                    file.write(prmchk2_output.decode())
                    file.write('\nstderr:\n\n')
                    file.write(prmchk2_error.decode())

                if not os.path.isfile(frcmod_path):

                    return None, None, PropertyEstimatorException(
                        directory,
                        f'parmchk2 failed to assign missing {amber_type} '
                        f'parameters to the antechamber created mol2 file '
                        f'({processed_mol2_path})')

            # Build the tleap input file.
            template_lines = [f'source {force_field_source.leap_source}']

            if frcmod_path is not None:
                template_lines.append(f'loadamberparams {frcmod_path}')

            prmtop_file_name = 'structure.prmtop'
            rst7_file_name = 'structure.rst7'

            template_lines.extend([
                f'MOL = loadmol2 {processed_mol2_path}',
                'setBox MOL \"centers\"',
                'check MOL',
                f'saveamberparm MOL {prmtop_file_name} {rst7_file_name}'
            ])

            input_file_path = 'tleap.in'

            with open(input_file_path, 'w') as file:
                file.write('\n'.join(template_lines))

            # Run tleap. The arguments of an argv list are passed to the
            # program verbatim, so the flags must not carry trailing spaces.
            tleap_process = subprocess.Popen(
                ['tleap', '-s', '-f', input_file_path],
                stdout=subprocess.PIPE)

            tleap_output, _ = tleap_process.communicate()
            tleap_exit_code = tleap_process.returncode

            with open('tleap_output.log', 'w') as file:
                file.write(f'error code: {tleap_exit_code}\nstdout:\n\n')
                file.write(tleap_output.decode())

            if not os.path.isfile(prmtop_file_name) or not os.path.isfile(
                    rst7_file_name):
                return None, None, PropertyEstimatorException(
                    directory, 'tleap failed to execute.')

            # Even when both files were produced, treat any error or warning
            # found in the `leap.log` file as a failure.
            with open('leap.log', 'r') as file:

                if not re.search(
                        'ERROR|WARNING|Warning|duplicate|FATAL|Could|Fatal|Error',
                        file.read()):
                    return os.path.join(directory,
                                        prmtop_file_name), os.path.join(
                                            directory, rst7_file_name), None

            return None, None, PropertyEstimatorException(
                directory, 'tleap failed to execute.')

    def execute(self, directory, available_resources):
        """Builds a parameterised OpenMM system for the substance using tleap
        and serializes it to a `system.xml` file in `directory`.

        A template system is built once for each unique molecule found in
        the topology - water (SMILES 'O' or '[H]O[H]') through
        `_build_tip3p_system`, everything else through `_run_tleap` - and the
        full system is then assembled by appending the matching template for
        every molecule in the topology, in topology order.

        Parameters
        ----------
        directory: str
            The directory to store the per component files and the final
            `system.xml` file in.
        available_resources
            Not used by this protocol.

        Returns
        -------
        dict or PropertyEstimatorException
            The value of `self._get_output_dictionary()` on success, or
            a `PropertyEstimatorException` describing why the system could
            not be built.
        """

        from openforcefield.topology import Molecule, Topology

        logging.info(
            f'Generating a system with tleap for {self.substance.identifier}: {self._id}'
        )

        with open(self.force_field_path) as file:
            force_field_source = ForceFieldSource.parse_json(file.read())

        if not isinstance(force_field_source, TLeapForceFieldSource):

            return PropertyEstimatorException(
                directory=directory,
                message='Only TLeap force field sources are supported by this '
                'protocol.')

        # Load in the systems coordinates / topology
        openmm_pdb_file = app.PDBFile(self.coordinate_file_path)

        # Create an OFF topology for better insight into the layout of the system topology.
        unique_molecules = [
            Molecule.from_smiles(component.smiles)
            for component in self.substance.components
        ]

        topology = Topology.from_openmm(openmm_pdb_file.topology,
                                        unique_molecules)

        # Find a unique instance of each topology molecule to get the correct
        # atom orderings.
        topology_molecules = dict()

        for topology_molecule in topology.topology_molecules:
            topology_molecules[topology_molecule.reference_molecule.to_smiles(
            )] = topology_molecule

        system_templates = {}

        cutoff = pint_quantity_to_openmm(force_field_source.cutoff)

        for index, (smiles, topology_molecule) in enumerate(
                topology_molecules.items()):

            component_directory = os.path.join(directory, str(index))

            # Always start from a clean directory for each component.
            if os.path.isdir(component_directory):
                shutil.rmtree(component_directory)

            os.makedirs(component_directory, exist_ok=True)

            if smiles != 'O' and smiles != '[H]O[H]':

                initial_mol2_name = 'initial.mol2'
                initial_mol2_path = os.path.join(component_directory,
                                                 initial_mol2_name)

                self._topology_molecule_to_mol2(topology_molecule,
                                                initial_mol2_path,
                                                self.charge_backend)

                # Only the file name is passed on, as the tleap tools are
                # run from inside of the component directory.
                prmtop_path, _, error = self._run_tleap(
                    force_field_source, initial_mol2_name, component_directory)

                if error is not None:
                    return error

                prmtop_file = openmm.app.AmberPrmtopFile(prmtop_path)

                component_system = prmtop_file.createSystem(
                    nonbondedMethod=app.PME,
                    nonbondedCutoff=cutoff,
                    constraints=app.HBonds,
                    rigidWater=True,
                    removeCMMotion=False)

                if openmm_pdb_file.topology.getPeriodicBoxVectors(
                ) is not None:
                    component_system.setDefaultPeriodicBoxVectors(
                        *openmm_pdb_file.topology.getPeriodicBoxVectors())
            else:

                component_system = self._build_tip3p_system(
                    topology_molecule, cutoff,
                    openmm_pdb_file.topology.getUnitCellDimensions())

            # Store the template under the SMILES of the topology's own
            # reference molecule - the exact key used for the look up below.
            # `index` counts molecules in the order in which they first appear
            # in the topology, which need not match the order of
            # `unique_molecules` (the order of the substance components), so
            # it must not be used to index that list.
            system_templates[smiles] = component_system

            with open(os.path.join(component_directory, 'component.xml'),
                      'w') as file:
                file.write(openmm.XmlSerializer.serialize(component_system))

        # Create the full system object from the component templates.
        system = None

        for topology_molecule in topology.topology_molecules:

            system_template = system_templates[
                topology_molecule.reference_molecule.to_smiles()]

            if system is None:

                # If no system has been set up yet, just use the first template.
                system = copy.deepcopy(system_template)
                continue

            # Append the component template to the full system.
            self._append_system(system, system_template)

        # Serialize the system object.
        system_xml = openmm.XmlSerializer.serialize(system)

        self.system_path = os.path.join(directory, 'system.xml')

        with open(self.system_path, 'w') as file:
            file.write(system_xml)

        logging.info(f'System generated: {self.id}')

        return self._get_output_dictionary()
コード例 #29
0
class UnpackStoredDataCollection(BaseProtocol):
    """Loads a `StoredDataCollection` object from disk,
    and makes its inner data objects easily accessible to other protocols.

    Each inner data object of the collection is written to its own JSON
    file, and the paths to these files are exposed through the
    `collection_data_paths` output.
    """

    input_data_path = protocol_input(
        docstring=
        'A tuple which contains both the path to the simulation data object, '
        'it\'s ancillary data directory, and the force field which was used '
        'to generate the stored data.',
        type_hint=Union[list, tuple],
        default_value=UNDEFINED)

    collection_data_paths = protocol_output(
        docstring='A dictionary of data object path, data directory path and '
        'force field path tuples partitioned by the unique collection '
        'keys.',
        type_hint=dict)

    def execute(self, directory, available_resources):
        """Validates the input paths, loads the stored collection and
        writes each of its inner data objects to `directory`.

        Parameters
        ----------
        directory: str
            The directory to write the `{data_key}.json` file of each
            inner data object to.
        available_resources
            Not used by this protocol.

        Returns
        -------
        dict or PropertyEstimatorException
            The value of `self._get_output_dictionary()` on success, or
            a `PropertyEstimatorException` if the input does not contain
            exactly three entries, if any of the paths is invalid, or if the
            loaded object is not a `StoredDataCollection`.
        """

        if len(self.input_data_path) != 3:

            return PropertyEstimatorException(
                directory=directory,
                message='The input data path should be a tuple '
                'of a path to the data object, directory, and a path '
                'to the force field used to generate it.')

        data_object_path, data_directory, force_field_path = (
            self.input_data_path)

        if not path.isfile(data_object_path):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the data object '
                'is invalid: {}'.format(data_object_path))

        if not path.isdir(data_directory):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the data directory '
                'is invalid: {}'.format(data_directory))

        if not path.isfile(force_field_path):

            return PropertyEstimatorException(
                directory=directory,
                message='The path to the force field '
                'is invalid: {}'.format(force_field_path))

        with open(data_object_path, 'r') as file:
            data_object = json.load(file, cls=TypedJSONDecoder)

        if not isinstance(data_object, StoredDataCollection):

            return PropertyEstimatorException(
                directory=directory,
                message=f'The data object must be a `StoredDataCollection` '
                f'and not a {type(data_object)}')

        self.collection_data_paths = {}

        for data_key, inner_data_object in data_object.data.items():

            # The inner object is written into this protocol's directory,
            # while its data directory is the sub-directory of the original
            # data directory named after the collection key.
            inner_object_path = path.join(directory, f'{data_key}.json')
            inner_directory_path = path.join(data_directory, data_key)

            with open(inner_object_path, 'w') as file:
                json.dump(inner_data_object, file, cls=TypedJSONEncoder)

            self.collection_data_paths[data_key] = (inner_object_path,
                                                    inner_directory_path,
                                                    force_field_path)

        return self._get_output_dictionary()
コード例 #30
0
class BuildSmirnoffSystem(BaseBuildSystemProtocol):
    """Parametrise a set of molecules with a given smirnoff force field
    using the `OpenFF toolkit <https://github.com/openforcefield/openforcefield>`_.
    """

    charged_molecule_paths = protocol_input(
        docstring=
        'File paths to mol2 files which contain the charges assigned to '
        'molecules in the system. This input is helpful when dealing '
        'with large molecules (such as hosts in host-guest binding '
        'calculations) whose charges may by needed in multiple places,'
        ' and hence should only be calculated once.',
        type_hint=list,
        default_value=[])
    apply_known_charges = protocol_input(
        docstring=
        'If true, the formal charges of ions and the partial charges of '
        'the selected water model will be automatically applied to any '
        'matching molecules in the system.',
        type_hint=bool,
        default_value=True)

    @staticmethod
    def _generate_known_charged_molecules():
        """Builds the molecules whose partial charges are known a priori
        (a set of monatomic ions and water), so that they may be used
        when parameterising a system.

        Notes
        -----
        This is only a work around until library charges are fully
        implemented in the openforcefield toolkit.

        Todos
        -----
        Remove this method when library charges are fully implemented in
        the openforcefield toolkit.

        Returns
        -------
        list of openforcefield.topology.Molecule
            The sodium, potassium, calcium, chlorine and water molecules
            (in that order) with their partial charges assigned.
        """
        from openforcefield.topology import Molecule
        from simtk import unit as simtk_unit

        # The SMILES pattern of each molecule, alongside the partial charge
        # (in units of the elementary charge) to assign to each of its atoms.
        known_charges = [
            ('[Na+]', [1.0]),
            ('[K+]', [1.0]),
            ('[Ca+2]', [2.0]),
            ('[Cl-]', [-1.0]),
            ('O', [-0.834, 0.417, 0.417]),
        ]

        charged_molecules = []

        for smiles, charges in known_charges:

            molecule = Molecule.from_smiles(smiles)
            molecule.partial_charges = (np.array(charges) *
                                        simtk_unit.elementary_charge)

            charged_molecules.append(molecule)

        return charged_molecules

    def execute(self, directory, available_resources):
        """Parameterises the substance with a SMIRNOFF force field and
        serializes the resulting OpenMM system to a `system.xml` file in
        `directory`.

        Parameters
        ----------
        directory: str
            The directory to store the `system.xml` file in.
        available_resources
            Not used by this protocol.

        Returns
        -------
        dict or PropertyEstimatorException
            The value of `self._get_output_dictionary()` on success, or
            a `PropertyEstimatorException` if the force field source could
            not be loaded, is not a SMIRNOFF source, or if the molecules /
            system could not be created.
        """

        from openforcefield.topology import Molecule, Topology

        logging.info('Generating topology: ' + self.id)

        pdb_file = app.PDBFile(self.coordinate_file_path)

        try:

            with open(self.force_field_path) as file:
                force_field_source = ForceFieldSource.parse_json(file.read())

        except Exception as e:

            # Any failure to read or parse the source is reported back as a
            # returned exception object rather than being raised.
            return PropertyEstimatorException(
                directory=directory,
                message='{} could not load the ForceFieldSource: {}'.format(
                    self.id, e))

        if not isinstance(force_field_source, SmirnoffForceFieldSource):

            return PropertyEstimatorException(
                directory=directory,
                message='Only SMIRNOFF force fields are supported by this '
                'protocol.')

        force_field = force_field_source.to_force_field()

        unique_molecules = []
        charged_molecules = []

        if self.apply_known_charges:
            charged_molecules = self._generate_known_charged_molecules()

        # Load in any additional, user specified charged molecules.
        for charged_molecule_path in self.charged_molecule_paths:

            charged_molecule = Molecule.from_file(charged_molecule_path,
                                                  'MOL2')
            charged_molecules.append(charged_molecule)

        for component in self.substance.components:

            molecule = Molecule.from_smiles(smiles=component.smiles)

            if molecule is None:

                return PropertyEstimatorException(
                    directory=directory,
                    message='{} could not be converted to a Molecule'.format(
                        component))

            unique_molecules.append(molecule)

        topology = Topology.from_openmm(pdb_file.topology,
                                        unique_molecules=unique_molecules)

        # Only pass the pre-charged molecules on when there are some.
        if charged_molecules:
            system = force_field.create_openmm_system(
                topology, charge_from_molecules=charged_molecules)
        else:
            system = force_field.create_openmm_system(topology)

        if system is None:

            return PropertyEstimatorException(
                directory=directory,
                message='Failed to create a system from the '
                'provided topology and molecules')

        from simtk.openmm import XmlSerializer
        system_xml = XmlSerializer.serialize(system)

        self.system_path = os.path.join(directory, 'system.xml')

        with open(self.system_path, 'wb') as file:
            file.write(system_xml.encode('utf-8'))

        logging.info('Topology generated: ' + self.id)

        return self._get_output_dictionary()