class ConcatenateTrajectories(Protocol):
    """Joins several trajectories end-to-end and stores the result as a
    single DCD file.
    """

    input_coordinate_paths = InputAttribute(
        docstring="A list of paths to the starting PDB coordinates for each of the "
        "trajectories.",
        type_hint=list,
        default_value=UNDEFINED,
    )
    input_trajectory_paths = InputAttribute(
        docstring="A list of paths to the trajectories to concatenate.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    output_coordinate_path = OutputAttribute(
        docstring="The path the PDB coordinate file which contains the topology "
        "of the concatenated trajectory.",
        type_hint=str,
    )
    output_trajectory_path = OutputAttribute(
        docstring="The path to the concatenated trajectory.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Load every trajectory, join them and write the result to
        ``output_trajectory.dcd`` inside ``directory``.

        Raises
        ------
        ValueError
            If the number of coordinate and trajectory paths differ, or if
            no trajectories were provided.
        """
        import mdtraj

        coordinate_paths = self.input_coordinate_paths
        trajectory_paths = self.input_trajectory_paths

        if len(coordinate_paths) != len(trajectory_paths):
            raise ValueError(
                "There should be the same number of coordinate and trajectory paths."
            )
        if len(trajectory_paths) == 0:
            raise ValueError("No trajectories were given to concatenate.")

        loaded = [
            mdtraj.load_dcd(trajectory_path, coordinate_path)
            for coordinate_path, trajectory_path in zip(
                coordinate_paths, trajectory_paths
            )
        ]

        # The topology file is the first non-empty coordinate path, falling
        # back to the last entry when none of them is truthy.
        self.output_coordinate_path = next(
            (candidate for candidate in coordinate_paths if candidate),
            coordinate_paths[-1],
        )

        if len(loaded) == 1:
            joined = loaded[0]
        else:
            joined = mdtraj.join(loaded, False, False)

        self.output_trajectory_path = path.join(directory, "output_trajectory.dcd")
        joined.save_dcd(self.output_trajectory_path)
class AveragePropertyProtocol(Protocol, abc.ABC):
    """Base class for protocols that report the average of a property
    together with a bootstrapped uncertainty.
    """

    bootstrap_iterations = InputAttribute(
        docstring="The number of bootstrap iterations to perform.",
        type_hint=int,
        default_value=250,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    bootstrap_sample_size = InputAttribute(
        docstring="The relative sample size to use for bootstrapping.",
        type_hint=float,
        default_value=1.0,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    equilibration_index = OutputAttribute(
        docstring="The index in the data set after which the data is stationary.",
        type_hint=int,
    )
    statistical_inefficiency = OutputAttribute(
        docstring="The statistical inefficiency in the data set.",
        type_hint=float,
    )

    value = OutputAttribute(
        docstring="The average value and its uncertainty.",
        type_hint=pint.Measurement,
    )
    uncorrelated_values = OutputAttribute(
        docstring="The uncorrelated values which the average was calculated from.",
        type_hint=pint.Quantity,
    )

    def _bootstrap_function(self, **sample_kwargs):
        """Evaluate the quantity of interest on one bootstrap sample.

        Parameters
        ----------
        sample_kwargs: dict of str and np.ndarray
            The bootstrap sample data keyed by name. Exactly one entry is
            expected; its value is a numpy array of
            shape=(num_frames, num_dimensions) with dtype=float.

        Returns
        -------
        float
            The mean of the single sample array.
        """
        assert len(sample_kwargs) == 1

        samples = iter(sample_kwargs.values())
        return next(samples).mean()
class ExtractUncorrelatedStatisticsData(ExtractUncorrelatedData):
    """Subsamples a statistics array so that only equilibrated,
    uncorrelated rows remain, using the supplied equilibration index and
    statistical inefficiency.
    """

    input_statistics_path = InputAttribute(
        docstring="The file path to the statistics to subsample.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    output_statistics_path = OutputAttribute(
        docstring="The file path to the subsampled statistics.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Read the statistics, keep the uncorrelated rows and write them
        to ``uncorrelated_statistics.csv`` inside ``directory``.
        """
        full_statistics = StatisticsArray.from_pandas_csv(self.input_statistics_path)

        # Indices are computed relative to the equilibrated region ...
        n_equilibrated = len(full_statistics) - self.equilibration_index
        relative_indices = timeseries.get_uncorrelated_indices(
            n_equilibrated, self.statistical_inefficiency
        )

        # ... and then shifted back into the frame of the full array.
        absolute_indices = []
        for relative_index in relative_indices:
            absolute_indices.append(relative_index + self.equilibration_index)

        subsampled = StatisticsArray.from_existing(full_statistics, absolute_indices)

        self.output_statistics_path = path.join(
            directory, "uncorrelated_statistics.csv"
        )
        subsampled.to_pandas_csv(self.output_statistics_path)

        self.number_of_uncorrelated_samples = len(subsampled)
class ComputeSymmetryCorrection(Protocol):
    """Evaluates the symmetry correction of an APR calculation whose guest
    molecule has symmetry.
    """

    n_microstates = InputAttribute(
        docstring="The number of symmetry microstates of the guest molecule.",
        type_hint=int,
        default_value=UNDEFINED,
    )
    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state that the calculation was performed at.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The symmetry correction.",
        type_hint=Observable,
    )

    def _execute(self, directory, available_resources):
        """Compute the correction with ``paprika`` and store it as an
        observable with a zero uncertainty.
        """
        from paprika.evaluator import Analyze

        temperature = self.thermodynamic_state.temperature.to(unit.kelvin).magnitude
        correction = Analyze.symmetry_correction(self.n_microstates, temperature)

        measurement = unit.Measurement(
            correction * unit.kilocalorie / unit.mole,
            0 * unit.kilocalorie / unit.mole,
        )
        self.result = Observable(measurement)
class MultiplyValue(Protocol):
    """Scales an input value by a scalar multiplier."""

    value = InputAttribute(
        docstring="The value to multiply.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    multiplier = InputAttribute(
        docstring="The scalar to multiply by.",
        type_hint=typing.Union[int, float, pint.Quantity],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The result of the multiplication.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store ``value * multiplier`` in ``result``."""
        product = self.value * self.multiplier
        self.result = product
class ConcatenateStatistics(Protocol):
    """A protocol which concatenates multiple statistics arrays into a
    single one and stores the result as a csv file.
    """

    input_statistics_paths = InputAttribute(
        docstring="A list of paths to statistics arrays to concatenate.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    output_statistics_path = OutputAttribute(
        docstring="The path the csv file which contains the concatenated statistics.",
        type_hint=str,
    )

    def _execute(self, directory, available_resources):
        """Load every statistics array, join them and write the result to
        ``output_statistics.csv`` inside ``directory``.

        Raises
        ------
        ValueError
            If no statistics paths were provided.
        """
        statistics_paths = self.input_statistics_paths

        if len(statistics_paths) == 0:
            raise ValueError("No statistics arrays were given to concatenate.")

        arrays = [
            StatisticsArray.from_pandas_csv(file_path)
            for file_path in statistics_paths
        ]

        # A single array is written out as-is; only two or more are joined.
        if len(arrays) > 1:
            output_array = StatisticsArray.join(*arrays)
        else:
            output_array = arrays[0]

        self.output_statistics_path = path.join(directory, "output_statistics.csv")
        output_array.to_pandas_csv(self.output_statistics_path)
class ConcatenateObservables(Protocol):
    """Joins several observable containers of the same type into a single
    container of that type.
    """

    input_observables = InputAttribute(
        docstring="A list of observable arrays to concatenate.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    output_observables = OutputAttribute(
        docstring="The concatenated observable array.",
        type_hint=typing.Union[ObservableArray, ObservableFrame],
    )

    def _execute(self, directory, available_resources):
        """Join the inputs using the ``join`` method of their shared type.

        Raises
        ------
        ValueError
            If no inputs were given, or if the inputs are not all instances
            of the first input's type.
        """
        observables = self.input_observables

        if len(observables) == 0:
            raise ValueError("No arrays were given to concatenate.")

        object_type = type(observables[0])

        if any(not isinstance(entry, object_type) for entry in observables):
            raise ValueError("The observables to concatenate must be the same type.")

        if len(observables) > 1:
            self.output_observables = object_type.join(*observables)
            return

        # A lone input is copied rather than passed through by reference.
        self.output_observables = copy.deepcopy(observables[0])
class SubtractValues(Protocol):
    """Computes the difference of two values, defined as
    `result = value_b - value_a`.
    """

    value_a = InputAttribute(
        docstring="`value_a` in the formula `result` = `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    value_b = InputAttribute(
        docstring="`value_b` in the formula `result` = `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The results of `value_b` - `value_a`.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store ``value_b - value_a`` in ``result``."""
        difference = self.value_b - self.value_a
        self.result = difference
class AddValues(Protocol):
    """A protocol to add together a list of values.

    Notes
    -----
    The `values` input must either be a list of pint.Quantity, a ProtocolPath
    to a list of pint.Quantity, or a list of ProtocolPath which each point to
    a pint.Quantity.
    """

    values = InputAttribute(
        docstring="The values to add together.",
        type_hint=list,
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The sum of the values.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Sum ``values`` from left to right and store the total in
        ``result``.

        Raises
        ------
        ValueError
            If ``values`` is empty.
        """
        values = self.values

        if len(values) < 1:
            raise ValueError("There were no values to add together")

        # Accumulate with the binary ``+`` into a local rather than applying
        # ``+=`` to ``self.result``: the running total starts out as the very
        # same object as ``values[0]``, so a type implementing in-place
        # addition would otherwise silently modify the first input.
        total = values[0]

        for value in values[1:]:
            total = total + value

        self.result = total
class DivideValue(Protocol):
    """Divides an input value by a scalar divisor."""

    value = InputAttribute(
        docstring="The value to divide.",
        type_hint=typing.Union[
            int, float, pint.Quantity, pint.Measurement, ParameterGradient
        ],
        default_value=UNDEFINED,
    )
    divisor = InputAttribute(
        docstring="The scalar to divide by.",
        type_hint=typing.Union[int, float, pint.Quantity],
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The result of the division.",
        type_hint=typing.Union[
            int, float, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _execute(self, directory, available_resources):
        """Store ``value / divisor`` in ``result``."""
        quotient = self.value / self.divisor
        self.result = quotient
class DecorrelateObservables(BaseDecorrelateProtocol):
    """Subsamples a set of observables so that only the uncorrelated
    entries, as selected by the base decorrelation protocol, are retained.
    """

    input_observables = InputAttribute(
        docstring="The observables to decorrelate.",
        type_hint=typing.Union[ObservableArray, ObservableFrame],
        default_value=UNDEFINED,
    )

    output_observables = OutputAttribute(
        docstring="The decorrelated observables.",
        type_hint=typing.Union[ObservableArray, ObservableFrame],
    )

    def _execute(self, directory, available_resources):
        """Keep only the entries at the uncorrelated indices."""
        # The input must have exactly as many entries as the base class
        # expects.
        assert len(self.input_observables) == self._n_expected()

        retained_indices = self._uncorrelated_indices()
        self.output_observables = self.input_observables.subset(retained_indices)
class AttributeObject(AttributeClass):
    """A minimal attribute class exposing one required input, one optional
    input and one output.
    """

    required_input = InputAttribute(
        docstring="",
        type_hint=str,
        default_value=UNDEFINED,
        optional=False,
    )
    optional_input = InputAttribute(
        docstring="",
        type_hint=int,
        default_value=UNDEFINED,
        optional=True,
    )

    some_output = OutputAttribute(docstring="", type_hint=int)

    def __init__(self):
        """Initialise ``some_output`` to a fixed value of 5."""
        self.some_output = 5
class _GenerateRestraints(Protocol, abc.ABC):
    """Base class for protocols which build the restraints of a specific
    APR phase from their schemas and store them in a JSON file.
    """

    restraint_schemas = InputAttribute(
        docstring="The full set of restraint schemas.",
        type_hint=dict,
        default_value=UNDEFINED,
    )

    restraints_path = OutputAttribute(
        docstring="The file path to the `paprika` generated restraints JSON file.",
        type_hint=str,
    )

    @classmethod
    def _restraints_to_dict(cls, restraints):
        """Return a JSON compatible dictionary for each ``paprika``
        restraint object in ``restraints``.
        """
        from paprika.io import NumpyEncoder

        serialized = []

        for restraint in restraints:
            # Round-trip through JSON text so that only plain JSON types
            # remain in the resulting dictionary.
            as_json = json.dumps(restraint.__dict__, cls=NumpyEncoder)
            serialized.append(json.loads(as_json))

        return serialized

    def _save_restraints(
        self,
        directory: str,
        static_restraints,
        conformational_restraints,
        symmetry_restraints=None,
        wall_restraints=None,
        guest_restraints=None,
    ):
        """Write all restraint groups to ``restraints.json`` inside
        ``directory`` and record the file path in ``restraints_path``.

        Optional groups which are ``None`` are stored as empty lists.
        """
        if symmetry_restraints is None:
            symmetry_restraints = []
        if wall_restraints is None:
            wall_restraints = []
        if guest_restraints is None:
            guest_restraints = []

        grouped_restraints = {
            "static": static_restraints,
            "conformational": conformational_restraints,
            "symmetry": symmetry_restraints,
            "wall": wall_restraints,
            "guest": guest_restraints,
        }
        restraints_dictionary = {
            group_name: self._restraints_to_dict(group)
            for group_name, group in grouped_restraints.items()
        }

        self.restraints_path = os.path.join(directory, "restraints.json")

        with open(self.restraints_path, "w") as file:
            json.dump(restraints_dictionary, file)
class DummyReplicableProtocol(Protocol):
    """A placeholder protocol with two replicated inputs and one output
    whose execution does nothing.
    """

    replicated_value_a = InputAttribute(
        docstring="",
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )
    replicated_value_b = InputAttribute(
        docstring="",
        type_hint=Union[str, int, float],
        default_value=UNDEFINED,
    )

    final_value = OutputAttribute(docstring="", type_hint=unit.Measurement)

    def _execute(self, directory, available_resources):
        """Intentionally perform no work."""
        return None
class DummyInputOutputProtocol(Protocol):
    """A protocol which passes its input straight through to its output."""

    input_value = InputAttribute(
        docstring="A dummy input.",
        type_hint=Union[
            str,
            int,
            float,
            pint.Quantity,
            pint.Measurement,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
        default_value=UNDEFINED,
    )

    output_value = OutputAttribute(
        docstring="A dummy output.",
        type_hint=Union[
            str,
            int,
            float,
            pint.Quantity,
            pint.Measurement,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
    )

    def _execute(self, directory, available_resources):
        """Copy ``input_value`` to ``output_value``."""
        passthrough = self.input_value
        self.output_value = passthrough
class BaseEvaluateEnergies(Protocol, abc.ABC):
    """Base class for protocols that re-evaluate the energies of the
    configurations stored in a trajectory using a given parameterized
    system.
    """

    thermodynamic_state = InputAttribute(
        docstring="The state to calculate the reduced potentials at.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    parameterized_system = InputAttribute(
        docstring=(
            "The parameterized system object which encodes the systems potential "
            "energy function."
        ),
        type_hint=ParameterizedSystem,
        default_value=UNDEFINED,
    )
    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled.",
        type_hint=bool,
        default_value=True,
    )

    trajectory_file_path = InputAttribute(
        docstring=(
            "The path to the trajectory file which contains the "
            "configurations to calculate the energies of."
        ),
        type_hint=str,
        default_value=UNDEFINED,
    )

    # The default is supplied as a factory so that no list object is created
    # at class definition time.
    gradient_parameters = InputAttribute(
        docstring=(
            "An optional list of parameters to differentiate the evaluated "
            "energies with respect to."
        ),
        type_hint=list,
        default_value=lambda: list(),
    )

    output_observables = OutputAttribute(
        docstring=(
            "An observable array which stores the reduced potentials potential "
            "energies evaluated at the specified state and using the specified system "
            "object for each configuration in the trajectory."
        ),
        type_hint=ObservableFrame,
    )
class ExtractUncorrelatedData(Protocol, abc.ABC):
    """Base class for protocols that reduce a data set to its
    equilibrated, uncorrelated entries.
    """

    equilibration_index = InputAttribute(
        docstring=(
            "The index in the data set after which the data is stationary."
        ),
        type_hint=int,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    statistical_inefficiency = InputAttribute(
        docstring=("The statistical inefficiency in the data set."),
        type_hint=float,
        default_value=UNDEFINED,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    number_of_uncorrelated_samples = OutputAttribute(
        docstring=("The number of uncorrelated samples."),
        type_hint=int,
    )
class DummyProtocol(Protocol):
    """A pass-through protocol: the output value is simply the input value."""

    input_value = InputAttribute(
        docstring="A dummy input.",
        type_hint=typing.Union[
            str,
            int,
            float,
            unit.Quantity,
            unit.Measurement,
            Observable,
            ObservableArray,
            ParameterGradient,
            ParameterGradientKey,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
        default_value=UNDEFINED,
    )

    output_value = OutputAttribute(
        docstring="A dummy output.",
        type_hint=typing.Union[
            str,
            int,
            float,
            unit.Quantity,
            unit.Measurement,
            Observable,
            ObservableArray,
            ParameterGradient,
            ParameterGradientKey,
            list,
            tuple,
            dict,
            set,
            frozenset,
        ],
    )

    def _execute(self, directory, available_resources):
        """Copy ``input_value`` to ``output_value``."""
        passthrough = self.input_value
        self.output_value = passthrough
class _PrepareAPRCoordinates(Protocol, abc.ABC):
    """The base class for protocols which will be used to prepare the
    coordinates for an APR calculation.
    """

    substance = InputAttribute(
        docstring="The substance which defines the host, guest and solvent.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    # The two string fragments below are joined by implicit concatenation,
    # so the first must end with a space.
    complex_file_path = InputAttribute(
        docstring="The path to the file which contains the coordinates of the guest "
        "molecule bound to the host molecule.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    output_coordinate_path = OutputAttribute(
        docstring="The file path to the system which has been correctly aligned to "
        "the z-axis.",
        type_hint=str,
    )
class BaseEnergyMinimisation(Protocol, abc.ABC):
    """Base class for protocols that minimise the potential energy of a
    system starting from a set of input coordinates.
    """

    input_coordinate_file = InputAttribute(
        docstring="The coordinates to minimise.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    system_path = InputAttribute(
        docstring=(
            "The path to the XML system object which defines the forces present "
            "in the system."
        ),
        type_hint=str,
        default_value=UNDEFINED,
    )

    tolerance = InputAttribute(
        docstring=(
            "The energy tolerance to which the system should be minimized."
        ),
        type_hint=pint.Quantity,
        default_value=10 * unit.kilojoules / unit.mole,
    )
    max_iterations = InputAttribute(
        docstring=(
            "The maximum number of iterations to perform. If this is 0, "
            "minimization is continued until the results converge without regard to "
            "how many iterations it takes."
        ),
        type_hint=int,
        default_value=0,
    )

    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled.",
        type_hint=bool,
        default_value=True,
    )

    output_coordinate_file = OutputAttribute(
        docstring="The file path to the minimised coordinates.",
        type_hint=str,
    )
class ComputeReferenceWork(Protocol):
    """Evaluates the work of the reference state from the guest
    restraints.
    """

    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state that the calculation was performed at.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )
    restraints_path = InputAttribute(
        docstring="The file path to the JSON file which contains the restraint "
        "definitions. This will usually have been generated by a "
        "`GenerateXXXRestraints` protocol.",
        type_hint=str,
        default_value=UNDEFINED,
    )

    result = OutputAttribute(
        docstring="The reference state work.",
        type_hint=Observable,
    )

    def _execute(self, directory, available_resources):
        """Load the restraints, compute the reference state work with
        ``paprika`` and store its negative as an observable with zero
        uncertainty.
        """
        from paprika.evaluator import Analyze

        all_restraints = ApplyRestraints.load_restraints(self.restraints_path)
        guest_restraints = all_restraints["guest"]

        temperature = self.thermodynamic_state.temperature.to(unit.kelvin).magnitude

        # The sign of the value returned by paprika is flipped before the
        # units are attached.
        negated_work = -Analyze.compute_ref_state_work(temperature, guest_restraints)

        measurement = unit.Measurement(
            negated_work * unit.kilocalorie / unit.mole,
            0 * unit.kilocalorie / unit.mole,
        )
        self.result = Observable(measurement)
class FilterSubstanceByRole(Protocol):
    """Builds a new substance containing only those components of the
    input substance whose role is one of the requested roles.
    """

    input_substance = InputAttribute(
        docstring="The substance to filter.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    component_roles = InputAttribute(
        docstring="The roles to filter substance components against.",
        type_hint=list,
        default_value=UNDEFINED,
    )
    expected_components = InputAttribute(
        docstring="The number of components expected to remain after filtering. "
        "An exception is raised if this number is not matched.",
        type_hint=int,
        default_value=UNDEFINED,
        optional=True,
    )

    filtered_substance = OutputAttribute(
        docstring="The filtered substance.",
        type_hint=Substance,
    )

    def _execute(self, directory, available_resources):
        """Filter the components and re-normalise the mole fractions of
        those which remain.

        Raises
        ------
        ValueError
            If ``expected_components`` is set and does not match the number
            of components which remain after filtering.
        """
        source = self.input_substance

        filtered_components = [
            component
            for component in source.components
            if component.role in self.component_roles
        ]

        # Total mole fraction of the retained components, accumulated in
        # component order.
        total_mole_fraction = 0.0

        for component in filtered_components:
            for amount in source.get_amounts(component):
                if isinstance(amount, MoleFraction):
                    total_mole_fraction += amount.value

        n_expected = self.expected_components

        if n_expected != UNDEFINED and n_expected != len(filtered_components):
            raise ValueError(
                f"The filtered substance does not contain the expected number of "
                f"components ({self.expected_components}) - {filtered_components}",
            )

        # Leave the amounts unscaled when no mole fractions were retained.
        if np.isclose(total_mole_fraction, 0.0):
            inverse_mole_fraction = 1.0
        else:
            inverse_mole_fraction = 1.0 / total_mole_fraction

        self.filtered_substance = Substance()

        for component in filtered_components:
            for amount in source.get_amounts(component):
                if isinstance(amount, MoleFraction):
                    amount = MoleFraction(amount.value * inverse_mole_fraction)

                self.filtered_substance.add_component(component, amount)

    def validate(self, attribute_type=None):
        """Run the parent validation and check that every entry of
        ``component_roles`` is a ``Component.Role``.
        """
        super().validate(attribute_type)

        assert all(isinstance(role, Component.Role) for role in self.component_roles)
class AddDummyAtoms(Protocol):
    """Adds the three reference 'dummy' atoms to both the coordinates and
    the parameterised system.

    The host / complex is assumed to already be aligned to the z-axis and
    placed at the origin.
    """

    substance = InputAttribute(
        docstring="The substance which defines the host, guest and solvent.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    offset = InputAttribute(
        docstring="The distance to offset the dummy atoms from the origin (0, 0, 0) "
        "backwards along the z-axis.",
        type_hint=unit.Quantity,
        default_value=UNDEFINED,
    )

    input_coordinate_path = InputAttribute(
        docstring="The file path to the coordinates which the dummy atoms "
        "should be added to.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    input_system = InputAttribute(
        docstring="The parameterized system which the dummy atoms "
        "should be added to.",
        type_hint=ParameterizedSystem,
        default_value=UNDEFINED,
    )

    output_coordinate_path = OutputAttribute(
        docstring="The file path to the coordinates which include the added dummy "
        "atoms.",
        type_hint=str,
    )
    output_system = OutputAttribute(
        docstring="The parameterized system which include the added dummy atoms.",
        type_hint=ParameterizedSystem,
    )

    def _execute(self, directory, available_resources):
        """Write ``output.pdb`` and ``output.xml`` into ``directory``, each
        extended by three dummy atoms, and expose them through the outputs.
        """
        import parmed.geometry
        from paprika.evaluator import Setup
        from simtk.openmm import NonbondedForce, XmlSerializer, app

        n_dummy_atoms = 3

        # Load the coordinates which the dummy atoms will be appended to.
        # noinspection PyTypeChecker
        input_structure: parmed.Structure = parmed.load_file(
            self.input_coordinate_path, structure=True
        )

        # Place the dummy atoms relative to the requested offset (angstrom).
        offset = self.offset.to(unit.angstrom).magnitude

        dummy_positions = [
            numpy.array(position)
            for position in (
                (0, 0, -offset),
                (0, 0, -3.0 - offset),
                (0, 2.2, -5.2 - offset),
            )
        ]
        Setup.add_dummy_atoms_to_structure(
            input_structure, dummy_positions, numpy.zeros(3)
        )

        # Translate everything by half a box length in x and y, and so that
        # the last atom sits at z = 1.0, to avoid issues with the PBC.
        pbc_shift = numpy.array(
            [
                input_structure.box[0] * 0.5,
                input_structure.box[1] * 0.5,
                -input_structure.coordinates[-1, 2] + 1.0,
            ]
        )
        input_structure.coordinates += pbc_shift

        # Store the shifted coordinates.
        self.output_coordinate_path = os.path.join(directory, "output.pdb")

        with open(self.output_coordinate_path, "w") as file:
            app.PDBFile.writeFile(
                input_structure.topology, input_structure.positions, file, True
            )

        # Mirror the new atoms in the system: one particle each ...
        system = self.input_system.system

        for _ in range(n_dummy_atoms):
            system.addParticle(mass=207)

        # ... plus one entry each in every nonbonded force.
        forces = (
            system.getForce(force_index)
            for force_index in range(system.getNumForces())
        )

        for force in forces:
            if isinstance(force, NonbondedForce):
                for _ in range(n_dummy_atoms):
                    force.addParticle(0.0, 1.0, 0.0)

        output_system_path = os.path.join(directory, "output.xml")

        with open(output_system_path, "w") as file:
            file.write(XmlSerializer.serialize(system))

        self.output_system = ParameterizedSystem(
            self.input_system.substance,
            self.input_system.force_field,
            self.output_coordinate_path,
            output_system_path,
        )
class ConditionalGroup(ProtocolGroup):
    """A collection of protocols which are to execute until
    a given condition is met.
    """

    class Condition(AttributeClass):
        """Defines a specific condition which must be met of the form
        `left_hand_value` [TYPE] `right_hand_value`, where `[TYPE]` may
        be less than or greater than.
        """

        @unique
        class Type(Enum):
            """The available condition types."""

            LessThan = "lessthan"
            GreaterThan = "greaterthan"

        left_hand_value = Attribute(
            docstring="The left-hand value to compare.",
            type_hint=typing.Union[int, float, pint.Quantity],
        )
        right_hand_value = Attribute(
            docstring="The right-hand value to compare.",
            type_hint=typing.Union[int, float, pint.Quantity],
        )

        type = Attribute(
            docstring="The type of comparison to perform.",
            type_hint=Type,
            default_value=Type.LessThan,
        )

        def __eq__(self, other):
            """Two conditions are equal when they have the same class, the
            same operands and the same comparison type.
            """
            return (
                type(self) == type(other)
                and self.left_hand_value == other.left_hand_value
                and self.right_hand_value == other.right_hand_value
                and self.type == other.type
            )

        def __ne__(self, other):
            return not self.__eq__(other)

        def __str__(self):
            return f"{self.left_hand_value} {self.type} {self.right_hand_value}"

        def __repr__(self):
            return f"<Condition {str(self)}>"

    conditions = InputAttribute(
        docstring="The conditions which must be satisfied before "
        "the group will cleanly exit.",
        type_hint=list,
        default_value=[],
        merge_behavior=MergeBehaviour.Custom,
    )

    current_iteration = OutputAttribute(
        docstring="The current number of iterations this group has performed while "
        "attempting to satisfy the specified conditions. This value starts "
        "from one.",
        type_hint=int,
    )
    max_iterations = InputAttribute(
        docstring="The maximum number of iterations to run for to try and satisfy the "
        "groups conditions.",
        type_hint=int,
        default_value=100,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )

    def __init__(self, protocol_id):
        super(ConditionalGroup, self).__init__(protocol_id)

        # We disable checkpoint, as protocols may change their inputs
        # at each iteration and hence their checkpointed outputs may
        # be invalidated.
        self._enable_checkpointing = False

    def _evaluate_condition(self, condition):
        """Evaluates whether a condition has been successfully met.

        Parameters
        ----------
        condition: ConditionalGroup.Condition
            The condition to evaluate.

        Returns
        -------
        bool
            True if the condition has been met. False is returned when
            either side of the condition is still undefined.

        Raises
        ------
        NotImplementedError
            If the condition type is neither less than nor greater than.
        """
        left_hand_value = condition.left_hand_value
        right_hand_value = condition.right_hand_value

        # Operands given as protocol paths are resolved to the values they
        # currently point to.
        if isinstance(condition.left_hand_value, ProtocolPath):
            left_hand_value = self.get_value(condition.left_hand_value)
        if isinstance(condition.right_hand_value, ProtocolPath):
            right_hand_value = self.get_value(condition.right_hand_value)

        if left_hand_value == UNDEFINED or right_hand_value == UNDEFINED:
            return False

        # Compare quantities in the units of the left-hand side.
        if isinstance(right_hand_value, pint.Quantity) and isinstance(
            left_hand_value, pint.Quantity
        ):
            right_hand_value = right_hand_value.to(left_hand_value.units)

        logger.info(
            f"Evaluating condition for protocol {self.id}: "
            f"{left_hand_value} {condition.type} {right_hand_value}"
        )

        if condition.type == self.Condition.Type.LessThan:
            return left_hand_value < right_hand_value
        elif condition.type == self.Condition.Type.GreaterThan:
            return left_hand_value > right_hand_value

        raise NotImplementedError()

    @staticmethod
    def _write_checkpoint(directory, current_iteration):
        """Creates a checkpoint file for this group so that it can continue
        executing where it left off if it was killed for some reason (e.g the
        worker it was running on was killed).

        Parameters
        ----------
        directory: str
            The path to the working directory of this protocol
        current_iteration: int
            The number of iterations this group has performed so far.
        """
        checkpoint_path = path.join(directory, "checkpoint.json")

        with open(checkpoint_path, "w") as file:
            json.dump({"current_iteration": current_iteration}, file)

    @staticmethod
    def _read_checkpoint(directory):
        """Reads the checkpoint file of this group, if one exists, so that
        it can continue executing where it left off if it was killed for some
        reason (e.g the worker it was running on was killed).

        Parameters
        ----------
        directory: str
            The path to the working directory of this protocol

        Returns
        -------
        int
            The number of iterations this group has performed so far, or
            zero when no checkpoint file is present.
        """
        current_iteration = 0
        checkpoint_path = path.join(directory, "checkpoint.json")

        if not path.isfile(checkpoint_path):
            return current_iteration

        with open(checkpoint_path, "r") as file:
            checkpoint_dictionary = json.load(file)

        current_iteration = checkpoint_dictionary["current_iteration"]
        return current_iteration

    def _execute(self, directory, available_resources):
        """Executes the protocols within this group repeatedly until every
        condition is met.

        Parameters
        ----------
        directory : str
            The root directory in which to run the protocols
        available_resources: ComputeResources
            The resources available to execute on.

        Raises
        ------
        RuntimeError
            If the conditions are still not met once ``current_iteration``
            has reached ``max_iterations``.
        """
        self.current_iteration = self._read_checkpoint(directory)

        # Keep a track of the original protocol schemas
        original_schemas = [x.schema for x in self._protocols]

        # The loop is only left by returning on success or by raising once
        # the iteration limit is reached.
        while True:

            # Create a checkpoint file so we can pick off where
            # we left off if this execution fails due to time
            # constraints for e.g.
            self._write_checkpoint(directory, self.current_iteration)
            self.current_iteration += 1

            # Reset the protocols from their schemas - this will ensure
            # that at each iteration protocols which take their inputs from
            # other protocols in the group get their inputs updated correctly.
            for protocol, schema in zip(self._protocols, original_schemas):
                protocol.schema = schema

            super(ConditionalGroup, self)._execute(directory, available_resources)

            # Every condition is evaluated (and hence logged), even after
            # one of them has already failed.
            conditions_met = True

            for condition in self._conditions:
                if not self._evaluate_condition(condition):
                    conditions_met = False

            if conditions_met:
                logger.info(
                    f"{self.id} loop finished after {self.current_iteration} iterations"
                )
                return

            if self.current_iteration >= self.max_iterations:
                raise RuntimeError(f"{self.id} failed to converge.")

            logger.info(
                f"{self.id} criteria not yet met after {self.current_iteration} "
                f"iterations"
            )

    def merge(self, other):
        """Merges another ProtocolGroup with this one. The id
        of this protocol will remain unchanged.

        It is assumed that can_merge has already returned that
        these protocol groups are compatible to be merged together.

        Parameters
        ----------
        other: ConditionalGroup
            The protocol to merge into this one.

        Returns
        -------
        The mapping of merged protocol ids returned by the parent class.
        """
        merged_ids = super(ConditionalGroup, self).merge(other)

        for condition in other.conditions:

            # Point any paths which referenced the other group at this one.
            if isinstance(condition.left_hand_value, ProtocolPath):
                condition.left_hand_value.replace_protocol(other.id, self.id)
            if isinstance(condition.right_hand_value, ProtocolPath):
                condition.right_hand_value.replace_protocol(other.id, self.id)

            # Apply the id replacements reported for the merged protocols.
            for merged_id in merged_ids:

                if isinstance(condition.left_hand_value, ProtocolPath):
                    condition.left_hand_value.replace_protocol(
                        merged_id, merged_ids[merged_id]
                    )
                if isinstance(condition.right_hand_value, ProtocolPath):
                    condition.right_hand_value.replace_protocol(
                        merged_id, merged_ids[merged_id]
                    )

            self.add_condition(condition)

        return merged_ids

    def add_condition(self, condition_to_add):
        """Adds a condition to this groups list of conditions if it
        not already in the condition list.

        Parameters
        ----------
        condition_to_add: :obj:`ConditionalGroup.Condition`
            The condition to add.
        """
        for condition in self.conditions:

            if condition == condition_to_add:
                return

        self.conditions.append(condition_to_add)

    def get_value_references(self, input_path):
        """Returns the protocol paths referenced by ``input_path``.

        For the ``conditions`` input this is a dictionary mapping the path
        of each condition operand which is itself a ``ProtocolPath`` to
        that path; any other input is delegated to the parent class.
        """
        if input_path.property_name != "conditions":
            return super(ConditionalGroup, self).get_value_references(input_path)

        value_references = {}

        for index, condition in enumerate(self.conditions):

            if isinstance(condition.left_hand_value, ProtocolPath):

                source_path = ProtocolPath(
                    "conditions[{}].left_hand_value".format(index)
                )
                value_references[source_path] = condition.left_hand_value

            if isinstance(condition.right_hand_value, ProtocolPath):

                source_path = ProtocolPath(
                    "conditions[{}].right_hand_value".format(index)
                )
                value_references[source_path] = condition.right_hand_value

        return value_references
class ZeroGradients(Protocol, abc.ABC):
    """Zeros the gradients of an observable with respect to a specified set of
    force field parameters.

    The output observable carries the same value as the input, together with
    one zero-valued gradient per requested parameter, each expressed in the
    units of the observable divided by the units of that parameter.
    """

    input_observables = InputAttribute(
        docstring="The observable to set the gradients of.",
        type_hint=Union[Observable, ObservableArray],
        default_value=UNDEFINED,
    )
    force_field_path = InputAttribute(
        docstring="The path to the force field which contains the parameters to "
        "differentiate the observable with respect to. This is many used to get the "
        "correct units for the parameters.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    gradient_parameters = InputAttribute(
        docstring="The parameters to zero the gradient with respect to.",
        type_hint=list,
        default_value=lambda: list(),
    )

    output_observables = OutputAttribute(
        docstring="The observable with zeroed gradients.",
        type_hint=Union[Observable, ObservableArray],
    )

    def _execute(self, directory, available_resources):
        """Build the output observable with zeroed gradients.

        Raises
        ------
        ValueError
            If the force field is not a SMIRNOFF force field.
        NotImplementedError
            If the input is neither an `Observable` nor an `ObservableArray`.
        """
        force_field_source = ForceFieldSource.from_json(self.force_field_path)

        if not isinstance(force_field_source, SmirnoffForceFieldSource):
            raise ValueError("Only SMIRNOFF force fields are supported.")

        force_field = force_field_source.to_force_field()

        # Look up the units of each parameter of interest from the force field.
        parameter_units = {}

        for gradient_key in self.gradient_parameters:

            handler = force_field.get_parameter_handler(gradient_key.tag)
            parameter = handler.parameters[gradient_key.smirks]

            parameter_value = getattr(parameter, gradient_key.attribute)
            parameter_units[gradient_key] = openmm_quantity_to_pint(
                parameter_value
            ).units

        observable = self.input_observables
        # Any gradients already stored on the input are discarded before the
        # type of the input is inspected.
        observable.clear_gradients()

        if isinstance(observable, Observable):
            observable_class = Observable
        elif isinstance(observable, ObservableArray):
            observable_class = ObservableArray
        else:
            raise NotImplementedError()

        def _zero_gradient(gradient_key):
            # A scalar zero for a single observable, otherwise a fresh array of
            # zeros with the same shape as the observable values.
            if observable_class is Observable:
                zero = 0.0
            else:
                zero = numpy.zeros(observable.value.shape)

            return ParameterGradient(
                key=gradient_key,
                value=(zero * observable.value.units / parameter_units[gradient_key]),
            )

        self.output_observables = observable_class(
            value=observable.value,
            gradients=[
                _zero_gradient(gradient_key)
                for gradient_key in self.gradient_parameters
            ],
        )
class BaseSimulation(Protocol, abc.ABC):
    """A base class for protocols which will perform a molecular simulation
    in a given ensemble and at a specified state.

    This class only declares the inputs and outputs shared by such protocols;
    no `_execute` implementation is provided here.
    """

    # --- Simulation length and output cadence -------------------------------
    steps_per_iteration = InputAttribute(
        docstring="The number of steps to propogate the system by at "
        "each iteration. The total number of steps performed "
        "by this protocol will be `total_number_of_iterations * "
        "steps_per_iteration`.",
        type_hint=int,
        default_value=1000000,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    total_number_of_iterations = InputAttribute(
        docstring="The number of times to propogate the system forward by the "
        "`steps_per_iteration` number of steps. The total number of "
        "steps performed by this protocol will be `total_number_of_iterations * "
        "steps_per_iteration`.",
        type_hint=int,
        default_value=1,
        merge_behavior=InequalityMergeBehaviour.LargestValue,
    )
    output_frequency = InputAttribute(
        docstring="The frequency (in number of steps) with which to write to the "
        "output statistics and trajectory files.",
        type_hint=int,
        default_value=3000,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
    )
    checkpoint_frequency = InputAttribute(
        docstring="The frequency (in multiples of `output_frequency`) with which to "
        "write to a checkpoint file, e.g. if `output_frequency=100` and "
        "`checkpoint_frequency==2`, a checkpoint file would be saved every "
        "200 steps.",
        type_hint=int,
        default_value=10,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
        optional=True,
    )

    # --- Integration settings and thermodynamic conditions ------------------
    timestep = InputAttribute(
        docstring="The timestep to evolve the system by at each step.",
        type_hint=pint.Quantity,
        default_value=2.0 * unit.femtosecond,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
    )
    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic conditions to simulate under",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )
    ensemble = InputAttribute(
        docstring="The thermodynamic ensemble to simulate in.",
        type_hint=Ensemble,
        default_value=Ensemble.NPT,
    )
    thermostat_friction = InputAttribute(
        docstring="The thermostat friction coefficient.",
        type_hint=pint.Quantity,
        default_value=1.0 / unit.picoseconds,
        merge_behavior=InequalityMergeBehaviour.SmallestValue,
    )

    # --- Description of the system to simulate ------------------------------
    input_coordinate_file = InputAttribute(
        docstring="The file path to the starting coordinates.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    system_path = InputAttribute(
        docstring="A path to the XML system object which defines the forces present "
        "in the system.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled.",
        type_hint=bool,
        default_value=True,
    )

    # --- Compute platform options -------------------------------------------
    allow_gpu_platforms = InputAttribute(
        docstring="If true, the simulation will be performed using a GPU if available, "
        "otherwise it will be constrained to only using CPUs.",
        type_hint=bool,
        default_value=True,
    )
    high_precision = InputAttribute(
        docstring="If true, the simulation will be run using double precision.",
        type_hint=bool,
        default_value=False,
    )

    # --- Outputs ------------------------------------------------------------
    output_coordinate_file = OutputAttribute(
        docstring="The file path to the coordinates of the final system configuration.",
        type_hint=str,
    )
    trajectory_file_path = OutputAttribute(
        docstring="The file path to the trajectory sampled during the simulation.",
        type_hint=str,
    )
    statistics_file_path = OutputAttribute(
        docstring="The file path to the statistics sampled during the simulation.",
        type_hint=str,
    )
class ApplyRestraints(Protocol):
    """A protocol which will apply the restraints defined in a restraints JSON
    file to a specified system.

    The restrained system is serialized to `output.xml` inside of the working
    directory and exposed through `output_system`.
    """

    restraints_path = InputAttribute(
        docstring="The file path to the JSON file which contains the restraint "
        "definitions. This will usually have been generated by a "
        "`GenerateXXXRestraints` protocol.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    phase = InputAttribute(
        docstring="The APR phase to take the restraints from.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    window_index = InputAttribute(
        docstring="The index of the window to take the restraints from.",
        type_hint=int,
        default_value=UNDEFINED,
    )

    input_system = InputAttribute(
        docstring="The parameterized system which the restraints should be added "
        "to.",
        type_hint=ParameterizedSystem,
        default_value=UNDEFINED,
    )
    output_system = OutputAttribute(
        docstring="The parameterized system which now includes the added restraints.",
        type_hint=ParameterizedSystem,
    )

    @classmethod
    def _parse_restraints(cls, restraint_dictionaries):
        """Rebuild `paprika` restraint objects from their dictionary
        representations.

        Each dictionary becomes the `__dict__` of a new `DAT_restraint`. For a
        fixed set of property names, a value stored under the underscore
        prefixed key is additionally copied to the un-prefixed key.

        Parameters
        ----------
        restraint_dictionaries
            The dictionary representations of the restraints.

        Returns
        -------
        list
            The rebuilt restraint objects.
        """
        from paprika.restraints import DAT_restraint

        mirrored_properties = (
            "mask1",
            "mask2",
            "mask3",
            "mask4",
            "topology",
            "instances",
            "custom_restraint_values",
            "auto_apr",
            "continuous_apr",
            "attach",
            "pull",
            "release",
            "amber_index",
        )

        parsed_restraints = []

        for serialized_restraint in restraint_dictionaries:

            restraint = DAT_restraint()
            restraint.__dict__ = serialized_restraint

            state = restraint.__dict__

            for property_name in mirrored_properties:

                private_name = f"_{property_name}"

                if private_name in state:
                    state[property_name] = state[private_name]

            parsed_restraints.append(restraint)

        return parsed_restraints

    @classmethod
    def load_restraints(cls, file_path: str):
        """Loads a set of `paprika` restraint objects from a JSON file.

        Parameters
        ----------
        file_path
            The path to the JSON serialized restraints.

        Returns
        -------
            The loaded `paprika` restraint objects, stored in a dictionary
            under the same keys as in the JSON file.
        """
        from paprika.io import json_numpy_obj_hook

        with open(file_path) as file:
            serialized = json.load(file, object_hook=json_numpy_obj_hook)

        loaded_restraints = {}

        for restraint_type in serialized:
            loaded_restraints[restraint_type] = cls._parse_restraints(
                serialized[restraint_type]
            )

        return loaded_restraints

    def _execute(self, directory, available_resources):
        """Add the restraints to the input system and write the result to
        `output.xml` in `directory`."""
        from paprika.restraints.openmm import (
            apply_dat_restraint,
            apply_positional_restraints,
        )
        from simtk.openmm import XmlSerializer

        # The system which the restraints will be added to.
        system = self.input_system.system

        # Each type of restraint is given its own force group, which helps
        # with debugging / analysis.
        force_groups = {
            "static": 10,
            "conformational": 11,
            "guest": 12,
            "symmetry": 13,
            "wall": 14,
        }

        restraints = self.load_restraints(self.restraints_path)

        for restraint_type, force_group in force_groups.items():

            # Restraint types missing from the file are skipped.
            if restraint_type not in restraints:
                continue

            is_flat_bottom = restraint_type in ["symmetry", "wall"]

            for restraint in restraints[restraint_type]:

                apply_dat_restraint(
                    system,
                    restraint,
                    self.phase,
                    self.window_index,
                    flat_bottom=is_flat_bottom,
                    force_group=force_group,
                )

        # The dummy atoms are held in place by positional restraints.
        apply_positional_restraints(
            self.input_system.topology_path, system, force_group=15
        )

        output_system_path = os.path.join(directory, "output.xml")

        serialized_system = XmlSerializer.serialize(system)

        with open(output_system_path, "w") as file:
            file.write(serialized_system)

        self.output_system = ParameterizedSystem(
            substance=self.input_system.substance,
            force_field=self.input_system.force_field,
            topology_path=self.input_system.topology_path,
            system_path=output_system_path,
        )
class WeightByMoleFraction(Protocol):
    """Multiplies a value by the mole fraction of a component in a `Substance`.

    The `component` input must contain exactly one component, and that
    component must be present in `full_substance` with exactly one amount,
    which must be a `MoleFraction`.
    """

    value = InputAttribute(
        docstring="The value to be weighted.",
        type_hint=typing.Union[
            float, int, pint.Measurement, pint.Quantity, ParameterGradient
        ],
        default_value=UNDEFINED,
    )

    component = InputAttribute(
        docstring="The component whose mole fraction to weight by.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )
    full_substance = InputAttribute(
        docstring="The full substance which describes the mole fraction of the component.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    weighted_value = OutputAttribute(
        docstring="The value weighted by the `component`s mole fraction as determined from the "
        "`full_substance`.",
        type_hint=typing.Union[
            float, int, pint.Measurement, pint.Quantity, ParameterGradient
        ],
    )

    def _weight_values(self, mole_fraction):
        """Weights a value by a components mole fraction.

        Parameters
        ----------
        mole_fraction: float
            The mole fraction to weight by.

        Returns
        -------
        float, int, pint.Measurement, pint.Quantity, ParameterGradient
            The weighted value.
        """
        return self.value * mole_fraction

    def _execute(self, directory, available_resources):
        """Look up the mole fraction of the component and weight `value` by it.

        Raises
        ------
        ValueError
            If `component` does not contain exactly one component, if the
            component does not have exactly one amount defined in
            `full_substance`, or if that amount is not a mole fraction.
        """
        components = self.component.components

        # Raised explicitly rather than asserted so that the check is still
        # performed when Python is run with assertions disabled (`-O`).
        if len(components) != 1:
            raise ValueError(
                f"The `component` input must contain exactly one component, "
                f"but {len(components)} were found."
            )

        main_component = components[0]
        amounts = self.full_substance.get_amounts(main_component)

        if len(amounts) == 0:
            raise ValueError(
                f"No amount was defined for component {main_component}. "
                f"Only a single mole fraction must be defined.",
            )

        if len(amounts) != 1:
            raise ValueError(
                f"More than one type of amount was defined for component "
                f"{main_component}. Only a single mole fraction must be defined.",
            )

        amount = next(iter(amounts))

        if not isinstance(amount, MoleFraction):
            raise ValueError(
                f"The component {main_component} was given as an exact amount, and "
                f"not a mole fraction"
            )

        self.weighted_value = self._weight_values(amount.value)
class CentralDifferenceGradient(Protocol):
    """A protocol which employs the central difference method to estimate the
    gradient of an observable A with respect to a parameter, such that

    grad = (A(p_forward) - A(p_reverse)) / (p_forward - p_reverse)

    which, for a parameter x perturbed symmetrically by h, is equal to

    grad = (A(x+h) - A(x-h)) / (2h)

    Notes
    -----
    When an observable value is provided as a `pint.Measurement`, only its
    `value` is used in the estimate.
    """

    parameter_key = InputAttribute(
        docstring="The key of the parameter to differentiate with respect to.",
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED,
    )

    reverse_observable_value = InputAttribute(
        docstring="The value of the observable evaluated using the parameters "
        "perturbed in the reverse direction.",
        type_hint=typing.Union[pint.Quantity, pint.Measurement],
        default_value=UNDEFINED,
    )
    forward_observable_value = InputAttribute(
        docstring="The value of the observable evaluated using the parameters "
        "perturbed in the forward direction.",
        type_hint=typing.Union[pint.Quantity, pint.Measurement],
        default_value=UNDEFINED,
    )

    reverse_parameter_value = InputAttribute(
        docstring="The value of the parameter perturbed in the reverse direction.",
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
    )
    forward_parameter_value = InputAttribute(
        docstring="The value of the parameter perturbed in the forward direction.",
        type_hint=pint.Quantity,
        default_value=UNDEFINED,
    )

    gradient = OutputAttribute(
        docstring="The estimated gradient", type_hint=ParameterGradient
    )

    def _execute(self, directory, available_resources):
        """Estimate the gradient and store it in `gradient`.

        Raises
        ------
        ValueError
            If the forward parameter value is not strictly larger than the
            reverse parameter value.
        """
        # Equal parameter values must be rejected as well: they would lead to
        # a division by zero when forming the finite difference below.
        if self.forward_parameter_value <= self.reverse_parameter_value:

            raise ValueError(
                f"The forward parameter value ({self.forward_parameter_value}) must "
                f"be larger than the reverse value ({self.reverse_parameter_value})."
            )

        reverse_value = self.reverse_observable_value
        forward_value = self.forward_observable_value

        # Only the value of a measurement takes part in the estimate.
        if isinstance(reverse_value, pint.Measurement):
            reverse_value = reverse_value.value

        if isinstance(forward_value, pint.Measurement):
            forward_value = forward_value.value

        gradient = (forward_value - reverse_value) / (
            self.forward_parameter_value - self.reverse_parameter_value
        )

        self.gradient = ParameterGradient(self.parameter_key, gradient)
class BaseGradientPotentials(Protocol, abc.ABC):
    """A base class for protocols which will evaluate the reduced potentials of
    a series of configurations using a set of force field parameters which have
    been slightly increased and slightly decreased. These are mainly useful when
    estimating gradients with respect to force field parameters using the
    central difference method.

    This class only declares the shared inputs and outputs; no `_execute`
    implementation is provided here.
    """

    # --- Force field, state and system description --------------------------
    force_field_path = InputAttribute(
        docstring="The path to the force field which contains the parameters to "
        "differentiate the observable with respect to. When reweighting "
        "observables, this should be the `target` force field.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    statistics_path = InputAttribute(
        docstring="The path to a statistics array containing potentials "
        "evaluated at each frame of the trajectory using the input "
        "`force_field_path` and at the input `thermodynamic_state`.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    thermodynamic_state = InputAttribute(
        docstring="The thermodynamic state to estimate the gradients at. When "
        "reweighting observables, this should be the `target` state.",
        type_hint=ThermodynamicState,
        default_value=UNDEFINED,
    )

    substance = InputAttribute(
        docstring="The substance which describes the composition of the system.",
        type_hint=Substance,
        default_value=UNDEFINED,
    )

    # --- Configurations to re-evaluate --------------------------------------
    coordinate_file_path = InputAttribute(
        docstring="A path to a PDB coordinate file which describes the topology of "
        "the system.",
        type_hint=str,
        default_value=UNDEFINED,
    )
    trajectory_file_path = InputAttribute(
        docstring="A path to the trajectory of configurations",
        type_hint=str,
        default_value=UNDEFINED,
    )

    enable_pbc = InputAttribute(
        docstring="If true, periodic boundary conditions will be enabled when "
        "re-evaluating the reduced potentials.",
        type_hint=bool,
        default_value=True,
    )

    # --- Parameter perturbation settings ------------------------------------
    parameter_key = InputAttribute(
        docstring="The key of the parameter to differentiate with respect to.",
        type_hint=ParameterGradientKey,
        default_value=UNDEFINED,
    )
    perturbation_scale = InputAttribute(
        docstring="The amount to perturb the parameter by, such that "
        "p_new = p_old * (1 +/- `perturbation_scale`)",
        type_hint=float,
        default_value=1.0e-4,
    )

    use_subset_of_force_field = InputAttribute(
        docstring="If true, the reduced potentials will be estimated using "
        "a system which only contains the parameters of interest, e.g. if the "
        "gradient of interest is with respect to the VdW epsilon parameter, then "
        "all valence / electrostatic terms will be ignored.",
        type_hint=bool,
        default_value=True,
    )

    effective_sample_indices = InputAttribute(
        docstring="This a placeholder input which is not currently implemented.",
        type_hint=list,
        default_value=UNDEFINED,
        optional=True,
    )

    # --- Outputs ------------------------------------------------------------
    # The adjacent string literals below are joined by the interpreter, so the
    # first piece must end with a space for the words not to run together.
    reverse_potentials_path = OutputAttribute(
        docstring="A file path to the energies evaluated using the parameters "
        "perturbed in the reverse direction.",
        type_hint=str,
    )
    forward_potentials_path = OutputAttribute(
        docstring="A file path to the energies evaluated using the parameters "
        "perturbed in the forward direction.",
        type_hint=str,
    )

    reverse_parameter_value = OutputAttribute(
        docstring="The value of the parameter perturbed in the reverse direction.",
        type_hint=pint.Quantity,
    )
    forward_parameter_value = OutputAttribute(
        docstring="The value of the parameter perturbed in the forward direction.",
        type_hint=pint.Quantity,
    )