def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
) -> SimulationSchema:
    """Build the default schema for estimating a density by direct simulation.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as ``relative_tolerance``.
    relative_tolerance: float, optional
        The tolerance, expressed as a fraction of the property's reported
        uncertainty, to estimate the property to within. May not be set at
        the same time as ``absolute_tolerance``.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    simulation_schema = SimulationSchema()
    simulation_schema.absolute_tolerance = absolute_tolerance
    simulation_schema.relative_tolerance = relative_tolerance

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Generate the protocols which run the simulation and average the result.
    density_average = analysis.AverageObservable("average_density")
    stages, final_value, stored_output = generate_simulation_protocols(
        density_average,
        converge_to_target,
        n_molecules=n_molecules,
    )

    # The observable being averaged is the density of the production run.
    stages.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.Density.value}]",
        stages.production_simulation.id,
    )

    # Assemble the workflow from the stages, in execution order.
    ordered_stages = (
        stages.build_coordinates,
        stages.assign_parameters,
        stages.energy_minimisation,
        stages.equilibration_simulation,
        stages.converge_uncertainty,
        stages.decorrelate_trajectory,
        stages.decorrelate_observables,
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [stage.schema for stage in ordered_stages]
    workflow.outputs_to_store = {"full_system": stored_output}
    workflow.final_value_source = final_value

    simulation_schema.workflow_schema = workflow
    return simulation_schema
def default_reweighting_schema(
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
) -> ReweightingSchema:
    """Build the default schema for estimating a density by reweighting
    existing data.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as ``relative_tolerance``.
    relative_tolerance: float, optional
        The tolerance, expressed as a fraction of the property's reported
        uncertainty, to estimate the property to within. May not be set at
        the same time as ``absolute_tolerance``.
    n_effective_samples: int
        The minimum number of effective samples to require when reweighting
        the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    reweighting_schema = ReweightingSchema()
    reweighting_schema.absolute_tolerance = absolute_tolerance
    reweighting_schema.relative_tolerance = relative_tolerance

    # Generate the protocols which reweight the cached density observable.
    reweighting_protocols, cached_data_replicator = generate_reweighting_protocols(
        ObservableType.Density
    )
    reweighter = reweighting_protocols.reweight_observable
    reweighter.required_effective_samples = n_effective_samples

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        protocol.schema for protocol in reweighting_protocols
    ]
    workflow.protocol_replicators = [cached_data_replicator]
    workflow.final_value_source = ProtocolPath(
        "value", reweighting_protocols.reweight_observable.id
    )

    reweighting_schema.workflow_schema = workflow
    return reweighting_schema
def _get_schema(self):
    """Create a schema which describes this workflow.

    The protocol schemas and the outputs to store are deep copied, and the
    final value source (when one has been set) is copied, so the returned
    schema does not share those objects with this workflow.

    Returns
    -------
    WorkflowSchema
        The schema that describes this workflow.
    """
    workflow_schema = WorkflowSchema()
    workflow_schema.id = self.uuid

    copied_protocol_schemas = []

    for protocol in self._protocols:
        copied_protocol_schemas.append(copy.deepcopy(protocol.schema))

    workflow_schema.protocol_schemas = copied_protocol_schemas

    # Only propagate the final value source if one has actually been set.
    if self._final_value_source != UNDEFINED:
        workflow_schema.final_value_source = self._final_value_source.copy()

    workflow_schema.outputs_to_store = copy.deepcopy(self._outputs_to_store)

    return workflow_schema
def default_yank_schema(existing_schema=None):
    """Build the default YANK based schema for estimating a host-guest
    binding affinity from direct simulations.

    Parameters
    ----------
    existing_schema: SimulationSchema, optional
        An existing schema whose settings should be used. When provided, a
        deep copy of it is returned with its ``workflow_schema`` replaced by
        the workflow built here.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    calculation_schema = SimulationSchema()

    if existing_schema is not None:
        assert isinstance(existing_schema, SimulationSchema)
        calculation_schema = copy.deepcopy(existing_schema)

    workflow = WorkflowSchema(property_type=HostGuestBindingAffinity.__name__)
    workflow.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")

    # Pick the guest out of the full substance. Only substances with exactly
    # one ligand component are supported.
    ligand_filter = miscellaneous.FilterSubstanceByRole("filter_ligand")
    ligand_filter.input_substance = ProtocolPath("substance", "global")
    ligand_filter.component_roles = [Component.Role.Ligand]
    ligand_filter.expected_components = 1

    # Pick the host out of the full substance. Only substances with exactly
    # one receptor component are supported.
    receptor_filter = miscellaneous.FilterSubstanceByRole("filter_receptor")
    receptor_filter.input_substance = ProtocolPath("substance", "global")
    receptor_filter.component_roles = [Component.Role.Receptor]
    receptor_filter.expected_components = 1

    # Dock the guest into the host, whose coordinates are provided as a
    # global mol2 input.
    docking = coordinates.BuildDockedCoordinates("perform_docking")
    docking.ligand_substance = ProtocolPath("filtered_substance", ligand_filter.id)
    docking.receptor_coordinate_file = ProtocolPath("receptor_mol2", "global")

    # Extract the solvent, which is used to solvate both the complex and
    # the free ligand.
    solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
    solvent_filter.input_substance = ProtocolPath("substance", "global")
    solvent_filter.component_roles = [Component.Role.Solvent]

    # Solvate the docked complex.
    complex_solvation = coordinates.SolvateExistingStructure("solvate_complex")
    complex_solvation.max_molecules = 1000
    complex_solvation.substance = ProtocolPath(
        "filtered_substance", solvent_filter.id
    )
    complex_solvation.solute_coordinate_file = ProtocolPath(
        "docked_complex_coordinate_path", docking.id
    )

    # Parameterize the solvated complex.
    complex_system = forcefield.BaseBuildSystem("build_solvated_complex_system")
    complex_system.force_field_path = ProtocolPath("force_field_path", "global")
    complex_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", complex_solvation.id
    )
    complex_system.substance = ProtocolPath("substance", "global")
    complex_system.charged_molecule_paths = [
        ProtocolPath("receptor_mol2", "global")
    ]

    # Solvate the free ligand.
    ligand_solvation = coordinates.SolvateExistingStructure("solvate_ligand")
    ligand_solvation.max_molecules = 1000
    ligand_solvation.substance = ProtocolPath(
        "filtered_substance", solvent_filter.id
    )
    ligand_solvation.solute_coordinate_file = ProtocolPath(
        "docked_ligand_coordinate_path", docking.id
    )

    # Parameterize the solvated ligand.
    ligand_system = forcefield.BaseBuildSystem("build_solvated_ligand_system")
    ligand_system.force_field_path = ProtocolPath("force_field_path", "global")
    ligand_system.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", ligand_solvation.id
    )
    ligand_system.substance = ProtocolPath("substance", "global")

    # Run YANK on the solvated ligand and complex to estimate the binding
    # free energy.
    yank_run = yank.LigandReceptorYankProtocol("yank_protocol")
    yank_run.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    yank_run.number_of_iterations = 2000
    yank_run.steps_per_iteration = 500
    yank_run.checkpoint_interval = 10
    yank_run.verbose = True
    yank_run.force_field_path = ProtocolPath("force_field_path", "global")
    yank_run.ligand_residue_name = ProtocolPath("ligand_residue_name", docking.id)
    yank_run.receptor_residue_name = ProtocolPath(
        "receptor_residue_name", docking.id
    )
    yank_run.solvated_ligand_coordinates = ProtocolPath(
        "coordinate_file_path", ligand_solvation.id
    )
    yank_run.solvated_ligand_system = ProtocolPath(
        "parameterized_system", ligand_system.id
    )
    yank_run.solvated_complex_coordinates = ProtocolPath(
        "coordinate_file_path", complex_solvation.id
    )
    yank_run.solvated_complex_system = ProtocolPath(
        "parameterized_system", complex_system.id
    )

    # Register every protocol with the workflow, in the order they were built.
    ordered_protocols = (
        ligand_filter,
        receptor_filter,
        docking,
        solvent_filter,
        complex_solvation,
        complex_system,
        ligand_solvation,
        ligand_system,
        yank_run,
    )

    for protocol in ordered_protocols:
        workflow.protocols[protocol.id] = protocol.schema

    # The final value is the free energy difference reported by YANK.
    workflow.final_value_source = ProtocolPath("free_energy_difference", yank_run.id)

    calculation_schema.workflow_schema = workflow
    return calculation_schema
def default_simulation_schema(
    cls,
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_molecules=1000,
) -> SimulationSchema:
    """Build the default schema for estimating this class of property from
    direct simulations.

    The workflow simulates the full mixture as well as each of its
    components (through a replicator), weights each component value by its
    mole fraction, sums the weighted values and combines the sum with the
    mixture value in a ``SubtractValues`` protocol.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as ``relative_tolerance``.
    relative_tolerance: float, optional
        The tolerance, expressed as a fraction of the property's reported
        uncertainty, to estimate the property to within. May not be set at
        the same time as ``absolute_tolerance``.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """

    def _stage_schemas(protocol_set):
        """Return the schemas of the simulation stages, in execution order."""
        ordered_stages = (
            protocol_set.build_coordinates,
            protocol_set.assign_parameters,
            protocol_set.energy_minimisation,
            protocol_set.equilibration_simulation,
            protocol_set.converge_uncertainty,
            protocol_set.decorrelate_trajectory,
            protocol_set.decorrelate_observables,
        )
        return [stage.schema for stage in ordered_stages]

    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    simulation_schema = SimulationSchema()
    simulation_schema.absolute_tolerance = absolute_tolerance
    simulation_schema.relative_tolerance = relative_tolerance

    converge_to_target = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Protocols for the fully mixed system.
    mixture_average = analysis.AverageObservable("extract_observable_mixture")
    mixture_set, mixture_result, mixture_output = generate_simulation_protocols(
        mixture_average,
        converge_to_target,
        id_suffix="_mixture",
        n_molecules=n_molecules,
    )

    # Average the observable of interest over the production simulation.
    mixture_set.analysis_protocol.observable = ProtocolPath(
        f"observables[{cls._observable_type().value}]",
        mixture_set.production_simulation.id,
    )

    mixture_divisor, mixture_molar_protocol = cls._n_molecules_divisor(
        ProtocolPath(
            "output_number_of_molecules", mixture_set.build_coordinates.id
        ),
        "_mixture",
    )
    mixture_set.analysis_protocol.divisor = mixture_divisor

    # Protocols for each component. The replicator copies these once for
    # every component in the mixture substance.
    component_replicator = ProtocolReplicator("component_replicator")
    component_replicator.template_values = ProtocolPath("components", "global")

    replicated_substance = ReplicatorValue(component_replicator.id)

    component_average = analysis.AverageObservable(
        f"extract_observable_component_{component_replicator.placeholder_id}"
    )
    component_set, _, component_output = generate_simulation_protocols(
        component_average,
        converge_to_target,
        id_suffix=f"_component_{component_replicator.placeholder_id}",
        n_molecules=n_molecules,
    )

    # Each replicated set of protocols simulates a single component.
    component_set.build_coordinates.substance = replicated_substance

    # Average the observable of interest over the production simulation.
    component_set.analysis_protocol.observable = ProtocolPath(
        f"observables[{cls._observable_type().value}]",
        component_set.production_simulation.id,
    )

    component_divisor, component_molar_protocol = cls._n_molecules_divisor(
        ProtocolPath(
            "output_number_of_molecules", component_set.build_coordinates.id
        ),
        f"_component_{component_replicator.placeholder_id}",
    )
    component_set.analysis_protocol.divisor = component_divisor

    # Weight each component value by its mole fraction in the full substance.
    mole_fraction_weight = miscellaneous.WeightByMoleFraction(
        f"weight_by_mole_fraction_{component_replicator.placeholder_id}"
    )
    mole_fraction_weight.value = ProtocolPath(
        "value", component_set.analysis_protocol.id
    )
    mole_fraction_weight.full_substance = ProtocolPath("substance", "global")
    mole_fraction_weight.component = replicated_substance

    component_set.converge_uncertainty.add_protocols(mole_fraction_weight)

    # The component simulations converge to the per component uncertainty
    # target rather than the default one.
    if converge_to_target:
        first_condition = component_set.converge_uncertainty.conditions[0]
        first_condition.right_hand_value = ProtocolPath(
            "per_component_uncertainty", "global"
        )

    # Sum the weighted component values and combine the sum with the value
    # computed for the mixture.
    sum_components = miscellaneous.AddValues("add_component_observables")
    sum_components.values = ProtocolPath(
        "weighted_value",
        component_set.converge_uncertainty.id,
        mole_fraction_weight.id,
    )

    excess_value = miscellaneous.SubtractValues("calculate_excess_observable")
    excess_value.value_b = mixture_result
    excess_value.value_a = ProtocolPath("result", sum_components.id)

    # Assemble the final workflow.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        *_stage_schemas(component_set),
        *_stage_schemas(mixture_set),
        sum_components.schema,
        excess_value.schema,
    ]

    # The divisor helper may have produced extra protocols which must also
    # be part of the workflow.
    for molar_protocol in (component_molar_protocol, mixture_molar_protocol):
        if molar_protocol is not None:
            workflow.protocol_schemas.append(molar_protocol.schema)

    workflow.protocol_replicators = [component_replicator]
    workflow.final_value_source = ProtocolPath("result", excess_value.id)
    workflow.outputs_to_store = {
        "full_system": mixture_output,
        f"component_{component_replicator.placeholder_id}": component_output,
    }

    simulation_schema.workflow_schema = workflow
    return simulation_schema
def _default_reweighting_schema(
    cls,
    observable_type: ObservableType,
    absolute_tolerance: unit.Quantity = UNDEFINED,
    relative_tolerance: float = UNDEFINED,
    n_effective_samples: int = 50,
) -> ReweightingSchema:
    """Build the default schema for estimating this class of property by
    re-weighting cached simulation data.

    This internal implementation allows re-weighting a different observable
    than may be specified by the `_observable_type` class property.

    Parameters
    ----------
    observable_type
        The type of observable to re-weight.
    absolute_tolerance
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as ``relative_tolerance``.
    relative_tolerance
        The tolerance, expressed as a fraction of the property's reported
        uncertainty, to estimate the property to within. May not be set at
        the same time as ``absolute_tolerance``.
    n_effective_samples
        The minimum number of effective samples to require when reweighting
        the cached simulation data.

    Returns
    -------
        The default re-weighting calculation schema.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    reweighting_schema = ReweightingSchema()
    reweighting_schema.absolute_tolerance = absolute_tolerance
    reweighting_schema.relative_tolerance = relative_tolerance

    # Describe which cached data should be retrieved.
    reweighting_schema.storage_queries = cls._default_reweighting_storage_query()

    # Protocols which re-weight the observable of the fully mixed system.
    mixture_set, mixture_data_replicator = generate_reweighting_protocols(
        observable_type,
        "mixture_data_replicator",
        "_mixture",
    )
    mixture_set.reweight_observable.required_effective_samples = n_effective_samples

    mixture_division = miscellaneous.DivideValue("divide_by_mixture_molecules")
    mixture_division.value = ProtocolPath(
        "value", mixture_set.reweight_observable.id
    )

    # The molecule count is read from the first replicated piece of data,
    # hence the placeholder is swapped for index "0".
    first_mixture_unpack_id = mixture_set.unpack_stored_data.id.replace(
        mixture_data_replicator.placeholder_id, "0"
    )
    mixture_divisor, mixture_molar_protocol = cls._n_molecules_divisor(
        ProtocolPath("total_number_of_molecules", first_mixture_unpack_id),
        "_mixture",
    )
    mixture_division.divisor = mixture_divisor

    # Protocols for each component. The replicator copies these once for
    # every component in the full substance.
    component_replicator = ProtocolReplicator("component_replicator")
    component_replicator.template_values = ProtocolPath("components", "global")

    component_set, component_data_replicator = generate_reweighting_protocols(
        observable_type,
        f"component_{component_replicator.placeholder_id}_data_replicator",
        f"_component_{component_replicator.placeholder_id}",
    )
    component_set.reweight_observable.required_effective_samples = (
        n_effective_samples
    )

    # Only replicate over the data cached for the current component.
    component_data_replicator.template_values = ProtocolPath(
        f"component_data[$({component_replicator.id})]", "global"
    )

    component_division = miscellaneous.DivideValue(
        f"divide_by_component_{component_replicator.placeholder_id}_molecules"
    )
    component_division.value = ProtocolPath(
        "value", component_set.reweight_observable.id
    )

    first_component_unpack_id = component_set.unpack_stored_data.id.replace(
        component_data_replicator.placeholder_id, "0"
    )
    component_divisor, component_molar_protocol = cls._n_molecules_divisor(
        ProtocolPath("total_number_of_molecules", first_component_unpack_id),
        f"_component_{component_replicator.placeholder_id}",
    )
    component_division.divisor = component_divisor

    # Point the replicated protocols at the component they belong to.
    replicated_substance = ReplicatorValue(component_replicator.id)

    component_set.build_reference_system.substance = replicated_substance
    component_set.build_target_system.substance = replicated_substance

    # Weight each component value by its mole fraction in the full substance.
    mole_fraction_weight = miscellaneous.WeightByMoleFraction(
        f"weight_by_mole_fraction_{component_replicator.placeholder_id}"
    )
    mole_fraction_weight.value = ProtocolPath("result", component_division.id)
    mole_fraction_weight.full_substance = ProtocolPath("substance", "global")
    mole_fraction_weight.component = replicated_substance

    # Sum the weighted component values and combine the sum with the value
    # computed for the full system.
    sum_components = miscellaneous.AddValues("add_component_observables")
    sum_components.values = ProtocolPath(
        "weighted_value",
        mole_fraction_weight.id,
    )

    excess_value = miscellaneous.SubtractValues("calculate_excess_observable")
    excess_value.value_b = ProtocolPath("result", mixture_division.id)
    excess_value.value_a = ProtocolPath("result", sum_components.id)

    # Assemble the final workflow.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        *[
            protocol.schema
            for protocol in mixture_set
            if protocol is not None
        ],
        mixture_division.schema,
        *[
            protocol.schema
            for protocol in component_set
            if protocol is not None
        ],
        component_division.schema,
        mole_fraction_weight.schema,
        sum_components.schema,
        excess_value.schema,
    ]

    # The divisor helper may have produced extra protocols which must also
    # be part of the workflow.
    for molar_protocol in (component_molar_protocol, mixture_molar_protocol):
        if molar_protocol is not None:
            workflow.protocol_schemas.append(molar_protocol.schema)

    workflow.protocol_replicators = [
        mixture_data_replicator,
        component_replicator,
        component_data_replicator,
    ]
    workflow.final_value_source = ProtocolPath("result", excess_value.id)

    reweighting_schema.workflow_schema = workflow
    return reweighting_schema
def default_reweighting_schema(
    cls,
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
):
    """Build the default schema for estimating an enthalpy of vaporization
    by reweighting existing data.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as ``relative_tolerance``.
    relative_tolerance: float, optional
        The tolerance, expressed as a fraction of the property's reported
        uncertainty, to estimate the property to within. May not be set at
        the same time as ``absolute_tolerance``.
    n_effective_samples: int
        The minimum number of effective samples to require when reweighting
        the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    reweighting_schema = ReweightingSchema()
    reweighting_schema.absolute_tolerance = absolute_tolerance
    reweighting_schema.relative_tolerance = relative_tolerance

    # Describe which cached data should be retrieved.
    reweighting_schema.storage_queries = cls._default_reweighting_storage_query()

    # Re-weight the potential energy of the cached liquid phase data.
    liquid_set, liquid_data_replicator = generate_reweighting_protocols(
        ObservableType.PotentialEnergy,
        replicator_id="liquid_data_replicator",
        id_suffix="_liquid",
    )
    liquid_data_replicator.template_values = ProtocolPath("liquid_data", "global")
    liquid_set.reweight_observable.required_effective_samples = n_effective_samples

    # Divide the potential by the number of liquid phase molecules taken
    # from the first piece of cached data.
    per_molecule_liquid = miscellaneous.DivideValue("divide_by_liquid_molecules")
    per_molecule_liquid.value = ProtocolPath(
        "value", liquid_set.reweight_observable.id
    )

    first_liquid_unpack_id = liquid_set.unpack_stored_data.id.replace(
        liquid_data_replicator.placeholder_id, "0"
    )
    per_molecule_liquid.divisor = ProtocolPath(
        "total_number_of_molecules", first_liquid_unpack_id
    )

    # Re-weight the potential energy of the cached gas phase data.
    gas_set, gas_data_replicator = generate_reweighting_protocols(
        ObservableType.PotentialEnergy,
        replicator_id="gas_data_replicator",
        id_suffix="_gas",
    )
    gas_data_replicator.template_values = ProtocolPath("gas_data", "global")
    gas_set.reweight_observable.required_effective_samples = n_effective_samples

    # Turn off PBC for the gas phase.
    for potential_evaluator in (
        gas_set.evaluate_reference_potential,
        gas_set.evaluate_target_potential,
    ):
        potential_evaluator.enable_pbc = False

    # Combine the gas and liquid values, then add the gas constant times
    # temperature term to obtain the final enthalpy of vaporization.
    energy_difference = miscellaneous.SubtractValues("energy_of_vaporization")
    energy_difference.value_b = ProtocolPath(
        "value", gas_set.reweight_observable.id
    )
    energy_difference.value_a = ProtocolPath("result", per_molecule_liquid.id)

    gas_constant_term = miscellaneous.MultiplyValue("ideal_volume")
    gas_constant_term.value = 1.0 * unit.molar_gas_constant
    gas_constant_term.multiplier = ProtocolPath(
        "thermodynamic_state.temperature", "global"
    )

    enthalpy_sum = miscellaneous.AddValues("enthalpy_of_vaporization")
    enthalpy_sum.values = [
        ProtocolPath("result", energy_difference.id),
        ProtocolPath("result", gas_constant_term.id),
    ]

    # Assemble the workflow.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        *(protocol.schema for protocol in liquid_set if protocol is not None),
        *(protocol.schema for protocol in gas_set if protocol is not None),
        per_molecule_liquid.schema,
        energy_difference.schema,
        gas_constant_term.schema,
        enthalpy_sum.schema,
    ]
    workflow.protocol_replicators = [liquid_data_replicator, gas_data_replicator]
    workflow.final_value_source = ProtocolPath("result", enthalpy_sum.id)

    reweighting_schema.workflow_schema = workflow
    return reweighting_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Returns the default calculation schema to use when estimating
    this class of property from direct simulations.

    The workflow runs one liquid phase and one gas phase simulation inside
    a single shared conditional group, averages the potential energy of
    each, and combines the two averages with a gas constant times
    temperature term to give the final value.

    Parameters
    ----------
    absolute_tolerance: openff.evaluator.unit.Quantity, optional
        The absolute tolerance to estimate the property to within. May not
        be set at the same time as `relative_tolerance`.
    relative_tolerance: float
        The tolerance (as a fraction of the property's reported
        uncertainty) to estimate the property to within. May not be set at
        the same time as `absolute_tolerance`.
    n_molecules: int
        The number of molecules to use in the liquid phase simulation. The
        gas phase simulation always uses a single molecule.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    # At most one of the two tolerances may be provided.
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    calculation_schema = SimulationSchema()
    calculation_schema.absolute_tolerance = absolute_tolerance
    calculation_schema.relative_tolerance = relative_tolerance

    # A target uncertainty is only used when one of the tolerances was set.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Define a custom conditional group which will ensure both the liquid and
    # gas enthalpies are estimated to within the specified uncertainty tolerance.
    # The same group object is handed to both calls to
    # `generate_simulation_protocols` below.
    converge_uncertainty = groups.ConditionalGroup("conditional_group")
    converge_uncertainty.max_iterations = 100

    # Define the protocols to perform the simulation in the liquid phase.
    average_liquid_energy = analysis.AverageObservable("average_liquid_potential")
    # The liquid average uses the requested number of molecules as its divisor.
    average_liquid_energy.divisor = n_molecules

    # NOTE: `liquid_value_source` is not referenced again in this function.
    (
        liquid_protocols,
        liquid_value_source,
        liquid_output_to_store,
    ) = generate_simulation_protocols(
        average_liquid_energy,
        use_target_uncertainty,
        "_liquid",
        converge_uncertainty,
        n_molecules=n_molecules,
    )
    liquid_output_to_store.property_phase = PropertyPhase.Liquid

    # Average the potential energy produced by the liquid production simulation.
    liquid_protocols.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        liquid_protocols.production_simulation.id,
    )

    # Define the protocols to perform the simulation in the gas phase.
    average_gas_energy = analysis.AverageObservable("average_gas_potential")

    # NOTE: `gas_value_source` is not referenced again in this function.
    (
        gas_protocols,
        gas_value_source,
        gas_output_to_store,
    ) = generate_simulation_protocols(
        average_gas_energy,
        use_target_uncertainty,
        "_gas",
        converge_uncertainty,
        n_molecules=1,
    )
    gas_output_to_store.property_phase = PropertyPhase.Gas

    # Average the potential energy produced by the gas production simulation.
    gas_protocols.analysis_protocol.observable = ProtocolPath(
        f"observables[{ObservableType.PotentialEnergy.value}]",
        gas_protocols.production_simulation.id,
    )

    # Specify that for the gas phase only a single molecule in vacuum should be
    # created.
    gas_protocols.build_coordinates.max_molecules = 1
    gas_protocols.build_coordinates.mass_density = (
        0.01 * unit.gram / unit.milliliter
    )

    # Run the gas phase simulations in the NVT ensemble without PBC
    gas_protocols.energy_minimisation.enable_pbc = False
    gas_protocols.equilibration_simulation.ensemble = Ensemble.NVT
    gas_protocols.equilibration_simulation.enable_pbc = False
    gas_protocols.production_simulation.ensemble = Ensemble.NVT
    gas_protocols.production_simulation.enable_pbc = False
    gas_protocols.production_simulation.steps_per_iteration = 15000000
    gas_protocols.production_simulation.output_frequency = 5000
    gas_protocols.production_simulation.checkpoint_frequency = 100

    # Due to a bizarre issue where the OMM Reference platform is
    # the fastest at computing properties of a single molecule
    # in vacuum, we enforce those inputs which will force the
    # gas calculations to run on the Reference platform.
    gas_protocols.equilibration_simulation.high_precision = True
    gas_protocols.equilibration_simulation.allow_gpu_platforms = False
    gas_protocols.production_simulation.high_precision = True
    gas_protocols.production_simulation.allow_gpu_platforms = False

    # Combine the values to estimate the final energy of vaporization
    # NOTE(review): presumably `SubtractValues` computes value_b - value_a,
    # i.e. gas minus liquid - confirm against the protocol definition.
    energy_of_vaporization = miscellaneous.SubtractValues("energy_of_vaporization")
    energy_of_vaporization.value_b = ProtocolPath("value", average_gas_energy.id)
    energy_of_vaporization.value_a = ProtocolPath("value", average_liquid_energy.id)

    # The molar gas constant multiplied by the temperature of the global
    # thermodynamic state.
    ideal_volume = miscellaneous.MultiplyValue("ideal_volume")
    ideal_volume.value = 1.0 * unit.molar_gas_constant
    ideal_volume.multiplier = ProtocolPath(
        "thermodynamic_state.temperature", "global"
    )

    # The final value is the sum of the energy difference and the gas
    # constant times temperature term.
    enthalpy_of_vaporization = miscellaneous.AddValues("enthalpy_of_vaporization")
    enthalpy_of_vaporization.values = [
        ProtocolPath("result", energy_of_vaporization.id),
        ProtocolPath("result", ideal_volume.id),
    ]

    # Add the extra protocols and conditions to the custom conditional group.
    converge_uncertainty.add_protocols(
        energy_of_vaporization, ideal_volume, enthalpy_of_vaporization
    )

    if use_target_uncertainty:

        # Require the error of the combined value to be less than the
        # globally provided target uncertainty.
        condition = groups.ConditionalGroup.Condition()
        condition.type = groups.ConditionalGroup.Condition.Type.LessThan

        condition.left_hand_value = ProtocolPath(
            "result.error",
            converge_uncertainty.id,
            enthalpy_of_vaporization.id,
        )
        condition.right_hand_value = ProtocolPath("target_uncertainty", "global")

        # Tie the number of iterations of both production simulations to the
        # current iteration of the conditional group.
        gas_protocols.production_simulation.total_number_of_iterations = (
            ProtocolPath("current_iteration", converge_uncertainty.id)
        )
        liquid_protocols.production_simulation.total_number_of_iterations = (
            ProtocolPath("current_iteration", converge_uncertainty.id)
        )

        converge_uncertainty.add_condition(condition)

    # Build the workflow schema.
    # Only the shared conditional group is listed here, not the per-phase
    # production simulation and analysis protocols.
    schema = WorkflowSchema()
    schema.protocol_schemas = [
        liquid_protocols.build_coordinates.schema,
        liquid_protocols.assign_parameters.schema,
        liquid_protocols.energy_minimisation.schema,
        liquid_protocols.equilibration_simulation.schema,
        liquid_protocols.decorrelate_trajectory.schema,
        liquid_protocols.decorrelate_observables.schema,
        gas_protocols.build_coordinates.schema,
        gas_protocols.assign_parameters.schema,
        gas_protocols.energy_minimisation.schema,
        gas_protocols.equilibration_simulation.schema,
        gas_protocols.decorrelate_trajectory.schema,
        gas_protocols.decorrelate_observables.schema,
        converge_uncertainty.schema,
    ]

    # Store the data generated for each phase under its own key.
    schema.outputs_to_store = {
        "liquid_data": liquid_output_to_store,
        "gas_data": gas_output_to_store,
    }

    # The final value is read from the summing protocol inside the group.
    schema.final_value_source = ProtocolPath(
        "result", converge_uncertainty.id, enthalpy_of_vaporization.id
    )

    calculation_schema.workflow_schema = schema
    return calculation_schema
def default_reweighting_schema(
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
):
    """Build the default schema for estimating this property by reweighting
    previously cached simulation data.

    At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
    set; supplying both trips the assertion at the top of the function.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_effective_samples: int
        The minimum number of effective samples to require when
        reweighting the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    reweighting_schema = ReweightingSchema()
    reweighting_schema.absolute_tolerance = absolute_tolerance
    reweighting_schema.relative_tolerance = relative_tolerance

    # The cached data to reweight is located through the default storage queries.
    reweighting_schema.storage_queries = (
        ExcessMolarVolume._default_reweighting_storage_query()
    )

    # One replicator repeats the component workflow for every entry of the
    # global "components" input...
    per_component = ProtocolReplicator(replicator_id="component_replicator")
    per_component.template_values = ProtocolPath("components", "global")
    per_component_id = per_component.id

    # ...and another repeats the gradient protocols for every entry of the
    # global "parameter_gradient_keys" input.
    per_gradient = ProtocolReplicator("gradient")
    per_gradient.template_values = ProtocolPath("parameter_gradient_keys", "global")
    per_gradient_id = per_gradient.id
    gradient_placeholder = per_gradient.placeholder_id

    # Protocols which reweight the data of the full (mixed) system.
    (
        mixture_protocols,
        mixture_volume,
        mixture_data_replicator,
        mixture_gradient_group,
        mixture_gradient_source,
    ) = ExcessMolarVolume._get_reweighting_protocols(
        "_full",
        per_gradient_id,
        "full_data_replicator",
        n_effective_samples=n_effective_samples,
    )

    # Protocols which reweight the data of each individual component.
    (
        pure_protocols,
        pure_volumes,
        pure_data_replicator,
        pure_gradient_group,
        pure_gradient_source,
    ) = ExcessMolarVolume._get_reweighting_protocols(
        "_component",
        per_gradient_id,
        f"component_{per_component.placeholder_id}_data_replicator",
        replicator_id=per_component_id,
        weight_by_mole_fraction=True,
        substance_reference=ReplicatorValue(per_component_id),
        n_effective_samples=n_effective_samples,
    )

    # Restrict the component data replicator to the data stored for the
    # component currently being replicated.
    pure_data_replicator.template_values = ProtocolPath(
        f"component_data[$({per_component_id})]", "global"
    )

    # Sum the component volumes and pair the sum (value_a) with the mixture
    # volume (value_b) in a subtraction protocol.
    sum_pure_volumes = miscellaneous.AddValues("add_component_molar_volumes")
    sum_pure_volumes.values = pure_volumes

    # NOTE(review): the id reads "potential" although volumes are combined here;
    # it is left untouched because other code may refer to the protocol by this
    # id - confirm before renaming.
    excess_volume = miscellaneous.SubtractValues("calculate_excess_potential")
    excess_volume.value_b = mixture_volume
    excess_volume.value_a = ProtocolPath("result", sum_pure_volumes.id)

    # The gradients are combined in the same add-then-subtract fashion.
    sum_pure_gradients = miscellaneous.AddValues(
        f"add_component_gradients_{gradient_placeholder}"
    )
    sum_pure_gradients.values = pure_gradient_source

    gradient_difference = miscellaneous.SubtractValues(
        f"combine_gradients_{gradient_placeholder}"
    )
    gradient_difference.value_b = mixture_gradient_source
    gradient_difference.value_a = ProtocolPath("result", sum_pure_gradients.id)

    # Assemble the final workflow schema.
    ordered_protocols = (
        *mixture_protocols,
        *pure_protocols,
        sum_pure_volumes,
        excess_volume,
        mixture_gradient_group,
        pure_gradient_group,
        sum_pure_gradients,
        gradient_difference,
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [protocol.schema for protocol in ordered_protocols]
    workflow.protocol_replicators = [
        mixture_data_replicator,
        per_component,
        pure_data_replicator,
        per_gradient,
    ]
    workflow.gradients_sources = [ProtocolPath("result", gradient_difference.id)]
    workflow.final_value_source = ProtocolPath("result", excess_volume.id)

    reweighting_schema.workflow_schema = workflow
    return reweighting_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this class of property from
    direct simulations of the full system and of each of its components.

    At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
    set; supplying both trips the assertion at the top of the function.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    simulation_schema = SimulationSchema()
    simulation_schema.absolute_tolerance = absolute_tolerance
    simulation_schema.relative_tolerance = relative_tolerance

    # A target uncertainty is in play as soon as either tolerance was provided.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Ids of the replicators which clone the gradient protocols for each
    # gradient key, and the component protocols for each component.
    gradient_replicator_id = "gradient_replicator"
    component_replicator_id = "component_replicator"

    # Workflow which yields the molar volume of the full, mixed system.
    (
        mixture_protocols,
        mixture_molar_molecules,
        mixture_volume,
        mixture_output,
        mixture_gradient_group,
        _mixture_gradient_replicator,
        mixture_gradient,
    ) = ExcessMolarVolume._get_simulation_protocols(
        "_full",
        gradient_replicator_id,
        use_target_uncertainty=use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # General workflow which yields the molar volume of a single component.
    # The weighting uses the substance actually built for the full system, as
    # its mole fractions may differ slightly from those of the measured
    # property due to rounding.
    (
        pure_protocols,
        pure_molar_molecules,
        pure_volumes,
        pure_output,
        pure_gradient_group,
        _pure_gradient_replicator,
        pure_gradient,
    ) = ExcessMolarVolume._get_simulation_protocols(
        "_component",
        gradient_replicator_id,
        replicator_id=component_replicator_id,
        weight_by_mole_fraction=True,
        component_substance_reference=ReplicatorValue(component_replicator_id),
        full_substance_reference=ProtocolPath(
            "output_substance", mixture_protocols.build_coordinates.id
        ),
        use_target_uncertainty=use_target_uncertainty,
        n_molecules=n_molecules,
    )

    # Sum the component molar volumes and pair the sum (value_a) with the
    # mixed system molar volume (value_b) in a subtraction protocol.
    sum_pure_volumes = miscellaneous.AddValues("add_component_molar_volumes")
    sum_pure_volumes.values = pure_volumes

    excess_volume = miscellaneous.SubtractValues("calculate_excess_volume")
    excess_volume.value_b = mixture_volume
    excess_volume.value_a = ProtocolPath("result", sum_pure_volumes.id)

    # Replicator which repeats the pure component protocols for every entry
    # of the global "components" input.
    per_component = ProtocolReplicator(replicator_id=component_replicator_id)
    per_component.template_values = ProtocolPath("components", "global")

    # The gradients are combined in the same add-then-subtract fashion.
    sum_pure_gradients = miscellaneous.AddValues(
        f"add_component_gradients_$({gradient_replicator_id})"
    )
    sum_pure_gradients.values = pure_gradient

    gradient_difference = miscellaneous.SubtractValues(
        f"combine_gradients_$({gradient_replicator_id})"
    )
    gradient_difference.value_b = mixture_gradient
    gradient_difference.value_a = ProtocolPath("result", sum_pure_gradients.id)

    # A single replicator over the global "parameter_gradient_keys" input is
    # registered for the gradient protocols.
    per_gradient = ProtocolReplicator(replicator_id=gradient_replicator_id)
    per_gradient.template_values = ProtocolPath("parameter_gradient_keys", "global")

    # Assemble the final workflow schema.
    ordered_protocols = (
        pure_protocols.build_coordinates,
        pure_protocols.assign_parameters,
        pure_protocols.energy_minimisation,
        pure_protocols.equilibration_simulation,
        pure_protocols.converge_uncertainty,
        pure_molar_molecules,
        mixture_protocols.build_coordinates,
        mixture_protocols.assign_parameters,
        mixture_protocols.energy_minimisation,
        mixture_protocols.equilibration_simulation,
        mixture_protocols.converge_uncertainty,
        mixture_molar_molecules,
        pure_protocols.extract_uncorrelated_trajectory,
        pure_protocols.extract_uncorrelated_statistics,
        mixture_protocols.extract_uncorrelated_trajectory,
        mixture_protocols.extract_uncorrelated_statistics,
        sum_pure_volumes,
        excess_volume,
        pure_gradient_group,
        mixture_gradient_group,
        sum_pure_gradients,
        gradient_difference,
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [protocol.schema for protocol in ordered_protocols]
    workflow.protocol_replicators = [per_gradient, per_component]

    # Tell the schema where its final value, gradients and stored outputs live.
    workflow.gradients_sources = [ProtocolPath("result", gradient_difference.id)]
    workflow.final_value_source = ProtocolPath("result", excess_volume.id)
    workflow.outputs_to_store = {
        "full_system": mixture_output,
        f"component_$({component_replicator_id})": pure_output,
    }

    simulation_schema.workflow_schema = workflow
    return simulation_schema
def default_simulation_schema(
    absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
):
    """Build the default schema for estimating this class of property from
    direct simulations.

    At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
    set; supplying both trips the assertion at the top of the function.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_molecules: int
        The number of molecules to use in the simulation.

    Returns
    -------
    SimulationSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    simulation_schema = SimulationSchema()
    simulation_schema.absolute_tolerance = absolute_tolerance
    simulation_schema.relative_tolerance = relative_tolerance

    # A target uncertainty is in play as soon as either tolerance was provided.
    use_target_uncertainty = (
        absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
    )

    # Protocol which extracts the average density from the simulation results.
    average_density = analysis.ExtractAverageStatistic("extract_density")
    average_density.statistics_type = ObservableType.Density

    # Protocols which run the simulation itself.
    protocols, value_source, output_to_store = generate_base_simulation_protocols(
        average_density, use_target_uncertainty, n_molecules=n_molecules
    )

    # Inputs of the gradient calculations. The trajectory and statistics paths
    # are both addressed through the convergence group and the production
    # simulation it contains.
    production_ids = (
        protocols.converge_uncertainty.id,
        protocols.production_simulation.id,
    )

    coordinate_source = ProtocolPath(
        "output_coordinate_file", protocols.equilibration_simulation.id
    )
    trajectory_source = ProtocolPath("trajectory_file_path", *production_ids)
    statistics_source = ProtocolPath("statistics_file_path", *production_ids)

    # Template reweighting protocol (created with an empty id) which is handed
    # to the gradient protocol group generator.
    density_template = reweighting.ReweightStatistics("")
    density_template.statistics_type = ObservableType.Density
    density_template.statistics_paths = statistics_source
    density_template.reference_reduced_potentials = statistics_source

    gradient_group, gradient_replicator, gradient_source = (
        generate_gradient_protocol_group(
            density_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_source,
            trajectory_source,
            statistics_source,
        )
    )

    # Assemble the workflow schema.
    ordered_protocols = (
        protocols.build_coordinates,
        protocols.assign_parameters,
        protocols.energy_minimisation,
        protocols.equilibration_simulation,
        protocols.converge_uncertainty,
        protocols.extract_uncorrelated_trajectory,
        protocols.extract_uncorrelated_statistics,
        gradient_group,
    )

    workflow = WorkflowSchema()
    workflow.protocol_schemas = [protocol.schema for protocol in ordered_protocols]
    workflow.protocol_replicators = [gradient_replicator]
    workflow.outputs_to_store = {"full_system": output_to_store}
    workflow.gradients_sources = [gradient_source]
    workflow.final_value_source = value_source

    simulation_schema.workflow_schema = workflow
    return simulation_schema
def default_reweighting_schema(
    absolute_tolerance=UNDEFINED,
    relative_tolerance=UNDEFINED,
    n_effective_samples=50,
):
    """Build the default schema for estimating this property by reweighting
    previously cached simulation data.

    At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
    set; supplying both trips the assertion at the top of the function.

    Parameters
    ----------
    absolute_tolerance: pint.Quantity, optional
        The absolute tolerance to estimate the property to within.
    relative_tolerance: float
        The tolerance (as a fraction of the properties reported
        uncertainty) to estimate the property to within.
    n_effective_samples: int
        The minimum number of effective samples to require when
        reweighting the cached simulation data.

    Returns
    -------
    ReweightingSchema
        The schema to follow when estimating this property.
    """
    assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

    reweighting_schema = ReweightingSchema()
    reweighting_schema.absolute_tolerance = absolute_tolerance
    reweighting_schema.relative_tolerance = relative_tolerance

    data_replicator_id = "data_replicator"

    # Protocol which calculates the densities from the existing data; its id
    # carries the data replicator placeholder.
    cached_density = analysis.ExtractAverageStatistic(
        f"calc_density_$({data_replicator_id})"
    )
    cached_density.statistics_type = ObservableType.Density

    # Protocol which reweights the densities, requiring the requested minimum
    # number of effective samples.
    reweighted_density = reweighting.ReweightStatistics("reweight_density")
    reweighted_density.statistics_type = ObservableType.Density
    reweighted_density.required_effective_samples = n_effective_samples

    protocols, data_replicator = generate_base_reweighting_protocols(
        cached_density, reweighted_density, data_replicator_id
    )

    # Inputs of the gradient calculations.
    concatenation_id = protocols.concatenate_trajectories.id

    coordinate_path = ProtocolPath("output_coordinate_path", concatenation_id)
    trajectory_path = ProtocolPath("output_trajectory_path", concatenation_id)
    statistics_path = ProtocolPath(
        "statistics_file_path", protocols.reduced_target_potential.id
    )
    sample_indices = ProtocolPath(
        "effective_sample_indices", protocols.mbar_protocol.id
    )

    # The gradient group receives its own copy of the reweighting protocol,
    # taken only after the base protocols have been generated.
    gradient_template = copy.deepcopy(reweighted_density)

    gradient_group, gradient_replicator, gradient_source = (
        generate_gradient_protocol_group(
            gradient_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_path,
            trajectory_path,
            statistics_path,
            replicator_id="grad",
            effective_sample_indices=sample_indices,
        )
    )

    # Assemble the workflow schema.
    workflow = WorkflowSchema()
    workflow.protocol_schemas = [
        protocol.schema for protocol in (*protocols, gradient_group)
    ]
    workflow.protocol_replicators = [data_replicator, gradient_replicator]
    workflow.gradients_sources = [gradient_source]
    workflow.final_value_source = ProtocolPath("value", protocols.mbar_protocol.id)

    reweighting_schema.workflow_schema = workflow
    return reweighting_schema