def default_simulation_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
) -> SimulationSchema:
        """Build the default schema for estimating this property by simulation.

        The workflow averages the density observable reported by the
        production simulation.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within. At most
            one of ``absolute_tolerance`` and ``relative_tolerance`` may be set.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        # The two kinds of tolerance are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        # An uncertainty target is requested as soon as either tolerance is set.
        has_absolute_tolerance = absolute_tolerance != UNDEFINED
        has_relative_tolerance = relative_tolerance != UNDEFINED
        converge_to_target = has_absolute_tolerance or has_relative_tolerance

        # Generate the protocols which will run the simulation itself.
        average_density = analysis.AverageObservable("average_density")
        protocols, final_value, stored_output = generate_simulation_protocols(
            average_density,
            converge_to_target,
            n_molecules=n_molecules,
        )

        # The analysis protocol averages the density observable of the
        # production simulation.
        density_key = f"observables[{ObservableType.Density.value}]"
        protocols.analysis_protocol.observable = ProtocolPath(
            density_key, protocols.production_simulation.id
        )

        # Assemble the workflow schema from the generated protocols.
        ordered_protocols = [
            protocols.build_coordinates,
            protocols.assign_parameters,
            protocols.energy_minimisation,
            protocols.equilibration_simulation,
            protocols.converge_uncertainty,
            protocols.decorrelate_trajectory,
            protocols.decorrelate_observables,
        ]

        workflow = WorkflowSchema()
        workflow.protocol_schemas = [
            protocol.schema for protocol in ordered_protocols
        ]
        workflow.outputs_to_store = {"full_system": stored_output}
        workflow.final_value_source = final_value

        simulation_schema.workflow_schema = workflow
        return simulation_schema
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ) -> ReweightingSchema:
        """Build the default schema for estimating this property by
        reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within. At most
            one of ``absolute_tolerance`` and ``relative_tolerance`` may be set.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        # The two kinds of tolerance are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Generate the protocols which reweight the density observable.
        density_protocols, replicator = generate_reweighting_protocols(
            ObservableType.Density
        )
        reweight_density = density_protocols.reweight_observable
        reweight_density.required_effective_samples = n_effective_samples

        # Collect the schema of every generated protocol, in iteration order.
        protocol_schemas = []
        for protocol in density_protocols:
            protocol_schemas.append(protocol.schema)

        workflow = WorkflowSchema()
        workflow.protocol_schemas = protocol_schemas
        workflow.protocol_replicators = [replicator]

        # The final value is the output of the reweighting protocol.
        workflow.final_value_source = ProtocolPath("value", reweight_density.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema
    def _get_schema(self):
        """Create a schema describing the current state of this workflow.

        The protocol schemas and the outputs to store are deep copied, and the
        final value source (when defined) is copied.

        Returns
        -------
        WorkflowSchema
            The schema that describes this workflow.
        """
        workflow_schema = WorkflowSchema()
        workflow_schema.id = self.uuid

        # Deep copy each protocol's schema, preserving the iteration order.
        protocol_schemas = []
        for protocol in self._protocols:
            protocol_schemas.append(copy.deepcopy(protocol.schema))
        workflow_schema.protocol_schemas = protocol_schemas

        # Only set a final value source when one has been defined.
        final_value_source = self._final_value_source
        if final_value_source != UNDEFINED:
            workflow_schema.final_value_source = final_value_source.copy()

        workflow_schema.outputs_to_store = copy.deepcopy(self._outputs_to_store)

        return workflow_schema
Exemple #4
0
    def default_yank_schema(existing_schema=None):
        """Build the default calculation schema to use when estimating
        this class of property from direct simulations with YANK.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set, a deep copy
            of it is returned with its `workflow_schema` replaced by the
            workflow built by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        if existing_schema is None:
            calculation_schema = SimulationSchema()
        else:
            assert isinstance(existing_schema, SimulationSchema)
            # Work on a copy so that the caller's schema is left untouched.
            calculation_schema = copy.deepcopy(existing_schema)

        property_name = HostGuestBindingAffinity.__name__

        workflow = WorkflowSchema(property_type=property_name)
        workflow.id = f"{property_name}Schema"

        def register(protocol):
            # Store the protocol's schema on the workflow, keyed by its id.
            workflow.protocols[protocol.id] = protocol.schema

        # Extract the ligand from the full substance. Only substances with a
        # single guest ligand are supported.
        ligand_filter = miscellaneous.FilterSubstanceByRole("filter_ligand")
        ligand_filter.input_substance = ProtocolPath("substance", "global")
        ligand_filter.component_roles = [Component.Role.Ligand]
        ligand_filter.expected_components = 1
        register(ligand_filter)

        # Extract the receptor from the full substance. Only substances with a
        # single host receptor are supported.
        receptor_filter = miscellaneous.FilterSubstanceByRole("filter_receptor")
        receptor_filter.input_substance = ProtocolPath("substance", "global")
        receptor_filter.component_roles = [Component.Role.Receptor]
        receptor_filter.expected_components = 1
        register(receptor_filter)

        # Dock the guest within the host, using the receptor coordinates
        # provided as a global input.
        docking = coordinates.BuildDockedCoordinates("perform_docking")
        docking.ligand_substance = ProtocolPath(
            "filtered_substance", ligand_filter.id
        )
        docking.receptor_coordinate_file = ProtocolPath("receptor_mol2", "global")
        register(docking)

        # Extract the solvent components used to solvate both systems.
        solvent_filter = miscellaneous.FilterSubstanceByRole("filter_solvent")
        solvent_filter.input_substance = ProtocolPath("substance", "global")
        solvent_filter.component_roles = [Component.Role.Solvent]
        register(solvent_filter)

        # Solvate the docked complex.
        complex_solvation = coordinates.SolvateExistingStructure("solvate_complex")
        complex_solvation.max_molecules = 1000
        complex_solvation.substance = ProtocolPath(
            "filtered_substance", solvent_filter.id
        )
        complex_solvation.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", docking.id
        )
        register(complex_solvation)

        # Assign force field parameters to the solvated complex system.
        complex_system = forcefield.BaseBuildSystem("build_solvated_complex_system")
        complex_system.force_field_path = ProtocolPath("force_field_path", "global")
        complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", complex_solvation.id
        )
        complex_system.substance = ProtocolPath("substance", "global")
        complex_system.charged_molecule_paths = [
            ProtocolPath("receptor_mol2", "global")
        ]
        register(complex_system)

        # Solvate the ligand on its own.
        ligand_solvation = coordinates.SolvateExistingStructure("solvate_ligand")
        ligand_solvation.max_molecules = 1000
        ligand_solvation.substance = ProtocolPath(
            "filtered_substance", solvent_filter.id
        )
        ligand_solvation.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", docking.id
        )
        register(ligand_solvation)

        # Assign force field parameters to the solvated ligand system.
        ligand_system = forcefield.BaseBuildSystem("build_solvated_ligand_system")
        ligand_system.force_field_path = ProtocolPath("force_field_path", "global")
        ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", ligand_solvation.id
        )
        ligand_system.substance = ProtocolPath("substance", "global")
        register(ligand_system)

        # Employ YANK to estimate the binding free energy.
        yank_estimate = yank.LigandReceptorYankProtocol("yank_protocol")
        yank_estimate.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )

        yank_estimate.number_of_iterations = 2000
        yank_estimate.steps_per_iteration = 500
        yank_estimate.checkpoint_interval = 10
        yank_estimate.verbose = True

        yank_estimate.force_field_path = ProtocolPath("force_field_path", "global")

        # The residue names are taken from the outputs of the docking protocol.
        yank_estimate.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", docking.id
        )
        yank_estimate.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", docking.id
        )

        yank_estimate.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", ligand_solvation.id
        )
        yank_estimate.solvated_ligand_system = ProtocolPath(
            "parameterized_system", ligand_system.id
        )

        yank_estimate.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", complex_solvation.id
        )
        yank_estimate.solvated_complex_system = ProtocolPath(
            "parameterized_system", complex_system.id
        )
        register(yank_estimate)

        # The final value is the free energy difference reported by YANK.
        workflow.final_value_source = ProtocolPath(
            "free_energy_difference", yank_estimate.id
        )

        calculation_schema.workflow_schema = workflow
        return calculation_schema
    def default_simulation_schema(
        cls,
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ) -> SimulationSchema:
        """Build the default schema for estimating this class of property
        from direct simulations.

        The workflow simulates the full mixture, plus one replicated set of
        protocols per component, and combines the mixture value with the sum
        of the mole fraction weighted component values.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within. At most
            one of ``absolute_tolerance`` and ``relative_tolerance`` may be set.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        # The two kinds of tolerance are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        # An uncertainty target is requested as soon as either tolerance is set.
        has_absolute_tolerance = absolute_tolerance != UNDEFINED
        has_relative_tolerance = relative_tolerance != UNDEFINED
        converge_to_target = has_absolute_tolerance or has_relative_tolerance

        # --- Protocols for the fully mixed system -------------------------
        mixture_analysis = analysis.AverageObservable("extract_observable_mixture")
        mixture, mixture_value, mixture_output = generate_simulation_protocols(
            mixture_analysis,
            converge_to_target,
            id_suffix="_mixture",
            n_molecules=n_molecules,
        )
        # Average the observable of interest from the production simulation.
        mixture.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            mixture.production_simulation.id,
        )
        # `_n_molecules_divisor` yields the divisor for the analysis protocol
        # and, optionally (it may be None), an extra protocol to include.
        mixture_divisor, mixture_n_molar = cls._n_molecules_divisor(
            ProtocolPath(
                "output_number_of_molecules", mixture.build_coordinates.id
            ),
            "_mixture",
        )
        mixture.analysis_protocol.divisor = mixture_divisor

        # --- Protocols replicated for each component ----------------------
        # The replicator copies the component protocols for each entry of the
        # global `components` input.
        component_replicator = ProtocolReplicator("component_replicator")
        component_replicator.template_values = ProtocolPath("components", "global")
        component_substance = ReplicatorValue(component_replicator.id)

        placeholder = component_replicator.placeholder_id
        component_suffix = f"_component_{placeholder}"

        component, _, component_output = generate_simulation_protocols(
            analysis.AverageObservable(f"extract_observable_component_{placeholder}"),
            converge_to_target,
            id_suffix=component_suffix,
            n_molecules=n_molecules,
        )
        # Point the replicated protocols at the component substance.
        component.build_coordinates.substance = component_substance
        # Average the observable of interest from the production simulation.
        component.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            component.production_simulation.id,
        )
        component_divisor, component_n_molar = cls._n_molecules_divisor(
            ProtocolPath(
                "output_number_of_molecules", component.build_coordinates.id
            ),
            component_suffix,
        )
        component.analysis_protocol.divisor = component_divisor

        # Weight each component value by its mole fraction in the full
        # substance. The weighting protocol is added to the convergence group.
        mole_fraction_weighting = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{placeholder}"
        )
        mole_fraction_weighting.value = ProtocolPath(
            "value", component.analysis_protocol.id
        )
        mole_fraction_weighting.full_substance = ProtocolPath("substance", "global")
        mole_fraction_weighting.component = component_substance

        component.converge_uncertainty.add_protocols(mole_fraction_weighting)

        # When converging to a target, compare the first condition against
        # the per component uncertainty target.
        if converge_to_target:
            first_condition = component.converge_uncertainty.conditions[0]
            first_condition.right_hand_value = ProtocolPath(
                "per_component_uncertainty", "global"
            )

        # --- Combine the component and mixture values ---------------------
        sum_components = miscellaneous.AddValues("add_component_observables")
        sum_components.values = ProtocolPath(
            "weighted_value",
            component.converge_uncertainty.id,
            mole_fraction_weighting.id,
        )

        excess_value = miscellaneous.SubtractValues("calculate_excess_observable")
        excess_value.value_b = mixture_value
        excess_value.value_a = ProtocolPath("result", sum_components.id)

        # --- Assemble the final workflow schema ---------------------------
        def simulation_schemas(stage):
            # The schemas of one set of generated simulation protocols.
            return [
                stage.build_coordinates.schema,
                stage.assign_parameters.schema,
                stage.energy_minimisation.schema,
                stage.equilibration_simulation.schema,
                stage.converge_uncertainty.schema,
                stage.decorrelate_trajectory.schema,
                stage.decorrelate_observables.schema,
            ]

        workflow = WorkflowSchema()
        workflow.protocol_schemas = [
            *simulation_schemas(component),
            *simulation_schemas(mixture),
            sum_components.schema,
            excess_value.schema,
        ]

        # Include the optional protocols returned alongside the divisors.
        for optional_protocol in (component_n_molar, mixture_n_molar):
            if optional_protocol is not None:
                workflow.protocol_schemas.append(optional_protocol.schema)

        workflow.protocol_replicators = [component_replicator]
        workflow.final_value_source = ProtocolPath("result", excess_value.id)

        workflow.outputs_to_store = {
            "full_system": mixture_output,
            f"component_{placeholder}": component_output,
        }

        simulation_schema.workflow_schema = workflow
        return simulation_schema
    def _default_reweighting_schema(
        cls,
        observable_type: ObservableType,
        absolute_tolerance: unit.Quantity = UNDEFINED,
        relative_tolerance: float = UNDEFINED,
        n_effective_samples: int = 50,
    ) -> ReweightingSchema:
        """Build the default schema for estimating this class of property by
        re-weighting cached simulation data.

        This internal implementation allows re-weighting a different observable than
        may be specified by the `_observable_type` class property.

        Parameters
        ----------
        observable_type
            The type of observable to re-weight for both the mixture and the
            component data.
        absolute_tolerance
            The absolute tolerance to estimate the property to within. At most
            one of ``absolute_tolerance`` and ``relative_tolerance`` may be set.
        relative_tolerance
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
            The default re-weighting calculation schema.
        """
        # The two kinds of tolerance are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries.
        reweighting_schema.storage_queries = cls._default_reweighting_storage_query()

        # --- Protocols for the fully mixed system -------------------------
        mixture, mixture_data_replicator = generate_reweighting_protocols(
            observable_type,
            "mixture_data_replicator",
            "_mixture",
        )
        mixture.reweight_observable.required_effective_samples = n_effective_samples

        divide_mixture = miscellaneous.DivideValue("divide_by_mixture_molecules")
        divide_mixture.value = ProtocolPath("value", mixture.reweight_observable.id)

        # Read the molecule count from the unpack protocol whose id has the
        # data replicator placeholder substituted with index "0".
        first_mixture_unpack_id = mixture.unpack_stored_data.id.replace(
            mixture_data_replicator.placeholder_id, "0"
        )
        # `_n_molecules_divisor` yields the divisor and, optionally (it may
        # be None), an extra protocol to include in the workflow.
        mixture_divisor, mixture_n_molar = cls._n_molecules_divisor(
            ProtocolPath("total_number_of_molecules", first_mixture_unpack_id),
            "_mixture",
        )
        divide_mixture.divisor = mixture_divisor

        # --- Protocols replicated for each component ----------------------
        # The replicator copies the component protocols for each entry of the
        # global `components` input.
        component_replicator = ProtocolReplicator("component_replicator")
        component_replicator.template_values = ProtocolPath("components", "global")

        placeholder = component_replicator.placeholder_id
        component_suffix = f"_component_{placeholder}"

        component, component_data_replicator = generate_reweighting_protocols(
            observable_type,
            f"component_{placeholder}_data_replicator",
            component_suffix,
        )
        component.reweight_observable.required_effective_samples = n_effective_samples
        component_data_replicator.template_values = ProtocolPath(
            f"component_data[$({component_replicator.id})]", "global"
        )

        divide_component = miscellaneous.DivideValue(
            f"divide_by_component_{placeholder}_molecules"
        )
        divide_component.value = ProtocolPath(
            "value", component.reweight_observable.id
        )

        first_component_unpack_id = component.unpack_stored_data.id.replace(
            component_data_replicator.placeholder_id, "0"
        )
        component_divisor, component_n_molar = cls._n_molecules_divisor(
            ProtocolPath("total_number_of_molecules", first_component_unpack_id),
            component_suffix,
        )
        divide_component.divisor = component_divisor

        # Point the replicated protocols at the component substance.
        component_substance = ReplicatorValue(component_replicator.id)

        component.build_reference_system.substance = component_substance
        component.build_target_system.substance = component_substance

        # Weight each component value by its mole fraction in the full substance.
        mole_fraction_weighting = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{placeholder}"
        )
        mole_fraction_weighting.value = ProtocolPath("result", divide_component.id)
        mole_fraction_weighting.full_substance = ProtocolPath("substance", "global")
        mole_fraction_weighting.component = component_substance

        # --- Combine the component and mixture values ---------------------
        sum_components = miscellaneous.AddValues("add_component_observables")
        sum_components.values = ProtocolPath(
            "weighted_value",
            mole_fraction_weighting.id,
        )

        excess_value = miscellaneous.SubtractValues("calculate_excess_observable")
        excess_value.value_b = ProtocolPath("result", divide_mixture.id)
        excess_value.value_a = ProtocolPath("result", sum_components.id)

        # --- Assemble the final workflow schema ---------------------------
        # Entries of the generated protocol collections may be None; skip them.
        protocol_schemas = [
            protocol.schema for protocol in mixture if protocol is not None
        ]
        protocol_schemas.append(divide_mixture.schema)
        protocol_schemas.extend(
            protocol.schema for protocol in component if protocol is not None
        )
        protocol_schemas.append(divide_component.schema)
        protocol_schemas.append(mole_fraction_weighting.schema)
        protocol_schemas.append(sum_components.schema)
        protocol_schemas.append(excess_value.schema)

        workflow = WorkflowSchema()
        workflow.protocol_schemas = protocol_schemas

        # Include the optional protocols returned alongside the divisors.
        for optional_protocol in (component_n_molar, mixture_n_molar):
            if optional_protocol is not None:
                workflow.protocol_schemas.append(optional_protocol.schema)

        workflow.protocol_replicators = [
            mixture_data_replicator,
            component_replicator,
            component_data_replicator,
        ]

        workflow.final_value_source = ProtocolPath("result", excess_value.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema
    def default_reweighting_schema(
        cls,
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Build the default schema for estimating this property by
        reweighting existing data.

        The workflow reweights the potential energy of cached liquid and gas
        phase data and combines the two with the product of the molar gas
        constant and the temperature.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within. At most
            one of ``absolute_tolerance`` and ``relative_tolerance`` may be set.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        # The two kinds of tolerance are mutually exclusive.
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries.
        reweighting_schema.storage_queries = cls._default_reweighting_storage_query()

        # --- Liquid phase -------------------------------------------------
        # Reweight the potential energy of the cached liquid phase data.
        liquid, liquid_data_replicator = generate_reweighting_protocols(
            ObservableType.PotentialEnergy,
            id_suffix="_liquid",
            replicator_id="liquid_data_replicator",
        )
        liquid_data_replicator.template_values = ProtocolPath("liquid_data", "global")
        liquid.reweight_observable.required_effective_samples = n_effective_samples

        # Divide the potential by the number of liquid phase molecules from the
        # first piece of cached data (replicator placeholder replaced by "0").
        first_liquid_unpack_id = liquid.unpack_stored_data.id.replace(
            liquid_data_replicator.placeholder_id, "0"
        )

        per_molecule_liquid = miscellaneous.DivideValue("divide_by_liquid_molecules")
        per_molecule_liquid.value = ProtocolPath(
            "value", liquid.reweight_observable.id
        )
        per_molecule_liquid.divisor = ProtocolPath(
            "total_number_of_molecules", first_liquid_unpack_id
        )

        # --- Gas phase ----------------------------------------------------
        # Reweight the potential energy of the cached gas phase data.
        gas, gas_data_replicator = generate_reweighting_protocols(
            ObservableType.PotentialEnergy,
            id_suffix="_gas",
            replicator_id="gas_data_replicator",
        )
        gas_data_replicator.template_values = ProtocolPath("gas_data", "global")
        gas.reweight_observable.required_effective_samples = n_effective_samples

        # Turn off PBC for the gas phase.
        gas.evaluate_reference_potential.enable_pbc = False
        gas.evaluate_target_potential.enable_pbc = False

        # --- Combine the values into the enthalpy of vaporization ---------
        energy_difference = miscellaneous.SubtractValues("energy_of_vaporization")
        energy_difference.value_b = ProtocolPath("value", gas.reweight_observable.id)
        energy_difference.value_a = ProtocolPath("result", per_molecule_liquid.id)

        # The molar gas constant multiplied by the temperature of the global
        # thermodynamic state.
        gas_constant_term = miscellaneous.MultiplyValue("ideal_volume")
        gas_constant_term.value = 1.0 * unit.molar_gas_constant
        gas_constant_term.multiplier = ProtocolPath(
            "thermodynamic_state.temperature", "global"
        )

        enthalpy = miscellaneous.AddValues("enthalpy_of_vaporization")
        enthalpy.values = [
            ProtocolPath("result", energy_difference.id),
            ProtocolPath("result", gas_constant_term.id),
        ]

        # --- Assemble the workflow schema ---------------------------------
        # Entries of the generated protocol collections may be None; skip them.
        protocol_schemas = [
            protocol.schema for protocol in liquid if protocol is not None
        ]
        protocol_schemas.extend(
            protocol.schema for protocol in gas if protocol is not None
        )
        protocol_schemas.append(per_molecule_liquid.schema)
        protocol_schemas.append(energy_difference.schema)
        protocol_schemas.append(gas_constant_term.schema)
        protocol_schemas.append(enthalpy.schema)

        workflow = WorkflowSchema()
        workflow.protocol_schemas = protocol_schemas
        workflow.protocol_replicators = [liquid_data_replicator, gas_data_replicator]
        workflow.final_value_source = ProtocolPath("result", enthalpy.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ):
        """Build the default schema used to estimate this class of property
        from direct simulations.

        The workflow simulates a liquid phase (``n_molecules`` molecules) and a
        gas phase (a single molecule) inside one shared conditional group,
        averages the potential energy of each, and combines the two averages
        with a (molar gas constant x temperature) term to give the final value.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the liquid simulation. The
            liquid potential energy average is divided by this number.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
        set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        has_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # A single conditional group holds both phases so that the liquid and
        # gas estimates are converged together against the requested tolerance.
        convergence_group = groups.ConditionalGroup("conditional_group")
        convergence_group.max_iterations = 100

        # Both phases analyse the same observable of their production run.
        potential_energy_key = f"observables[{ObservableType.PotentialEnergy.value}]"

        # ---- Liquid phase ----
        liquid_average = analysis.AverageObservable("average_liquid_potential")
        liquid_average.divisor = n_molecules

        liquid, _, liquid_stored_data = generate_simulation_protocols(
            liquid_average,
            has_target_uncertainty,
            "_liquid",
            convergence_group,
            n_molecules=n_molecules,
        )
        liquid_stored_data.property_phase = PropertyPhase.Liquid
        liquid.analysis_protocol.observable = ProtocolPath(
            potential_energy_key, liquid.production_simulation.id
        )

        # ---- Gas phase ----
        gas_average = analysis.AverageObservable("average_gas_potential")

        gas, _, gas_stored_data = generate_simulation_protocols(
            gas_average,
            has_target_uncertainty,
            "_gas",
            convergence_group,
            n_molecules=1,
        )
        gas_stored_data.property_phase = PropertyPhase.Gas
        gas.analysis_protocol.observable = ProtocolPath(
            potential_energy_key, gas.production_simulation.id
        )

        # Only a single molecule in vacuum should be created for the gas phase.
        gas.build_coordinates.max_molecules = 1
        gas.build_coordinates.mass_density = 0.01 * unit.gram / unit.milliliter

        # The gas phase is minimised and simulated without PBC.
        gas.energy_minimisation.enable_pbc = False

        for gas_simulation in (gas.equilibration_simulation, gas.production_simulation):
            # Run in the NVT ensemble without periodic boundary conditions.
            gas_simulation.ensemble = Ensemble.NVT
            gas_simulation.enable_pbc = False
            # Due to a bizarre issue where the OMM Reference platform is the
            # fastest at computing properties of a single molecule in vacuum,
            # these inputs are set to force the gas calculations onto the
            # Reference platform.
            gas_simulation.high_precision = True
            gas_simulation.allow_gpu_platforms = False

        gas_production = gas.production_simulation
        gas_production.steps_per_iteration = 15000000
        gas_production.output_frequency = 5000
        gas_production.checkpoint_frequency = 100

        # ---- Combine the two phases into the final value ----
        vaporization_energy = miscellaneous.SubtractValues("energy_of_vaporization")
        vaporization_energy.value_b = ProtocolPath("value", gas_average.id)
        vaporization_energy.value_a = ProtocolPath("value", liquid_average.id)

        gas_constant_times_temperature = miscellaneous.MultiplyValue("ideal_volume")
        gas_constant_times_temperature.value = 1.0 * unit.molar_gas_constant
        gas_constant_times_temperature.multiplier = ProtocolPath(
            "thermodynamic_state.temperature", "global"
        )

        vaporization_enthalpy = miscellaneous.AddValues("enthalpy_of_vaporization")
        vaporization_enthalpy.values = [
            ProtocolPath("result", vaporization_energy.id),
            ProtocolPath("result", gas_constant_times_temperature.id),
        ]

        # The combining protocols must live inside the group so that the
        # convergence condition below can inspect their output.
        convergence_group.add_protocols(
            vaporization_energy,
            gas_constant_times_temperature,
            vaporization_enthalpy,
        )

        if has_target_uncertainty:

            # Keep iterating until the error of the final value drops below
            # the globally supplied target uncertainty.
            uncertainty_condition = groups.ConditionalGroup.Condition()
            uncertainty_condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            uncertainty_condition.left_hand_value = ProtocolPath(
                "result.error", convergence_group.id, vaporization_enthalpy.id
            )
            uncertainty_condition.right_hand_value = ProtocolPath(
                "target_uncertainty", "global"
            )

            # Tie the number of production iterations of each phase to the
            # current iteration of the conditional group.
            gas.production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", convergence_group.id
            )
            liquid.production_simulation.total_number_of_iterations = ProtocolPath(
                "current_iteration", convergence_group.id
            )

            convergence_group.add_condition(uncertainty_condition)

        # ---- Assemble the workflow schema ----
        workflow = WorkflowSchema()

        # Liquid protocols first, then gas, then the shared conditional group.
        protocol_schemas = []

        for phase in (liquid, gas):
            protocol_schemas.extend(
                [
                    phase.build_coordinates.schema,
                    phase.assign_parameters.schema,
                    phase.energy_minimisation.schema,
                    phase.equilibration_simulation.schema,
                    phase.decorrelate_trajectory.schema,
                    phase.decorrelate_observables.schema,
                ]
            )

        protocol_schemas.append(convergence_group.schema)
        workflow.protocol_schemas = protocol_schemas

        workflow.outputs_to_store = {
            "liquid_data": liquid_stored_data,
            "gas_data": gas_stored_data,
        }

        workflow.final_value_source = ProtocolPath(
            "result", convergence_group.id, vaporization_enthalpy.id
        )

        simulation_schema.workflow_schema = workflow
        return simulation_schema
Exemple #9
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Build the default schema used to estimate this property by
        reweighting existing (cached) simulation data.

        Reweighting protocols are generated once for the full system and once,
        replicated per component, for the individual components. The summed
        component values and the full system value are then fed into a
        ``SubtractValues`` protocol, and the gradients are combined in the
        same fashion.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
        set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # The queries used to retrieve the cached data to reweight.
        reweighting_schema.storage_queries = (
            ExcessMolarVolume._default_reweighting_storage_query()
        )

        # Replicates the component reweighting workflow once per component
        # of the system.
        per_component = ProtocolReplicator(replicator_id="component_replicator")
        per_component.template_values = ProtocolPath("components", "global")

        # Replicates the gradient protocols once per parameter gradient key.
        per_gradient = ProtocolReplicator("gradient")
        per_gradient.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # ---- Protocols which reweight data for the full system ----
        (
            mixture_protocols,
            mixture_volume,
            mixture_data_replicator,
            mixture_gradient_group,
            mixture_gradient,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_full",
            per_gradient.id,
            "full_data_replicator",
            n_effective_samples=n_effective_samples,
        )

        # ---- Protocols which reweight data for each component ----
        (
            pure_protocols,
            pure_volumes,
            pure_data_replicator,
            pure_gradient_group,
            pure_gradients,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_component",
            per_gradient.id,
            f"component_{per_component.placeholder_id}_data_replicator",
            replicator_id=per_component.id,
            weight_by_mole_fraction=True,
            substance_reference=ReplicatorValue(per_component.id),
            n_effective_samples=n_effective_samples,
        )

        # Restrict the component data replicator to the data stored for the
        # component currently being replicated.
        pure_data_replicator.template_values = ProtocolPath(
            f"component_data[$({per_component.id})]", "global"
        )

        # ---- Combine the values ----
        sum_pure_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_pure_volumes.values = pure_volumes

        # NOTE(review): this protocol id says "potential" although its inputs
        # are the volume values above - presumably a copy-paste leftover. It
        # is kept unchanged here because the id is part of the emitted schema.
        excess_volume = miscellaneous.SubtractValues("calculate_excess_potential")
        excess_volume.value_b = mixture_volume
        excess_volume.value_a = ProtocolPath("result", sum_pure_volumes.id)

        # ---- Combine the gradients ----
        sum_pure_gradients = miscellaneous.AddValues(
            f"add_component_gradients_{per_gradient.placeholder_id}"
        )
        sum_pure_gradients.values = pure_gradients

        excess_gradient = miscellaneous.SubtractValues(
            f"combine_gradients_{per_gradient.placeholder_id}"
        )
        excess_gradient.value_b = mixture_gradient
        excess_gradient.value_a = ProtocolPath("result", sum_pure_gradients.id)

        # ---- Assemble the final workflow schema ----
        workflow = WorkflowSchema()

        protocol_schemas = [protocol.schema for protocol in mixture_protocols]
        protocol_schemas.extend(protocol.schema for protocol in pure_protocols)
        protocol_schemas.extend(
            [
                sum_pure_volumes.schema,
                excess_volume.schema,
                mixture_gradient_group.schema,
                pure_gradient_group.schema,
                sum_pure_gradients.schema,
                excess_gradient.schema,
            ]
        )
        workflow.protocol_schemas = protocol_schemas

        workflow.protocol_replicators = [
            mixture_data_replicator,
            per_component,
            pure_data_replicator,
            per_gradient,
        ]

        workflow.gradients_sources = [ProtocolPath("result", excess_gradient.id)]
        workflow.final_value_source = ProtocolPath("result", excess_volume.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema
Exemple #10
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ):
        """Build the default schema used to estimate this class of property
        from direct simulations.

        Simulation protocols are generated once for the full, mixed system and
        once, replicated per component, for the individual components. The
        summed component molar volumes and the full system molar volume are
        then fed into a ``SubtractValues`` protocol, and the gradients are
        combined in the same fashion.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
        set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        has_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # Id of the replicator which clones the gradient protocols for each
        # gradient key to be estimated.
        gradient_replicator_id = "gradient_replicator"
        # Id of the replicator which clones the component protocols for each
        # component of the system.
        component_replicator_id = "component_replicator"

        # ---- Molar volume of the full, mixed system ----
        (
            mixture_protocols,
            mixture_molar_molecules,
            mixture_volume,
            mixture_output,
            mixture_gradient_group,
            _mixture_gradient_replicator,
            mixture_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_full",
            gradient_replicator_id,
            use_target_uncertainty=has_target_uncertainty,
            n_molecules=n_molecules,
        )

        # ---- Molar volume of a single (replicated) component ----
        # Weight by the mole fractions of the actual full system as built, as
        # these may be slightly different to the mole fractions of the
        # measured property due to rounding.
        (
            pure_protocols,
            pure_molar_molecules,
            pure_volumes,
            pure_output,
            pure_gradient_group,
            _pure_gradient_replicator,
            pure_gradients,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_component",
            gradient_replicator_id,
            replicator_id=component_replicator_id,
            weight_by_mole_fraction=True,
            component_substance_reference=ReplicatorValue(component_replicator_id),
            full_substance_reference=ProtocolPath(
                "output_substance", mixture_protocols.build_coordinates.id
            ),
            use_target_uncertainty=has_target_uncertainty,
            n_molecules=n_molecules,
        )

        # ---- Combine the values ----
        # Add together the component molar volumes, then feed that sum and the
        # mixed system molar volume into the subtraction protocol.
        sum_pure_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_pure_volumes.values = pure_volumes

        excess_volume = miscellaneous.SubtractValues("calculate_excess_volume")
        excess_volume.value_b = mixture_volume
        excess_volume.value_a = ProtocolPath("result", sum_pure_volumes.id)

        # ---- Combine the gradients ----
        sum_pure_gradients = miscellaneous.AddValues(
            f"add_component_gradients_$({gradient_replicator_id})"
        )
        sum_pure_gradients.values = pure_gradients

        excess_gradient = miscellaneous.SubtractValues(
            f"combine_gradients_$({gradient_replicator_id})"
        )
        excess_gradient.value_b = mixture_gradient
        excess_gradient.value_a = ProtocolPath("result", sum_pure_gradients.id)

        # ---- Replicators ----
        # Defines how the pure component protocols are replicated for each
        # component of the system.
        per_component = ProtocolReplicator(replicator_id=component_replicator_id)
        per_component.template_values = ProtocolPath("components", "global")

        # A single replicator drives the gradient protocols of both systems.
        per_gradient = ProtocolReplicator(replicator_id=gradient_replicator_id)
        per_gradient.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # ---- Assemble the final workflow schema ----
        workflow = WorkflowSchema()

        # Component entries are listed before the full system entries.
        systems = (
            (pure_protocols, pure_molar_molecules),
            (mixture_protocols, mixture_molar_molecules),
        )
        protocol_schemas = []

        for system_protocols, molar_molecules in systems:
            protocol_schemas.extend(
                [
                    system_protocols.build_coordinates.schema,
                    system_protocols.assign_parameters.schema,
                    system_protocols.energy_minimisation.schema,
                    system_protocols.equilibration_simulation.schema,
                    system_protocols.converge_uncertainty.schema,
                    molar_molecules.schema,
                ]
            )

        for system_protocols, _ in systems:
            protocol_schemas.extend(
                [
                    system_protocols.extract_uncorrelated_trajectory.schema,
                    system_protocols.extract_uncorrelated_statistics.schema,
                ]
            )

        protocol_schemas.extend(
            [
                sum_pure_volumes.schema,
                excess_volume.schema,
                pure_gradient_group.schema,
                mixture_gradient_group.schema,
                sum_pure_gradients.schema,
                excess_gradient.schema,
            ]
        )
        workflow.protocol_schemas = protocol_schemas

        workflow.protocol_replicators = [per_gradient, per_component]

        # Tell the schema where to look for its final values.
        workflow.gradients_sources = [ProtocolPath("result", excess_gradient.id)]
        workflow.final_value_source = ProtocolPath("result", excess_volume.id)

        workflow.outputs_to_store = {
            "full_system": mixture_output,
            f"component_$({component_replicator_id})": pure_output,
        }

        simulation_schema.workflow_schema = workflow
        return simulation_schema
Exemple #11
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ):
        """Build the default schema used to estimate this class of property
        from direct simulations.

        The workflow runs the base simulation protocols, extracts the average
        density from the results, and attaches a replicated group of gradient
        protocols which reweight the density statistics.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
        set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        has_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # The protocol which extracts the average density from the results
        # of a simulation.
        density_extractor = analysis.ExtractAverageStatistic("extract_density")
        density_extractor.statistics_type = ObservableType.Density

        # The protocols which run the simulation itself.
        protocols, density_source, stored_data = generate_base_simulation_protocols(
            density_extractor,
            has_target_uncertainty,
            n_molecules=n_molecules,
        )

        # ---- Inputs of the gradient calculations ----
        equilibrated_coordinates = ProtocolPath(
            "output_coordinate_file", protocols.equilibration_simulation.id
        )

        # The production simulation is addressed through the conditional
        # group that contains it.
        production_ids = (
            protocols.converge_uncertainty.id,
            protocols.production_simulation.id,
        )
        production_trajectory = ProtocolPath("trajectory_file_path", *production_ids)
        production_statistics = ProtocolPath("statistics_file_path", *production_ids)

        # Template protocol handed to the gradient group generator.
        density_reweighting_template = reweighting.ReweightStatistics("")
        density_reweighting_template.statistics_type = ObservableType.Density
        density_reweighting_template.statistics_paths = production_statistics
        density_reweighting_template.reference_reduced_potentials = (
            production_statistics
        )

        gradient_group, gradient_replicator, gradient_source = (
            generate_gradient_protocol_group(
                density_reweighting_template,
                ProtocolPath("force_field_path", "global"),
                equilibrated_coordinates,
                production_trajectory,
                production_statistics,
            )
        )

        # ---- Assemble the workflow schema ----
        workflow = WorkflowSchema()

        simulation_steps = (
            protocols.build_coordinates,
            protocols.assign_parameters,
            protocols.energy_minimisation,
            protocols.equilibration_simulation,
            protocols.converge_uncertainty,
            protocols.extract_uncorrelated_trajectory,
            protocols.extract_uncorrelated_statistics,
            gradient_group,
        )
        workflow.protocol_schemas = [step.schema for step in simulation_steps]

        workflow.protocol_replicators = [gradient_replicator]

        workflow.outputs_to_store = {"full_system": stored_data}

        workflow.gradients_sources = [gradient_source]
        workflow.final_value_source = density_source

        simulation_schema.workflow_schema = workflow
        return simulation_schema
Exemple #12
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Build the default schema used to estimate this property by
        reweighting existing (cached) simulation data.

        The workflow extracts the density from each piece of cached data,
        reweights it, and attaches a replicated group of gradient protocols
        built from a copy of the density reweighting protocol.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may be
        set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        data_replicator_id = "data_replicator"

        # The protocol which calculates the densities from the existing data;
        # its id carries the data replicator placeholder.
        extract_density = analysis.ExtractAverageStatistic(
            f"calc_density_$({data_replicator_id})"
        )
        extract_density.statistics_type = ObservableType.Density

        # The protocol which reweights the extracted densities.
        density_reweighting = reweighting.ReweightStatistics("reweight_density")
        density_reweighting.statistics_type = ObservableType.Density
        density_reweighting.required_effective_samples = n_effective_samples

        protocols, data_replicator = generate_base_reweighting_protocols(
            extract_density, density_reweighting, data_replicator_id
        )

        # ---- Inputs of the gradient calculations ----
        concatenated_coordinates = ProtocolPath(
            "output_coordinate_path", protocols.concatenate_trajectories.id
        )
        concatenated_trajectory = ProtocolPath(
            "output_trajectory_path", protocols.concatenate_trajectories.id
        )
        target_statistics = ProtocolPath(
            "statistics_file_path", protocols.reduced_target_potential.id
        )
        effective_samples = ProtocolPath(
            "effective_sample_indices", protocols.mbar_protocol.id
        )

        # The gradient group receives its own copy of the reweighting
        # protocol as a template, taken after the base protocols were built.
        gradient_group, gradient_replicator, gradient_source = (
            generate_gradient_protocol_group(
                copy.deepcopy(density_reweighting),
                ProtocolPath("force_field_path", "global"),
                concatenated_coordinates,
                concatenated_trajectory,
                target_statistics,
                replicator_id="grad",
                effective_sample_indices=effective_samples,
            )
        )

        # ---- Assemble the workflow schema ----
        workflow = WorkflowSchema()

        protocol_schemas = [protocol.schema for protocol in protocols]
        protocol_schemas.append(gradient_group.schema)
        workflow.protocol_schemas = protocol_schemas

        workflow.protocol_replicators = [data_replicator, gradient_replicator]
        workflow.gradients_sources = [gradient_source]
        workflow.final_value_source = ProtocolPath("value", protocols.mbar_protocol.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema