Esempio n. 1
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=1000) -> SimulationSchema:
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Define the protocols which will run the simulation itself.
        protocols, value_source, output_to_store = generate_simulation_protocols(
            analysis.AverageObservable("average_density"),
            use_target_uncertainty,
            n_molecules=n_molecules,
        )
        # Specify that the average density should be estimated.
        protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{ObservableType.Density.value}]",
            protocols.production_simulation.id,
        )

        # Build the workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            protocols.converge_uncertainty.schema,
            protocols.decorrelate_trajectory.schema,
            protocols.decorrelate_observables.schema,
        ]

        schema.outputs_to_store = {"full_system": output_to_store}
        schema.final_value_source = value_source

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 2
0
    def _set_schema(self, schema):
        """Sets this workflow's properties from a `WorkflowSchema`.

        Parameters
        ----------
        schema: WorkflowSchema
            The schema which outlines this steps in this workflow.
        """
        # Copy the schema.
        schema = WorkflowSchema.parse_json(schema.json())

        if schema.final_value_source != UNDEFINED:

            self._final_value_source = schema.final_value_source
            self._final_value_source.append_uuid(self.uuid)

        self._build_protocols(schema)

        self._outputs_to_store = {}

        if schema.outputs_to_store != UNDEFINED:

            for label in schema.outputs_to_store:

                self._append_uuid_to_output_to_store(
                    schema.outputs_to_store[label])
                self._outputs_to_store[label] = self._build_output_to_store(
                    schema.outputs_to_store[label])
Esempio n. 3
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ) -> ReweightingSchema:
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        protocols, data_replicator = generate_reweighting_protocols(
            ObservableType.Density)
        protocols.reweight_observable.required_effective_samples = n_effective_samples

        schema = WorkflowSchema()
        schema.protocol_schemas = [x.schema for x in protocols]
        schema.protocol_replicators = [data_replicator]

        schema.final_value_source = ProtocolPath(
            "value", protocols.reweight_observable.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 4
0
    def _get_schema(self):
        """Returns the schema that describes this workflow.

        Returns
        -------
        WorkflowSchema
            The schema that describes this workflow.
        """
        schema = WorkflowSchema()

        schema.id = self.uuid
        schema.protocol_schemas = [
            copy.deepcopy(x.schema) for x in self._protocols
        ]

        if self._final_value_source != UNDEFINED:
            schema.final_value_source = self._final_value_source.copy()

        schema.gradients_sources = [
            source.copy() for source in self._gradients_sources
        ]
        schema.outputs_to_store = copy.deepcopy(self._outputs_to_store)

        return schema
Esempio n. 5
0
    def default_paprika_schema(
        cls,
        existing_schema: SimulationSchema = None,
        n_solvent_molecules: int = 2500,
        n_thermalization_steps: int = 50000,
        n_equilibration_steps: int = 200000,
        n_production_steps: int = 2500000,
        dt_thermalization: unit.Quantity = 1.0 * unit.femtosecond,
        dt_equilibration: unit.Quantity = 2.0 * unit.femtosecond,
        dt_production: unit.Quantity = 2.0 * unit.femtosecond,
        debug: bool = False,
    ):
        """Returns the default calculation schema to use when estimating
        a host-guest binding affinity measurement with an APR calculation
        using the ``paprika`` package.

        Notes
        -----
        * This schema requires additional metadata to be able to estimate
          each metadata. This metadata is automatically generated for properties
          loaded from the ``taproom`` package using the ``TaproomDataSet`` object.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set,
            the schema's `workflow_schema` will be overwritten
            by this method.
        n_solvent_molecules
            The number of solvent molecules to add to the box.
        n_thermalization_steps
            The number of thermalization simulations steps to perform.
            Sample generated during this step will be discarded.
        n_equilibration_steps
            The number of equilibration simulations steps to perform.
            Sample generated during this step will be discarded.
        n_production_steps
            The number of production simulations steps to perform.
            Sample generated during this step will be used in the final
            free energy calculation.
        dt_thermalization
            The integration timestep during thermalization
        dt_equilibration
            The integration timestep during equilibration
        dt_production
            The integration timestep during production
        debug
            Whether to return a debug schema. This is nearly identical
            to the default schema, albeit with significantly less
            solvent molecules (10), all simulations run in NVT and much
            shorter simulation runs (500 steps). If True, the other input
            arguments will be ignored.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:
            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        # Initialize the protocols which will serve as templates for those
        # used in the actual workflows.
        solvation_template = cls._paprika_default_solvation_protocol(
            n_solvent_molecules=n_solvent_molecules)

        (
            minimization_template,
            *simulation_templates,
        ) = cls._paprika_default_simulation_protocols(
            n_thermalization_steps=n_thermalization_steps,
            n_equilibration_steps=n_equilibration_steps,
            n_production_steps=n_production_steps,
            dt_thermalization=dt_thermalization,
            dt_equilibration=dt_equilibration,
            dt_production=dt_production,
        )

        if debug:

            solvation_template.max_molecules = 10
            solvation_template.mass_density = 0.01 * unit.grams / unit.milliliters

            for simulation_template in simulation_templates:

                simulation_template.ensemble = Ensemble.NVT
                simulation_template.steps_per_iteration = 500
                simulation_template.output_frequency = 50

        # Set up a replicator which will perform the attach-pull calculation for
        # each of the guest orientations
        orientation_replicator = ProtocolReplicator("orientation_replicator")
        orientation_replicator.template_values = ProtocolPath(
            "guest_orientations", "global")

        restraint_schemas = {
            "static":
            ProtocolPath(
                f"guest_orientations[{orientation_replicator.placeholder_id}]."
                f"static_restraints",
                "global",
            ),
            "conformational":
            ProtocolPath(
                f"guest_orientations[{orientation_replicator.placeholder_id}]."
                f"conformational_restraints",
                "global",
            ),
            "guest":
            ProtocolPath("guest_restraints", "global"),
            "wall":
            ProtocolPath("wall_restraints", "global"),
            "symmetry":
            ProtocolPath("symmetry_restraints", "global"),
        }

        # Build the protocols to compute the attach and pull free energies.
        (
            attach_pull_protocols,
            attach_pull_replicators,
            attach_free_energy,
            pull_free_energy,
            reference_work,
        ) = cls._paprika_build_attach_pull_protocols(
            orientation_replicator,
            restraint_schemas,
            solvation_template,
            minimization_template,
            *simulation_templates,
        )

        # Build the protocols to compute the release free energies.
        (
            release_protocols,
            release_replicator,
            release_free_energy,
        ) = cls._paprika_build_release_protocols(
            orientation_replicator,
            restraint_schemas,
            solvation_template,
            minimization_template,
            *simulation_templates,
        )

        # Compute the symmetry correction.
        symmetry_correction = ComputeSymmetryCorrection("symmetry_correction")
        symmetry_correction.n_microstates = ProtocolPath(
            "n_guest_microstates", "global")
        symmetry_correction.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        # Sum together the free energies of the individual orientations
        orientation_free_energy = miscellaneous.AddValues(
            f"orientation_free_energy_{orientation_replicator.placeholder_id}")
        orientation_free_energy.values = [
            attach_free_energy,
            pull_free_energy,
            reference_work,
            release_free_energy,
            ProtocolPath("result", symmetry_correction.id),
        ]

        # Finally, combine all of the values together
        total_free_energy = analysis.AverageFreeEnergies("total_free_energy")
        total_free_energy.values = ProtocolPath("result",
                                                orientation_free_energy.id)
        total_free_energy.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        calculation_schema.workflow_schema = WorkflowSchema()

        calculation_schema.workflow_schema.protocol_schemas = [
            *(protocol.schema for protocol in attach_pull_protocols),
            *(protocol.schema for protocol in release_protocols),
            symmetry_correction.schema,
            orientation_free_energy.schema,
            total_free_energy.schema,
        ]
        calculation_schema.workflow_schema.protocol_replicators = [
            orientation_replicator,
            *attach_pull_replicators,
            release_replicator,
        ]

        # Define where the final value comes from.
        calculation_schema.workflow_schema.final_value_source = ProtocolPath(
            "result", total_free_energy.id)

        return calculation_schema
Esempio n. 6
0
    def default_yank_schema(existing_schema=None):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set,
            the schema's `workflow_schema` will be overwritten
            by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:
            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        schema = WorkflowSchema(
            property_type=HostGuestBindingAffinity.__name__)
        schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")

        # Initial coordinate and topology setup.
        filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand")
        filter_ligand.input_substance = ProtocolPath("substance", "global")

        filter_ligand.component_roles = [Component.Role.Ligand]
        # We only support substances with a single guest ligand.
        filter_ligand.expected_components = 1

        schema.protocols[filter_ligand.id] = filter_ligand.schema

        # Construct the protocols which will (for now) take as input a set of host coordinates,
        # and generate a set of charges for them.
        filter_receptor = miscellaneous.FilterSubstanceByRole(
            "filter_receptor")
        filter_receptor.input_substance = ProtocolPath("substance", "global")

        filter_receptor.component_roles = [Component.Role.Receptor]
        # We only support substances with a single host receptor.
        filter_receptor.expected_components = 1

        schema.protocols[filter_receptor.id] = filter_receptor.schema

        # Perform docking to position the guest within the host.
        perform_docking = coordinates.BuildDockedCoordinates("perform_docking")

        perform_docking.ligand_substance = ProtocolPath(
            "filtered_substance", filter_ligand.id)
        perform_docking.receptor_coordinate_file = ProtocolPath(
            "receptor_mol2", "global")

        schema.protocols[perform_docking.id] = perform_docking.schema

        # Solvate the docked structure using packmol
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        schema.protocols[filter_solvent.id] = filter_solvent.schema

        solvate_complex = coordinates.SolvateExistingStructure(
            "solvate_complex")
        solvate_complex.max_molecules = 1000

        solvate_complex.substance = ProtocolPath("filtered_substance",
                                                 filter_solvent.id)
        solvate_complex.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", perform_docking.id)

        schema.protocols[solvate_complex.id] = solvate_complex.schema

        # Assign force field parameters to the solvated complex system.
        build_solvated_complex_system = forcefield.BaseBuildSystem(
            "build_solvated_complex_system")

        build_solvated_complex_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        build_solvated_complex_system.substance = ProtocolPath(
            "substance", "global")

        build_solvated_complex_system.charged_molecule_paths = [
            ProtocolPath("receptor_mol2", "global")
        ]

        schema.protocols[build_solvated_complex_system.
                         id] = build_solvated_complex_system.schema

        # Solvate the ligand using packmol
        solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
        solvate_ligand.max_molecules = 1000

        solvate_ligand.substance = ProtocolPath("filtered_substance",
                                                filter_solvent.id)
        solvate_ligand.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", perform_docking.id)

        schema.protocols[solvate_ligand.id] = solvate_ligand.schema

        # Assign force field parameters to the solvated ligand system.
        build_solvated_ligand_system = forcefield.BaseBuildSystem(
            "build_solvated_ligand_system")

        build_solvated_ligand_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        build_solvated_ligand_system.substance = ProtocolPath(
            "substance", "global")

        schema.protocols[build_solvated_ligand_system.
                         id] = build_solvated_ligand_system.schema

        # Employ YANK to estimate the binding free energy.
        yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")

        yank_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10

        yank_protocol.verbose = True

        yank_protocol.force_field_path = ProtocolPath("force_field_path",
                                                      "global")

        yank_protocol.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", perform_docking.id)
        yank_protocol.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", perform_docking.id)

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        yank_protocol.solvated_ligand_system = ProtocolPath(
            "parameterized_system", build_solvated_ligand_system.id)

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        yank_protocol.solvated_complex_system = ProtocolPath(
            "parameterized_system", build_solvated_complex_system.id)

        schema.protocols[yank_protocol.id] = yank_protocol.schema

        # Define where the final values come from.
        schema.final_value_source = ProtocolPath("free_energy_difference",
                                                 yank_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 7
0
    def default_simulation_schema(
        cls,
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_molecules=1000,
    ) -> SimulationSchema:
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Define the protocols to use for the fully mixed system.
        (
            mixture_protocols,
            mixture_value,
            mixture_stored_data,
        ) = generate_simulation_protocols(
            analysis.AverageObservable("extract_observable_mixture"),
            use_target_uncertainty,
            id_suffix="_mixture",
            n_molecules=n_molecules,
        )
        # Specify the average observable which should be estimated.
        mixture_protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            mixture_protocols.production_simulation.id,
        )
        (
            mixture_protocols.analysis_protocol.divisor,
            mixture_n_molar_molecules,
        ) = cls._n_molecules_divisor(
            ProtocolPath("output_number_of_molecules",
                         mixture_protocols.build_coordinates.id),
            "_mixture",
        )

        # Define the protocols to use for each component, creating a replicator that
        # will copy these for each component in the mixture substance.
        component_replicator = ProtocolReplicator("component_replicator")
        component_replicator.template_values = ProtocolPath(
            "components", "global")
        component_substance = ReplicatorValue(component_replicator.id)

        component_protocols, _, component_stored_data = generate_simulation_protocols(
            analysis.AverageObservable(
                f"extract_observable_component_{component_replicator.placeholder_id}"
            ),
            use_target_uncertainty,
            id_suffix=f"_component_{component_replicator.placeholder_id}",
            n_molecules=n_molecules,
        )
        # Make sure the protocols point to the correct substance.
        component_protocols.build_coordinates.substance = component_substance
        # Specify the average observable which should be estimated.
        component_protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{cls._observable_type().value}]",
            component_protocols.production_simulation.id,
        )
        (
            component_protocols.analysis_protocol.divisor,
            component_n_molar_molecules,
        ) = cls._n_molecules_divisor(
            ProtocolPath("output_number_of_molecules",
                         component_protocols.build_coordinates.id),
            f"_component_{component_replicator.placeholder_id}",
        )

        # Weight the component value by the mole fraction.
        weight_by_mole_fraction = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{component_replicator.placeholder_id}")
        weight_by_mole_fraction.value = ProtocolPath(
            "value", component_protocols.analysis_protocol.id)
        weight_by_mole_fraction.full_substance = ProtocolPath(
            "substance", "global")
        weight_by_mole_fraction.component = component_substance

        component_protocols.converge_uncertainty.add_protocols(
            weight_by_mole_fraction)

        # Make sure the convergence criteria is set to use the per component
        # uncertainty target.
        if use_target_uncertainty:
            component_protocols.converge_uncertainty.conditions[
                0].right_hand_value = ProtocolPath("per_component_uncertainty",
                                                   "global")

        # Finally, set up the protocols which will be responsible for adding together
        # the component observables, and subtracting these from the mixture system value.
        add_component_observables = miscellaneous.AddValues(
            "add_component_observables")
        add_component_observables.values = ProtocolPath(
            "weighted_value",
            component_protocols.converge_uncertainty.id,
            weight_by_mole_fraction.id,
        )

        calculate_excess_observable = miscellaneous.SubtractValues(
            "calculate_excess_observable")
        calculate_excess_observable.value_b = mixture_value
        calculate_excess_observable.value_a = ProtocolPath(
            "result", add_component_observables.id)

        # Build the final workflow schema
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            component_protocols.build_coordinates.schema,
            component_protocols.assign_parameters.schema,
            component_protocols.energy_minimisation.schema,
            component_protocols.equilibration_simulation.schema,
            component_protocols.converge_uncertainty.schema,
            component_protocols.decorrelate_trajectory.schema,
            component_protocols.decorrelate_observables.schema,
            mixture_protocols.build_coordinates.schema,
            mixture_protocols.assign_parameters.schema,
            mixture_protocols.energy_minimisation.schema,
            mixture_protocols.equilibration_simulation.schema,
            mixture_protocols.converge_uncertainty.schema,
            mixture_protocols.decorrelate_trajectory.schema,
            mixture_protocols.decorrelate_observables.schema,
            add_component_observables.schema,
            calculate_excess_observable.schema,
        ]

        if component_n_molar_molecules is not None:
            schema.protocol_schemas.append(component_n_molar_molecules.schema)
        if mixture_n_molar_molecules is not None:
            schema.protocol_schemas.append(mixture_n_molar_molecules.schema)

        schema.protocol_replicators = [component_replicator]

        schema.final_value_source = ProtocolPath(
            "result", calculate_excess_observable.id)

        schema.outputs_to_store = {
            "full_system":
            mixture_stored_data,
            f"component_{component_replicator.placeholder_id}":
            component_stored_data,
        }

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 8
0
    def _default_reweighting_schema(
        cls,
        observable_type: ObservableType,
        absolute_tolerance: unit.Quantity = UNDEFINED,
        relative_tolerance: float = UNDEFINED,
        n_effective_samples: int = 50,
    ) -> ReweightingSchema:
        """Returns the default calculation schema to use when estimating this class of
        property by re-weighting cached simulation data.

        This internal implementation allows re-weighting a different observable than
        may be specified by the `_observable_type` class property.

        Parameters
        ----------
        absolute_tolerance
            The absolute tolerance to estimate the property to within.
        relative_tolerance
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
            The default re-weighting calculation schema.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries
        calculation_schema.storage_queries = cls._default_reweighting_storage_query(
        )

        # Define the protocols which will re-weight the observable computed for the
        # fully mixed system.
        mixture_protocols, mixture_data_replicator = generate_reweighting_protocols(
            observable_type,
            "mixture_data_replicator",
            "_mixture",
        )
        mixture_protocols.reweight_observable.required_effective_samples = (
            n_effective_samples)

        divide_by_mixture_molecules = miscellaneous.DivideValue(
            "divide_by_mixture_molecules")
        divide_by_mixture_molecules.value = ProtocolPath(
            "value", mixture_protocols.reweight_observable.id)
        (
            divide_by_mixture_molecules.divisor,
            mixture_n_molar_molecules,
        ) = cls._n_molecules_divisor(
            ProtocolPath(
                "total_number_of_molecules",
                mixture_protocols.unpack_stored_data.id.replace(
                    mixture_data_replicator.placeholder_id, "0"),
            ),
            "_mixture",
        )

        # Define the protocols to use for each component, creating a replicator that
        # will copy these for each component in the full substance.
        component_replicator = ProtocolReplicator("component_replicator")
        component_replicator.template_values = ProtocolPath(
            "components", "global")

        component_protocols, component_data_replicator = generate_reweighting_protocols(
            observable_type,
            f"component_{component_replicator.placeholder_id}_data_replicator",
            f"_component_{component_replicator.placeholder_id}",
        )
        component_protocols.reweight_observable.required_effective_samples = (
            n_effective_samples)
        component_data_replicator.template_values = ProtocolPath(
            f"component_data[$({component_replicator.id})]", "global")

        divide_by_component_molecules = miscellaneous.DivideValue(
            f"divide_by_component_{component_replicator.placeholder_id}_molecules"
        )
        divide_by_component_molecules.value = ProtocolPath(
            "value", component_protocols.reweight_observable.id)
        (
            divide_by_component_molecules.divisor,
            component_n_molar_molecules,
        ) = cls._n_molecules_divisor(
            ProtocolPath(
                "total_number_of_molecules",
                component_protocols.unpack_stored_data.id.replace(
                    component_data_replicator.placeholder_id, "0"),
            ),
            f"_component_{component_replicator.placeholder_id}",
        )

        # Make sure the protocols point to the correct substance.
        component_substance = ReplicatorValue(component_replicator.id)

        component_protocols.build_reference_system.substance = component_substance
        component_protocols.build_target_system.substance = component_substance

        # Weight the component value by the mole fraction.
        weight_by_mole_fraction = miscellaneous.WeightByMoleFraction(
            f"weight_by_mole_fraction_{component_replicator.placeholder_id}")
        weight_by_mole_fraction.value = ProtocolPath(
            "result", divide_by_component_molecules.id)
        weight_by_mole_fraction.full_substance = ProtocolPath(
            "substance", "global")
        weight_by_mole_fraction.component = component_substance

        # Finally, set up the protocols which will be responsible for adding together
        # the component observables, and subtracting these from the full system value.
        add_component_observables = miscellaneous.AddValues(
            "add_component_observables")
        add_component_observables.values = ProtocolPath(
            "weighted_value",
            weight_by_mole_fraction.id,
        )

        calculate_excess_observable = miscellaneous.SubtractValues(
            "calculate_excess_observable")
        calculate_excess_observable.value_b = ProtocolPath(
            "result", divide_by_mixture_molecules.id)
        calculate_excess_observable.value_a = ProtocolPath(
            "result", add_component_observables.id)

        # Build the final workflow schema
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            *[x.schema for x in mixture_protocols if x is not None],
            divide_by_mixture_molecules.schema,
            *[x.schema for x in component_protocols if x is not None],
            divide_by_component_molecules.schema,
            weight_by_mole_fraction.schema,
            add_component_observables.schema,
            calculate_excess_observable.schema,
        ]

        if component_n_molar_molecules is not None:
            schema.protocol_schemas.append(component_n_molar_molecules.schema)
        if mixture_n_molar_molecules is not None:
            schema.protocol_schemas.append(mixture_n_molar_molecules.schema)

        schema.protocol_replicators = [
            mixture_data_replicator,
            component_replicator,
            component_data_replicator,
        ]

        schema.final_value_source = ProtocolPath(
            "result", calculate_excess_observable.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 9
0
    def default_reweighting_schema(
        cls,
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries
        calculation_schema.storage_queries = cls._default_reweighting_storage_query()

        # Set up a protocol to extract the liquid phase energy from the existing data.
        liquid_protocols, liquid_replicator = generate_reweighting_protocols(
            ObservableType.PotentialEnergy,
            id_suffix="_liquid",
            replicator_id="liquid_data_replicator",
        )
        liquid_replicator.template_values = ProtocolPath("liquid_data", "global")
        liquid_protocols.reweight_observable.required_effective_samples = (
            n_effective_samples
        )

        # Dive the potential by the number of liquid phase molecules from the first
        # piece of cached data.
        divide_by_liquid_molecules = miscellaneous.DivideValue(
            "divide_by_liquid_molecules"
        )
        divide_by_liquid_molecules.value = ProtocolPath(
            "value", liquid_protocols.reweight_observable.id
        )
        divide_by_liquid_molecules.divisor = ProtocolPath(
            "total_number_of_molecules",
            liquid_protocols.unpack_stored_data.id.replace(
                liquid_replicator.placeholder_id, "0"
            ),
        )

        # Set up a protocol to extract the gas phase energy from the existing data.
        gas_protocols, gas_replicator = generate_reweighting_protocols(
            ObservableType.PotentialEnergy,
            id_suffix="_gas",
            replicator_id="gas_data_replicator",
        )
        gas_replicator.template_values = ProtocolPath("gas_data", "global")
        gas_protocols.reweight_observable.required_effective_samples = (
            n_effective_samples
        )

        # Turn of PBC for the gas phase.
        gas_protocols.evaluate_reference_potential.enable_pbc = False
        gas_protocols.evaluate_target_potential.enable_pbc = False

        # Combine the values to estimate the final enthalpy of vaporization
        energy_of_vaporization = miscellaneous.SubtractValues("energy_of_vaporization")
        energy_of_vaporization.value_b = ProtocolPath(
            "value", gas_protocols.reweight_observable.id
        )
        energy_of_vaporization.value_a = ProtocolPath(
            "result", divide_by_liquid_molecules.id
        )

        ideal_volume = miscellaneous.MultiplyValue("ideal_volume")
        ideal_volume.value = 1.0 * unit.molar_gas_constant
        ideal_volume.multiplier = ProtocolPath(
            "thermodynamic_state.temperature", "global"
        )

        enthalpy_of_vaporization = miscellaneous.AddValues("enthalpy_of_vaporization")
        enthalpy_of_vaporization.values = [
            ProtocolPath("result", energy_of_vaporization.id),
            ProtocolPath("result", ideal_volume.id),
        ]

        # Build the workflow schema.
        schema = WorkflowSchema()
        schema.protocol_schemas = [
            *(x.schema for x in liquid_protocols if x is not None),
            *(x.schema for x in gas_protocols if x is not None),
            divide_by_liquid_molecules.schema,
            energy_of_vaporization.schema,
            ideal_volume.schema,
            enthalpy_of_vaporization.schema,
        ]
        schema.protocol_replicators = [liquid_replicator, gas_replicator]
        schema.final_value_source = ProtocolPath("result", enthalpy_of_vaporization.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 10
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # Define a custom conditional group which will ensure both the liquid and
        # gas enthalpies are estimated to within the specified uncertainty tolerance.
        converge_uncertainty = groups.ConditionalGroup("conditional_group")
        converge_uncertainty.max_iterations = 100

        # Define the protocols to perform the simulation in the liquid phase.
        average_liquid_energy = analysis.AverageObservable("average_liquid_potential")
        average_liquid_energy.divisor = n_molecules
        (
            liquid_protocols,
            liquid_value_source,
            liquid_output_to_store,
        ) = generate_simulation_protocols(
            average_liquid_energy,
            use_target_uncertainty,
            "_liquid",
            converge_uncertainty,
            n_molecules=n_molecules,
        )
        liquid_output_to_store.property_phase = PropertyPhase.Liquid

        liquid_protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{ObservableType.PotentialEnergy.value}]",
            liquid_protocols.production_simulation.id,
        )

        # Define the protocols to perform the simulation in the gas phase.
        average_gas_energy = analysis.AverageObservable("average_gas_potential")
        (
            gas_protocols,
            gas_value_source,
            gas_output_to_store,
        ) = generate_simulation_protocols(
            average_gas_energy,
            use_target_uncertainty,
            "_gas",
            converge_uncertainty,
            n_molecules=1,
        )
        gas_output_to_store.property_phase = PropertyPhase.Gas

        gas_protocols.analysis_protocol.observable = ProtocolPath(
            f"observables[{ObservableType.PotentialEnergy.value}]",
            gas_protocols.production_simulation.id,
        )

        # Specify that for the gas phase only a single molecule in vacuum should be
        # created.
        gas_protocols.build_coordinates.max_molecules = 1
        gas_protocols.build_coordinates.mass_density = (
            0.01 * unit.gram / unit.milliliter
        )

        # Run the gas phase simulations in the NVT ensemble without PBC
        gas_protocols.energy_minimisation.enable_pbc = False
        gas_protocols.equilibration_simulation.ensemble = Ensemble.NVT
        gas_protocols.equilibration_simulation.enable_pbc = False
        gas_protocols.production_simulation.ensemble = Ensemble.NVT
        gas_protocols.production_simulation.enable_pbc = False
        gas_protocols.production_simulation.steps_per_iteration = 15000000
        gas_protocols.production_simulation.output_frequency = 5000
        gas_protocols.production_simulation.checkpoint_frequency = 100

        # Due to a bizarre issue where the OMM Reference platform is
        # the fastest at computing properties of a single molecule
        # in vacuum, we enforce those inputs which will force the
        # gas calculations to run on the Reference platform.
        gas_protocols.equilibration_simulation.high_precision = True
        gas_protocols.equilibration_simulation.allow_gpu_platforms = False
        gas_protocols.production_simulation.high_precision = True
        gas_protocols.production_simulation.allow_gpu_platforms = False

        # Combine the values to estimate the final energy of vaporization
        energy_of_vaporization = miscellaneous.SubtractValues("energy_of_vaporization")
        energy_of_vaporization.value_b = ProtocolPath("value", average_gas_energy.id)
        energy_of_vaporization.value_a = ProtocolPath("value", average_liquid_energy.id)

        ideal_volume = miscellaneous.MultiplyValue("ideal_volume")
        ideal_volume.value = 1.0 * unit.molar_gas_constant
        ideal_volume.multiplier = ProtocolPath(
            "thermodynamic_state.temperature", "global"
        )

        enthalpy_of_vaporization = miscellaneous.AddValues("enthalpy_of_vaporization")
        enthalpy_of_vaporization.values = [
            ProtocolPath("result", energy_of_vaporization.id),
            ProtocolPath("result", ideal_volume.id),
        ]

        # Add the extra protocols and conditions to the custom conditional group.
        converge_uncertainty.add_protocols(
            energy_of_vaporization, ideal_volume, enthalpy_of_vaporization
        )

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan

            condition.left_hand_value = ProtocolPath(
                "result.error",
                converge_uncertainty.id,
                enthalpy_of_vaporization.id,
            )
            condition.right_hand_value = ProtocolPath("target_uncertainty", "global")

            gas_protocols.production_simulation.total_number_of_iterations = (
                ProtocolPath("current_iteration", converge_uncertainty.id)
            )
            liquid_protocols.production_simulation.total_number_of_iterations = (
                ProtocolPath("current_iteration", converge_uncertainty.id)
            )

            converge_uncertainty.add_condition(condition)

        # Build the workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            liquid_protocols.build_coordinates.schema,
            liquid_protocols.assign_parameters.schema,
            liquid_protocols.energy_minimisation.schema,
            liquid_protocols.equilibration_simulation.schema,
            liquid_protocols.decorrelate_trajectory.schema,
            liquid_protocols.decorrelate_observables.schema,
            gas_protocols.build_coordinates.schema,
            gas_protocols.assign_parameters.schema,
            gas_protocols.energy_minimisation.schema,
            gas_protocols.equilibration_simulation.schema,
            gas_protocols.decorrelate_trajectory.schema,
            gas_protocols.decorrelate_observables.schema,
            converge_uncertainty.schema,
        ]

        schema.outputs_to_store = {
            "liquid_data": liquid_output_to_store,
            "gas_data": gas_output_to_store,
        }

        schema.final_value_source = ProtocolPath(
            "result", converge_uncertainty.id, enthalpy_of_vaporization.id
        )

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 11
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # Set up the storage queries
        calculation_schema.storage_queries = (
            ExcessMolarVolume._default_reweighting_storage_query()
        )

        # Set up a replicator that will re-run the component reweighting workflow for each
        # component in the system.
        component_replicator = ProtocolReplicator(replicator_id="component_replicator")
        component_replicator.template_values = ProtocolPath("components", "global")

        gradient_replicator = ProtocolReplicator("gradient")
        gradient_replicator.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # Set up the protocols which will reweight data for the full system.
        full_data_replicator_id = "full_data_replicator"

        (
            full_protocols,
            full_volume,
            full_data_replicator,
            full_gradient_group,
            full_gradient_source,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_full",
            gradient_replicator.id,
            full_data_replicator_id,
            n_effective_samples=n_effective_samples,
        )

        # Set up the protocols which will reweight data for each component.
        component_data_replicator_id = (
            f"component_{component_replicator.placeholder_id}_data_replicator"
        )

        (
            component_protocols,
            component_volumes,
            component_data_replicator,
            component_gradient_group,
            component_gradient_source,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_component",
            gradient_replicator.id,
            component_data_replicator_id,
            replicator_id=component_replicator.id,
            weight_by_mole_fraction=True,
            substance_reference=ReplicatorValue(component_replicator.id),
            n_effective_samples=n_effective_samples,
        )

        # Make sure the replicator is only replicating over component data.
        component_data_replicator.template_values = ProtocolPath(
            f"component_data[$({component_replicator.id})]", "global"
        )

        add_component_molar_volumes = miscellaneous.AddValues(
            "add_component_molar_volumes"
        )
        add_component_molar_volumes.values = component_volumes

        calculate_excess_volume = miscellaneous.SubtractValues(
            "calculate_excess_potential"
        )
        calculate_excess_volume.value_b = full_volume
        calculate_excess_volume.value_a = ProtocolPath(
            "result", add_component_molar_volumes.id
        )

        # Combine the gradients.
        add_component_gradients = miscellaneous.AddValues(
            f"add_component_gradients" f"_{gradient_replicator.placeholder_id}"
        )
        add_component_gradients.values = component_gradient_source

        combine_gradients = miscellaneous.SubtractValues(
            f"combine_gradients_{gradient_replicator.placeholder_id}"
        )
        combine_gradients.value_b = full_gradient_source
        combine_gradients.value_a = ProtocolPath("result", add_component_gradients.id)

        # Build the final workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            *(x.schema for x in full_protocols),
            *(x.schema for x in component_protocols),
            add_component_molar_volumes.schema,
            calculate_excess_volume.schema,
            full_gradient_group.schema,
            component_gradient_group.schema,
            add_component_gradients.schema,
            combine_gradients.schema,
        ]

        schema.protocol_replicators = [
            full_data_replicator,
            component_replicator,
            component_data_replicator,
            gradient_replicator,
        ]

        schema.gradients_sources = [ProtocolPath("result", combine_gradients.id)]
        schema.final_value_source = ProtocolPath("result", calculate_excess_volume.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 12
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # Define the id of the replicator which will clone the gradient protocols
        # for each gradient key to be estimated.
        gradient_replicator_id = "gradient_replicator"

        # Set up a workflow to calculate the molar volume of the full, mixed system.
        (
            full_system_protocols,
            full_system_molar_molecules,
            full_system_volume,
            full_output,
            full_system_gradient_group,
            full_system_gradient_replicator,
            full_system_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_full",
            gradient_replicator_id,
            use_target_uncertainty=use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Set up a general workflow for calculating the molar volume of one of the system components.
        component_replicator_id = "component_replicator"
        component_substance = ReplicatorValue(component_replicator_id)

        # Make sure to weight by the mole fractions of the actual full system as these may be slightly
        # different to the mole fractions of the measure property due to rounding.
        full_substance = ProtocolPath(
            "output_substance", full_system_protocols.build_coordinates.id
        )

        (
            component_protocols,
            component_molar_molecules,
            component_volumes,
            component_output,
            component_gradient_group,
            component_gradient_replicator,
            component_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_component",
            gradient_replicator_id,
            replicator_id=component_replicator_id,
            weight_by_mole_fraction=True,
            component_substance_reference=component_substance,
            full_substance_reference=full_substance,
            use_target_uncertainty=use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Finally, set up the protocols which will be responsible for adding together
        # the component molar volumes, and subtracting these from the mixed system molar volume.
        add_component_molar_volumes = miscellaneous.AddValues(
            "add_component_molar_volumes"
        )
        add_component_molar_volumes.values = component_volumes

        calculate_excess_volume = miscellaneous.SubtractValues(
            "calculate_excess_volume"
        )
        calculate_excess_volume.value_b = full_system_volume
        calculate_excess_volume.value_a = ProtocolPath(
            "result", add_component_molar_volumes.id
        )

        # Create the replicator object which defines how the pure component
        # molar volume estimation protocols will be replicated for each component.
        component_replicator = ProtocolReplicator(replicator_id=component_replicator_id)
        component_replicator.template_values = ProtocolPath("components", "global")

        # Combine the gradients.
        add_component_gradients = miscellaneous.AddValues(
            f"add_component_gradients" f"_$({gradient_replicator_id})"
        )
        add_component_gradients.values = component_gradient

        combine_gradients = miscellaneous.SubtractValues(
            f"combine_gradients_$({gradient_replicator_id})"
        )
        combine_gradients.value_b = full_system_gradient
        combine_gradients.value_a = ProtocolPath("result", add_component_gradients.id)

        # Combine the gradient replicators.
        gradient_replicator = ProtocolReplicator(replicator_id=gradient_replicator_id)
        gradient_replicator.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # Build the final workflow schema
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            component_protocols.build_coordinates.schema,
            component_protocols.assign_parameters.schema,
            component_protocols.energy_minimisation.schema,
            component_protocols.equilibration_simulation.schema,
            component_protocols.converge_uncertainty.schema,
            component_molar_molecules.schema,
            full_system_protocols.build_coordinates.schema,
            full_system_protocols.assign_parameters.schema,
            full_system_protocols.energy_minimisation.schema,
            full_system_protocols.equilibration_simulation.schema,
            full_system_protocols.converge_uncertainty.schema,
            full_system_molar_molecules.schema,
            component_protocols.extract_uncorrelated_trajectory.schema,
            component_protocols.extract_uncorrelated_statistics.schema,
            full_system_protocols.extract_uncorrelated_trajectory.schema,
            full_system_protocols.extract_uncorrelated_statistics.schema,
            add_component_molar_volumes.schema,
            calculate_excess_volume.schema,
            component_gradient_group.schema,
            full_system_gradient_group.schema,
            add_component_gradients.schema,
            combine_gradients.schema,
        ]

        schema.protocol_replicators = [gradient_replicator, component_replicator]

        # Finally, tell the schemas where to look for its final values.
        schema.gradients_sources = [ProtocolPath("result", combine_gradients.id)]
        schema.final_value_source = ProtocolPath("result", calculate_excess_volume.id)

        schema.outputs_to_store = {
            "full_system": full_output,
            f"component_$({component_replicator_id})": component_output,
        }

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 13
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # Define the protocol which will extract the average density from
        # the results of a simulation.
        extract_density = analysis.ExtractAverageStatistic("extract_density")
        extract_density.statistics_type = ObservableType.Density

        # Define the protocols which will run the simulation itself.
        protocols, value_source, output_to_store = generate_base_simulation_protocols(
            extract_density,
            use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Set up the gradient calculations
        coordinate_source = ProtocolPath(
            "output_coordinate_file", protocols.equilibration_simulation.id
        )
        trajectory_source = ProtocolPath(
            "trajectory_file_path",
            protocols.converge_uncertainty.id,
            protocols.production_simulation.id,
        )
        statistics_source = ProtocolPath(
            "statistics_file_path",
            protocols.converge_uncertainty.id,
            protocols.production_simulation.id,
        )

        reweight_density_template = reweighting.ReweightStatistics("")
        reweight_density_template.statistics_type = ObservableType.Density
        reweight_density_template.statistics_paths = statistics_source
        reweight_density_template.reference_reduced_potentials = statistics_source

        (
            gradient_group,
            gradient_replicator,
            gradient_source,
        ) = generate_gradient_protocol_group(
            reweight_density_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_source,
            trajectory_source,
            statistics_source,
        )

        # Build the workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            protocols.converge_uncertainty.schema,
            protocols.extract_uncorrelated_trajectory.schema,
            protocols.extract_uncorrelated_statistics.schema,
            gradient_group.schema,
        ]

        schema.protocol_replicators = [gradient_replicator]

        schema.outputs_to_store = {"full_system": output_to_store}

        schema.gradients_sources = [gradient_source]
        schema.final_value_source = value_source

        calculation_schema.workflow_schema = schema
        return calculation_schema
Esempio n. 14
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        data_replicator_id = "data_replicator"

        # The protocol which will be used to calculate the densities from
        # the existing data.
        density_calculation = analysis.ExtractAverageStatistic(
            f"calc_density_$({data_replicator_id})"
        )
        density_calculation.statistics_type = ObservableType.Density

        reweight_density = reweighting.ReweightStatistics("reweight_density")
        reweight_density.statistics_type = ObservableType.Density
        reweight_density.required_effective_samples = n_effective_samples

        protocols, data_replicator = generate_base_reweighting_protocols(
            density_calculation, reweight_density, data_replicator_id
        )

        # Set up the gradient calculations
        coordinate_path = ProtocolPath(
            "output_coordinate_path", protocols.concatenate_trajectories.id
        )
        trajectory_path = ProtocolPath(
            "output_trajectory_path", protocols.concatenate_trajectories.id
        )
        statistics_path = ProtocolPath(
            "statistics_file_path", protocols.reduced_target_potential.id
        )

        reweight_density_template = copy.deepcopy(reweight_density)

        (
            gradient_group,
            gradient_replicator,
            gradient_source,
        ) = generate_gradient_protocol_group(
            reweight_density_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_path,
            trajectory_path,
            statistics_path,
            replicator_id="grad",
            effective_sample_indices=ProtocolPath(
                "effective_sample_indices", protocols.mbar_protocol.id
            ),
        )

        schema = WorkflowSchema()
        schema.protocol_schemas = [
            *(x.schema for x in protocols),
            gradient_group.schema,
        ]
        schema.protocol_replicators = [data_replicator, gradient_replicator]
        schema.gradients_sources = [gradient_source]
        schema.final_value_source = ProtocolPath("value", protocols.mbar_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema