Example #1
0
def test_index_replicated_protocol():

    replicator = ProtocolReplicator("replicator")
    replicator.template_values = ["a", "b", "c", "d"]

    replicated_protocol = DummyInputOutputProtocol(
        f"protocol_{replicator.placeholder_id}")
    replicated_protocol.input_value = ReplicatorValue(replicator.id)

    schema = WorkflowSchema()
    schema.protocol_replicators = [replicator]
    schema.protocol_schemas = [replicated_protocol.schema]

    for index in range(len(replicator.template_values)):

        indexing_protocol = DummyInputOutputProtocol(
            f"indexing_protocol_{index}")
        indexing_protocol.input_value = ProtocolPath("output_value",
                                                     f"protocol_{index}")
        schema.protocol_schemas.append(indexing_protocol.schema)

    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema
Example #2
0
def test_nested_input():

    dict_protocol = DummyInputOutputProtocol("dict_protocol")
    dict_protocol.input_value = {"a": ThermodynamicState(1.0 * unit.kelvin)}

    quantity_protocol = DummyInputOutputProtocol("quantity_protocol")
    quantity_protocol.input_value = ProtocolPath("output_value[a].temperature",
                                                 dict_protocol.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [dict_protocol.schema, quantity_protocol.schema]
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as temporary_directory:

        with DaskLocalCluster() as calculation_backend:

            results_futures = workflow_graph.execute(temporary_directory,
                                                     calculation_backend)

            assert len(results_futures) == 1
            result = results_futures[0].result()

    assert isinstance(result, WorkflowResult)
Example #3
0
def test_simple_workflow_graph(calculation_backend, compute_resources,
                               exception):

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema, protocol_b.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_b.id)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        if calculation_backend is not None:

            with DaskLocalCluster() as calculation_backend:

                if exception:

                    with pytest.raises(AssertionError):

                        workflow_graph.execute(directory, calculation_backend,
                                               compute_resources)

                    return

                else:

                    results_futures = workflow_graph.execute(
                        directory, calculation_backend, compute_resources)

                assert len(results_futures) == 1
                result = results_futures[0].result()

        else:

            result = workflow_graph.execute(directory, calculation_backend,
                                            compute_resources)[0]

            if exception:

                with pytest.raises(AssertionError):

                    workflow_graph.execute(directory, calculation_backend,
                                           compute_resources)

                return

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
Example #4
0
def test_from_schema():

    protocol_a = DummyProtocol("protocol_a")
    protocol_a.input_value = 1 * unit.kelvin

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema]

    workflow = Workflow.from_schema(schema, {}, unique_id="")

    assert workflow is not None

    rebuilt_schema = workflow.schema
    rebuilt_schema.outputs_to_store = UNDEFINED

    assert rebuilt_schema.json(format=True) == schema.json(format=True)
Example #5
0
def test_nested_replicators():

    dummy_schema = WorkflowSchema()

    dummy_protocol = DummyReplicableProtocol("dummy_$(rep_a)_$(rep_b)")

    dummy_protocol.replicated_value_a = ReplicatorValue("rep_a")
    dummy_protocol.replicated_value_b = ReplicatorValue("rep_b")

    dummy_schema.protocol_schemas = [dummy_protocol.schema]

    replicator_a = ProtocolReplicator(replicator_id="rep_a")
    replicator_a.template_values = ["a", "b"]

    replicator_b = ProtocolReplicator(replicator_id="rep_b")
    replicator_b.template_values = [1, 2]

    dummy_schema.protocol_replicators = [replicator_a, replicator_b]

    dummy_schema.validate()

    dummy_property = create_dummy_property(Density)

    dummy_metadata = Workflow.generate_default_metadata(
        dummy_property, "smirnoff99Frosst-1.1.0.offxml", [])

    dummy_workflow = Workflow(dummy_metadata, "")
    dummy_workflow.schema = dummy_schema

    assert len(dummy_workflow.protocols) == 4

    assert dummy_workflow.protocols["dummy_0_0"].replicated_value_a == "a"
    assert dummy_workflow.protocols["dummy_0_1"].replicated_value_a == "a"

    assert dummy_workflow.protocols["dummy_1_0"].replicated_value_a == "b"
    assert dummy_workflow.protocols["dummy_1_1"].replicated_value_a == "b"

    assert dummy_workflow.protocols["dummy_0_0"].replicated_value_b == 1
    assert dummy_workflow.protocols["dummy_0_1"].replicated_value_b == 2

    assert dummy_workflow.protocols["dummy_1_0"].replicated_value_b == 1
    assert dummy_workflow.protocols["dummy_1_1"].replicated_value_b == 2

    print(dummy_workflow.schema)
Example #6
0
def test_workflow_with_groups():

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    conditional_group = ConditionalGroup("conditional_group")
    conditional_group.add_protocols(protocol_a, protocol_b)

    condition = ConditionalGroup.Condition()
    condition.right_hand_value = 2 * unit.kelvin
    condition.type = ConditionalGroup.Condition.Type.LessThan
    condition.left_hand_value = ProtocolPath("output_value.value",
                                             conditional_group.id,
                                             protocol_b.id)
    conditional_group.add_condition(condition)

    schema = WorkflowSchema()
    schema.protocol_schemas = [conditional_group.schema]
    schema.final_value_source = ProtocolPath("output_value",
                                             conditional_group.id,
                                             protocol_b.id)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        with DaskLocalCluster() as calculation_backend:

            results_futures = workflow_graph.execute(directory,
                                                     calculation_backend)
            assert len(results_futures) == 1

            result = results_futures[0].result()

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
Example #7
0
def test_replicated_ids():

    replicator = ProtocolReplicator("replicator-a")

    protocol_a = DummyProtocol("protocol-a")
    protocol_a.input_value = 1

    group_a = ProtocolGroup(f"group-a-{replicator.placeholder_id}")
    group_a.add_protocols(protocol_a)

    schema = WorkflowSchema()
    schema.protocol_schemas = [group_a.schema]
    schema.protocol_replicators = [replicator]

    with pytest.raises(ValueError) as error_info:
        schema.validate()

    assert (
        f"The children of replicated protocol {group_a.id} must also contain the "
        "replicators placeholder" in str(error_info.value))
Example #8
0
def test_unique_ids():

    protocol_a = DummyProtocol("protocol-a")
    protocol_a.input_value = 1

    group_a = ProtocolGroup("group-a")
    group_a.add_protocols(protocol_a)

    group_b = ProtocolGroup("group-b")
    group_b.add_protocols(protocol_a)

    schema = WorkflowSchema()
    schema.protocol_schemas = [group_a.schema, group_b.schema]

    with pytest.raises(ValueError) as error_info:
        schema.validate()

    assert "Several protocols in the schema have the same id" in str(
        error_info.value)
    assert "protocol-a" in str(error_info.value)
Example #9
0
def test_advanced_nested_replicators():

    dummy_schema = WorkflowSchema()

    replicator_a = ProtocolReplicator(replicator_id="replicator_a")
    replicator_a.template_values = ["a", "b"]

    replicator_b = ProtocolReplicator(
        replicator_id=f"replicator_b_{replicator_a.placeholder_id}")
    replicator_b.template_values = ProtocolPath(
        f"dummy_list[{replicator_a.placeholder_id}]", "global")

    dummy_protocol = DummyReplicableProtocol(f"dummy_"
                                             f"{replicator_a.placeholder_id}_"
                                             f"{replicator_b.placeholder_id}")

    dummy_protocol.replicated_value_a = ReplicatorValue(replicator_a.id)
    dummy_protocol.replicated_value_b = ReplicatorValue(replicator_b.id)

    dummy_schema.protocol_schemas = [dummy_protocol.schema]
    dummy_schema.protocol_replicators = [replicator_a, replicator_b]

    dummy_schema.validate()

    dummy_property = create_dummy_property(Density)
    dummy_metadata = Workflow.generate_default_metadata(
        dummy_property, "smirnoff99Frosst-1.1.0.offxml", [])
    dummy_metadata["dummy_list"] = [[1], [2]]

    dummy_workflow = Workflow(dummy_metadata, "")
    dummy_workflow.schema = dummy_schema

    assert len(dummy_workflow.protocols) == 2

    assert dummy_workflow.protocols["dummy_0_0"].replicated_value_a == "a"
    assert dummy_workflow.protocols["dummy_0_0"].replicated_value_b == 1

    assert dummy_workflow.protocols["dummy_1_0"].replicated_value_a == "b"
    assert dummy_workflow.protocols["dummy_1_0"].replicated_value_b == 2
Example #10
0
def test_group_replicators():

    dummy_schema = WorkflowSchema()

    replicator_id = "replicator"

    dummy_replicated_protocol = DummyInputOutputProtocol(
        f"dummy_$({replicator_id})")
    dummy_replicated_protocol.input_value = ReplicatorValue(replicator_id)

    dummy_group = ProtocolGroup("dummy_group")
    dummy_group.add_protocols(dummy_replicated_protocol)

    dummy_protocol_single_value = DummyInputOutputProtocol(
        f"dummy_single_$({replicator_id})")
    dummy_protocol_single_value.input_value = ProtocolPath(
        "output_value", dummy_group.id, dummy_replicated_protocol.id)

    dummy_protocol_list_value = AddValues("dummy_list")
    dummy_protocol_list_value.values = ProtocolPath(
        "output_value", dummy_group.id, dummy_replicated_protocol.id)

    dummy_schema.protocol_schemas = [
        dummy_group.schema,
        dummy_protocol_single_value.schema,
        dummy_protocol_list_value.schema,
    ]

    replicator = ProtocolReplicator(replicator_id)

    replicator.template_values = [
        (1.0 * unit.kelvin).plus_minus(1.0 * unit.kelvin),
        (2.0 * unit.kelvin).plus_minus(2.0 * unit.kelvin),
    ]

    dummy_schema.protocol_replicators = [replicator]
    dummy_schema.validate()

    dummy_property = create_dummy_property(Density)

    dummy_metadata = Workflow.generate_default_metadata(
        dummy_property, "smirnoff99Frosst-1.1.0.offxml", [])

    dummy_workflow = Workflow(dummy_metadata, "")
    dummy_workflow.schema = dummy_schema

    assert len(dummy_workflow.protocols) == 4

    assert (dummy_workflow.protocols[dummy_group.id].protocols["dummy_0"].
            input_value.value == replicator.template_values[0].value)
    assert (dummy_workflow.protocols[dummy_group.id].protocols["dummy_1"].
            input_value.value == replicator.template_values[1].value)

    assert dummy_workflow.protocols[
        "dummy_single_0"].input_value == ProtocolPath("output_value",
                                                      dummy_group.id,
                                                      "dummy_0")
    assert dummy_workflow.protocols[
        "dummy_single_1"].input_value == ProtocolPath("output_value",
                                                      dummy_group.id,
                                                      "dummy_1")

    assert len(dummy_workflow.protocols["dummy_list"].values) == 2

    assert dummy_workflow.protocols["dummy_list"].values[0] == ProtocolPath(
        "output_value", dummy_group.id, "dummy_0")
    assert dummy_workflow.protocols["dummy_list"].values[1] == ProtocolPath(
        "output_value", dummy_group.id, "dummy_1")
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        data_replicator_id = "data_replicator"

        # Set up a protocol to extract the dielectric constant from the stored data.
        extract_dielectric = ExtractAverageDielectric(
            f"calc_dielectric_$({data_replicator_id})")

        # For the dielectric constant, we employ a slightly more advanced reweighting
        # protocol set up for calculating fluctuation properties.
        reweight_dielectric = ReweightDielectricConstant("reweight_dielectric")
        reweight_dielectric.reference_dipole_moments = ProtocolPath(
            "uncorrelated_values", extract_dielectric.id)
        reweight_dielectric.reference_volumes = ProtocolPath(
            "uncorrelated_volumes", extract_dielectric.id)
        reweight_dielectric.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        reweight_dielectric.bootstrap_uncertainties = True
        reweight_dielectric.bootstrap_iterations = 200
        reweight_dielectric.required_effective_samples = n_effective_samples

        protocols, data_replicator = generate_base_reweighting_protocols(
            extract_dielectric, reweight_dielectric, data_replicator_id)

        # Make sure input is taken from the correct protocol outputs.
        extract_dielectric.system_path = ProtocolPath(
            "system_path", protocols.build_reference_system.id)
        extract_dielectric.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", protocols.unpack_stored_data.id)

        # Set up the gradient calculations
        coordinate_path = ProtocolPath("output_coordinate_path",
                                       protocols.concatenate_trajectories.id)
        trajectory_path = ProtocolPath("output_trajectory_path",
                                       protocols.concatenate_trajectories.id)
        statistics_path = ProtocolPath("statistics_file_path",
                                       protocols.reduced_target_potential.id)

        reweight_dielectric_template = copy.deepcopy(reweight_dielectric)

        (
            gradient_group,
            gradient_replicator,
            gradient_source,
        ) = generate_gradient_protocol_group(
            reweight_dielectric_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_path,
            trajectory_path,
            statistics_path,
            replicator_id="grad",
            effective_sample_indices=ProtocolPath("effective_sample_indices",
                                                  reweight_dielectric.id),
        )

        schema = WorkflowSchema()
        schema.protocol_schemas = [
            *(x.schema for x in protocols),
            gradient_group.schema,
        ]
        schema.protocol_replicators = [data_replicator, gradient_replicator]
        schema.gradients_sources = [gradient_source]
        schema.final_value_source = ProtocolPath("value",
                                                 protocols.mbar_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
def test_workflow_layer():
    """Test the `WorkflowLayer` calculation layer. As the `SimulationLayer`
    is the simplest implementation of the abstract layer, we settle for
    testing this."""

    properties_to_estimate = [
        create_dummy_property(Density),
        create_dummy_property(Density),
    ]

    # Create a very simple workflow which just returns some placeholder
    # value.
    estimated_value = Observable(
        (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin))
    protocol_a = DummyProtocol("protocol_a")
    protocol_a.input_value = estimated_value

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_a.id)

    layer_schema = SimulationSchema()
    layer_schema.workflow_schema = schema

    options = RequestOptions()
    options.add_schema("SimulationLayer", "Density", layer_schema)

    batch = server.Batch()
    batch.queued_properties = properties_to_estimate
    batch.options = options

    with tempfile.TemporaryDirectory() as directory:

        with temporarily_change_directory(directory):

            # Create a directory for the layer.
            layer_directory = "simulation_layer"
            os.makedirs(layer_directory)

            # Set-up a simple storage backend and add a force field to it.
            force_field = SmirnoffForceFieldSource.from_path(
                "smirnoff99Frosst-1.1.0.offxml")

            storage_backend = LocalFileStorage()
            batch.force_field_id = storage_backend.store_force_field(
                force_field)

            # Create a simple calculation backend to test with.
            with DaskLocalCluster() as calculation_backend:

                def dummy_callback(returned_request):

                    assert len(returned_request.estimated_properties) == 2
                    assert len(returned_request.exceptions) == 0

                simulation_layer = SimulationLayer()

                simulation_layer.schedule_calculation(
                    calculation_backend,
                    storage_backend,
                    layer_directory,
                    batch,
                    dummy_callback,
                    True,
                )
Example #13
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # Define the protocols which will run the simulation itself.
        protocols, value_source, output_to_store = generate_simulation_protocols(
            AverageDielectricConstant("average_dielectric"),
            use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Add a protocol to compute the dipole moments and pass these to
        # the analysis protocol.
        compute_dipoles = ComputeDipoleMoments("compute_dipoles")
        compute_dipoles.parameterized_system = ProtocolPath(
            "parameterized_system", protocols.assign_parameters.id
        )
        compute_dipoles.trajectory_path = ProtocolPath(
            "trajectory_file_path", protocols.production_simulation.id
        )
        compute_dipoles.gradient_parameters = ProtocolPath(
            "parameter_gradient_keys", "global"
        )
        protocols.converge_uncertainty.add_protocols(compute_dipoles)

        protocols.analysis_protocol.volumes = ProtocolPath(
            f"observables[{ObservableType.Volume.value}]",
            protocols.production_simulation.id,
        )
        protocols.analysis_protocol.dipole_moments = ProtocolPath(
            "dipole_moments",
            compute_dipoles.id,
        )

        # Build the workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            protocols.converge_uncertainty.schema,
            protocols.decorrelate_trajectory.schema,
            protocols.decorrelate_observables.schema,
        ]

        schema.outputs_to_store = {"full_system": output_to_store}
        schema.final_value_source = value_source

        calculation_schema.workflow_schema = schema
        return calculation_schema
Example #14
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Returns the default calculation schema to use when estimating
        this property by reweighting existing data.

        Parameters
        ----------
        absolute_tolerance: openff.evaluator.unit.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = ReweightingSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        protocols, data_replicator = generate_base_reweighting_protocols(
            statistical_inefficiency=AverageDielectricConstant(
                "average_dielectric_$(data_replicator)"
            ),
            reweight_observable=ReweightDielectricConstant("reweight_dielectric"),
        )
        protocols.zero_gradients.input_observables = ProtocolPath(
            "output_observables[Volume]",
            protocols.join_observables.id,
        )
        protocols.statistical_inefficiency.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )
        protocols.reweight_observable.required_effective_samples = n_effective_samples

        # We don't need to perform bootstrapping as this protocol is only used to
        # calculate the statistical inefficiency and equilibration time. The
        # re-weighting protocol will instead compute the bootstrapped uncertainties.
        protocols.statistical_inefficiency.bootstrap_iterations = 1

        # Set up a protocol to re-evaluate the dipole moments at the target state
        # and concatenate the into a single array.
        compute_dipoles = ComputeDipoleMoments("compute_dipoles_$(data_replicator)")
        compute_dipoles.parameterized_system = ProtocolPath(
            "parameterized_system", protocols.build_target_system.id
        )
        compute_dipoles.trajectory_path = ProtocolPath(
            "trajectory_file_path", protocols.unpack_stored_data.id
        )
        compute_dipoles.gradient_parameters = ProtocolPath(
            "parameter_gradient_keys", "global"
        )
        join_dipoles = ConcatenateObservables("join_dipoles")
        join_dipoles.input_observables = ProtocolPath(
            "dipole_moments",
            compute_dipoles.id,
        )

        # Point the dielectric protocols to the volumes and dipole moments.
        protocols.statistical_inefficiency.volumes = ProtocolPath(
            "observables[Volume]", protocols.unpack_stored_data.id
        )
        protocols.statistical_inefficiency.dipole_moments = ProtocolPath(
            "dipole_moments", compute_dipoles.id
        )

        # Make sure to decorrelate the dipole moments.
        decorrelate_dipoles = DecorrelateObservables("decorrelate_dipoles")
        decorrelate_dipoles.time_series_statistics = ProtocolPath(
            "time_series_statistics", protocols.statistical_inefficiency.id
        )
        decorrelate_dipoles.input_observables = ProtocolPath(
            "output_observables", join_dipoles.id
        )

        protocols.reweight_observable.dipole_moments = ProtocolPath(
            "output_observables", decorrelate_dipoles.id
        )
        protocols.reweight_observable.volumes = ProtocolPath(
            "output_observables", protocols.decorrelate_observable.id
        )
        protocols.reweight_observable.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )

        schema = WorkflowSchema()
        schema.protocol_schemas = [
            *(x.schema for x in protocols),
            compute_dipoles.schema,
            join_dipoles.schema,
            decorrelate_dipoles.schema,
        ]
        schema.protocol_replicators = [data_replicator]
        schema.final_value_source = ProtocolPath(
            "value", protocols.reweight_observable.id
        )

        calculation_schema.workflow_schema = schema
        return calculation_schema
Example #15
0
    def default_simulation_schema(existing_schema=None):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set,
            the schema's `workflow_schema` will be overwritten
            by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:

            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        schema = WorkflowSchema(
            property_type=HostGuestBindingAffinity.__name__)
        schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")

        # Initial coordinate and topology setup.
        filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand")
        filter_ligand.input_substance = ProtocolPath("substance", "global")

        filter_ligand.component_roles = [Component.Role.Ligand]
        # We only support substances with a single guest ligand.
        filter_ligand.expected_components = 1

        schema.protocols[filter_ligand.id] = filter_ligand.schema

        # Construct the protocols which will (for now) take as input a set of host coordinates,
        # and generate a set of charges for them.
        filter_receptor = miscellaneous.FilterSubstanceByRole(
            "filter_receptor")
        filter_receptor.input_substance = ProtocolPath("substance", "global")

        filter_receptor.component_roles = [Component.Role.Receptor]
        # We only support substances with a single host receptor.
        filter_receptor.expected_components = 1

        schema.protocols[filter_receptor.id] = filter_receptor.schema

        # Perform docking to position the guest within the host.
        perform_docking = coordinates.BuildDockedCoordinates("perform_docking")

        perform_docking.ligand_substance = ProtocolPath(
            "filtered_substance", filter_ligand.id)
        perform_docking.receptor_coordinate_file = ProtocolPath(
            "receptor_mol2", "global")

        schema.protocols[perform_docking.id] = perform_docking.schema

        # Solvate the docked structure using packmol
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        schema.protocols[filter_solvent.id] = filter_solvent.schema

        solvate_complex = coordinates.SolvateExistingStructure(
            "solvate_complex")
        solvate_complex.max_molecules = 1000

        solvate_complex.substance = ProtocolPath("filtered_substance",
                                                 filter_solvent.id)
        solvate_complex.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", perform_docking.id)

        schema.protocols[solvate_complex.id] = solvate_complex.schema

        # Assign force field parameters to the solvated complex system.
        build_solvated_complex_system = forcefield.BaseBuildSystem(
            "build_solvated_complex_system")

        build_solvated_complex_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        build_solvated_complex_system.substance = ProtocolPath(
            "substance", "global")

        build_solvated_complex_system.charged_molecule_paths = [
            ProtocolPath("receptor_mol2", "global")
        ]

        schema.protocols[build_solvated_complex_system.
                         id] = build_solvated_complex_system.schema

        # Solvate the ligand using packmol
        solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
        solvate_ligand.max_molecules = 1000

        solvate_ligand.substance = ProtocolPath("filtered_substance",
                                                filter_solvent.id)
        solvate_ligand.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", perform_docking.id)

        schema.protocols[solvate_ligand.id] = solvate_ligand.schema

        # Assign force field parameters to the solvated ligand system.
        build_solvated_ligand_system = forcefield.BaseBuildSystem(
            "build_solvated_ligand_system")

        build_solvated_ligand_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        build_solvated_ligand_system.substance = ProtocolPath(
            "substance", "global")

        schema.protocols[build_solvated_ligand_system.
                         id] = build_solvated_ligand_system.schema

        # Employ YANK to estimate the binding free energy.
        yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")

        yank_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10

        yank_protocol.verbose = True

        yank_protocol.force_field_path = ProtocolPath("force_field_path",
                                                      "global")

        yank_protocol.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", perform_docking.id)
        yank_protocol.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", perform_docking.id)

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        yank_protocol.solvated_ligand_system = ProtocolPath(
            "system_path", build_solvated_ligand_system.id)

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        yank_protocol.solvated_complex_system = ProtocolPath(
            "system_path", build_solvated_complex_system.id)

        schema.protocols[yank_protocol.id] = yank_protocol.schema

        # Define where the final values come from.
        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 yank_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Example #16
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=2000):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Setup the fully solvated systems.
        build_full_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_solvated_coordinates")
        build_full_coordinates.substance = ProtocolPath("substance", "global")
        build_full_coordinates.max_molecules = n_molecules

        assign_full_parameters = forcefield.BaseBuildSystem(
            "assign_solvated_parameters")
        assign_full_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_full_parameters.substance = ProtocolPath("substance", "global")
        assign_full_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        # Perform a quick minimisation of the full system to give
        # YANK a better starting point for its minimisation.
        energy_minimisation = openmm.OpenMMEnergyMinimisation(
            "energy_minimisation")
        energy_minimisation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        energy_minimisation.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        equilibration_simulation = openmm.OpenMMSimulation(
            "equilibration_simulation")
        equilibration_simulation.ensemble = Ensemble.NPT
        equilibration_simulation.steps_per_iteration = 100000
        equilibration_simulation.output_frequency = 10000
        equilibration_simulation.timestep = 2.0 * unit.femtosecond
        equilibration_simulation.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        equilibration_simulation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        equilibration_simulation.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", energy_minimisation.id)

        # Create a substance which only contains the solute (e.g. for the
        # vacuum phase simulations).
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute")
        filter_solute.input_substance = ProtocolPath("substance", "global")
        filter_solute.component_roles = [Component.Role.Solute]

        # Setup the solute in vacuum system.
        build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_vacuum_coordinates")
        build_vacuum_coordinates.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        build_vacuum_coordinates.max_molecules = 1

        assign_vacuum_parameters = forcefield.BaseBuildSystem(
            "assign_parameters")
        assign_vacuum_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_vacuum_parameters.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)

        # Set up the protocol to run yank.
        run_yank = yank.SolvationYankProtocol("run_solvation_yank")
        run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id)
        run_yank.solvent_1 = ProtocolPath("filtered_substance",
                                          filter_solvent.id)
        run_yank.solvent_2 = Substance()
        run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state",
                                                    "global")
        run_yank.steps_per_iteration = 500
        run_yank.checkpoint_interval = 50
        run_yank.solvent_1_coordinates = ProtocolPath(
            "output_coordinate_file", equilibration_simulation.id)
        run_yank.solvent_1_system = ProtocolPath("system_path",
                                                 assign_full_parameters.id)
        run_yank.solvent_2_coordinates = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)
        run_yank.solvent_2_system = ProtocolPath("system_path",
                                                 assign_vacuum_parameters.id)

        # Set up the group which will run yank until the free energy has been determined to within
        # a given uncertainty
        conditional_group = groups.ConditionalGroup("conditional_group")
        conditional_group.max_iterations = 20

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.right_hand_value = ProtocolPath("target_uncertainty",
                                                      "global")
            condition.left_hand_value = ProtocolPath(
                "estimated_free_energy.error", conditional_group.id,
                run_yank.id)

            conditional_group.add_condition(condition)

        # Define the total number of iterations that yank should run for.
        total_iterations = miscellaneous.MultiplyValue("total_iterations")
        total_iterations.value = 2000
        total_iterations.multiplier = ProtocolPath("current_iteration",
                                                   conditional_group.id)

        # Make sure the simulations gets extended after each iteration.
        run_yank.number_of_iterations = ProtocolPath("result",
                                                     total_iterations.id)

        conditional_group.add_protocols(total_iterations, run_yank)

        # Define the full workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            build_full_coordinates.schema,
            assign_full_parameters.schema,
            energy_minimisation.schema,
            equilibration_simulation.schema,
            filter_solvent.schema,
            filter_solute.schema,
            build_vacuum_coordinates.schema,
            assign_vacuum_parameters.schema,
            conditional_group.schema,
        ]

        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 conditional_group.id,
                                                 run_yank.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=1000):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # Define the protocol which will extract the average dielectric constant
        # from the results of a simulation.
        extract_dielectric = ExtractAverageDielectric("extract_dielectric")
        extract_dielectric.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        # Define the protocols which will run the simulation itself.
        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        protocols, value_source, output_to_store = generate_base_simulation_protocols(
            extract_dielectric,
            use_target_uncertainty,
            n_molecules=n_molecules,
        )

        # Make sure the input of the analysis protcol is properly hooked up.
        extract_dielectric.system_path = ProtocolPath(
            "system_path", protocols.assign_parameters.id)

        # Dielectric constants typically take longer to converge, so we need to
        # reflect this in the maximum number of convergence iterations.
        protocols.converge_uncertainty.max_iterations = 400

        # Set up the gradient calculations. For dielectric constants, we need to use
        # a slightly specialised reweighting protocol which we set up here.
        coordinate_source = ProtocolPath("output_coordinate_file",
                                         protocols.equilibration_simulation.id)
        trajectory_source = ProtocolPath(
            "trajectory_file_path",
            protocols.converge_uncertainty.id,
            protocols.production_simulation.id,
        )
        statistics_source = ProtocolPath(
            "statistics_file_path",
            protocols.converge_uncertainty.id,
            protocols.production_simulation.id,
        )

        gradient_mbar_protocol = ReweightDielectricConstant("gradient_mbar")
        gradient_mbar_protocol.reference_dipole_moments = [
            ProtocolPath(
                "dipole_moments",
                protocols.converge_uncertainty.id,
                extract_dielectric.id,
            )
        ]
        gradient_mbar_protocol.reference_volumes = [
            ProtocolPath("volumes", protocols.converge_uncertainty.id,
                         extract_dielectric.id)
        ]
        gradient_mbar_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        gradient_mbar_protocol.reference_reduced_potentials = statistics_source

        (
            gradient_group,
            gradient_replicator,
            gradient_source,
        ) = generate_gradient_protocol_group(
            gradient_mbar_protocol,
            ProtocolPath("force_field_path", "global"),
            coordinate_source,
            trajectory_source,
            statistics_source,
        )

        # Build the workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            protocols.converge_uncertainty.schema,
            protocols.extract_uncorrelated_trajectory.schema,
            protocols.extract_uncorrelated_statistics.schema,
            gradient_group.schema,
        ]

        schema.protocol_replicators = [gradient_replicator]

        schema.outputs_to_store = {"full_system": output_to_store}

        schema.gradients_sources = [gradient_source]
        schema.final_value_source = value_source

        calculation_schema.workflow_schema = schema
        return calculation_schema