def build_merge(prefix):
    """Create six dummy protocols in which two chains merge into one.

    Parameters
    ----------
    prefix: str
        The text to prepend to the id of each protocol.

    Returns
    -------
    list of DummyInputOutputProtocol
        The protocols ordered as a, b, c, d, e, f.
    """

    # Dependency layout:
    #
    # a - b \
    #       | - e - f
    # c - d /

    def make_protocol(name, input_value):
        # Create a prefixed protocol and assign its input in one step.
        new_protocol = DummyInputOutputProtocol(prefix + name)
        new_protocol.input_value = input_value
        return new_protocol

    def output_of(upstream):
        # Reference the `output_value` of an already created protocol.
        return ProtocolPath("output_value", upstream.id)

    first = make_protocol("protocol_a", 1)
    second = make_protocol("protocol_b", output_of(first))
    third = make_protocol("protocol_c", 2)
    fourth = make_protocol("protocol_d", output_of(third))

    # The merge point takes the outputs of both chains as a list.
    merged = make_protocol(
        "protocol_e", [output_of(second), output_of(fourth)]
    )
    last = make_protocol("protocol_f", output_of(merged))

    return [first, second, third, fourth, merged, last]
Example #2
0
def test_conditional_group_self_reference():
    """Checks that a protocol nested inside a conditional group can
    read the outputs of the group itself, here the group's current
    iteration."""

    max_iterations = 10
    threshold = random.randint(1, max_iterations - 1)

    conditional_group = ConditionalGroup("conditional_group")
    conditional_group.max_iterations = max_iterations

    # The child protocol takes the iteration counter of its parent as input.
    child_protocol = DummyInputOutputProtocol("protocol_a")
    child_protocol.input_value = ProtocolPath(
        "current_iteration", conditional_group.id
    )

    # Both conditions require a value greater than the threshold: the first
    # inspects the child's output, the second the group's own counter.
    watched_paths = (
        ProtocolPath("output_value", conditional_group.id, child_protocol.id),
        ProtocolPath("current_iteration", conditional_group.id),
    )

    conditions = []

    for watched_path in watched_paths:

        condition = ConditionalGroup.Condition()
        condition.left_hand_value = watched_path
        condition.right_hand_value = threshold
        condition.type = ConditionalGroup.Condition.Type.GreaterThan

        conditions.append(condition)

    conditional_group.add_protocols(child_protocol)

    for condition in conditions:
        conditional_group.add_condition(condition)

    with tempfile.TemporaryDirectory() as directory:

        conditional_group.execute(directory, ComputeResources())
        assert child_protocol.output_value == threshold + 1
Example #3
0
def test_conditional_protocol_group_fail():
    """Expects executing a conditional group to raise a ``RuntimeError``
    when its condition requires ``add_values.result`` to be less than
    the single value which was passed to ``add_values`` twice."""

    source_protocol = DummyInputOutputProtocol("protocol_a")
    source_protocol.input_value = 2 * unit.kelvin

    def source_output():
        # Build a new path object for every reference to the source output.
        return ProtocolPath("output_value", source_protocol.id)

    summation = AddValues("add_values")
    summation.values = [source_output(), source_output()]

    condition = ConditionalGroup.Condition()
    condition.left_hand_value = ProtocolPath("result", summation.id)
    condition.right_hand_value = source_output()
    condition.type = ConditionalGroup.Condition.Type.LessThan

    group = ConditionalGroup("protocol_group")
    group.conditions.append(condition)
    group.max_iterations = 10
    group.add_protocols(source_protocol, summation)

    with tempfile.TemporaryDirectory() as directory:

        with pytest.raises(RuntimeError):
            group.execute(directory, ComputeResources())
Example #4
0
def test_conditional_protocol_group():
    """Executes a conditional group whose condition requires
    ``add_values.result`` to be greater than the single value which was
    passed to ``add_values`` twice, and checks the result is 4 K."""

    source_protocol = DummyInputOutputProtocol("protocol_a")
    source_protocol.input_value = 2 * unit.kelvin

    def source_output():
        # Build a new path object for every reference to the source output.
        return ProtocolPath("output_value", source_protocol.id)

    summation = AddValues("add_values")
    summation.values = [source_output(), source_output()]

    condition = ConditionalGroup.Condition()
    condition.left_hand_value = ProtocolPath("result", summation.id)
    condition.right_hand_value = source_output()
    condition.type = ConditionalGroup.Condition.Type.GreaterThan

    group = ConditionalGroup("protocol_group")
    group.conditions.append(condition)
    group.add_protocols(source_protocol, summation)

    with tempfile.TemporaryDirectory() as directory:

        group.execute(directory, ComputeResources())

        result_path = ProtocolPath("result", summation.id)
        assert group.get_value(result_path) == 4 * unit.kelvin
Example #5
0
def test_simple_workflow_graph(calculation_backend, compute_resources,
                               exception):
    """Executes a two protocol workflow graph, either directly or on a
    local dask cluster, and checks the value of the final result.

    Parameters
    ----------
    calculation_backend: object, optional
        Only compared against ``None`` here: any other value makes the
        test execute the graph on a newly created ``DaskLocalCluster``.
    compute_resources: object, optional
        Passed straight through to ``workflow_graph.execute``.
    exception: bool
        Whether executing the graph is expected to raise an
        ``AssertionError``.
    """

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    protocol_a = DummyInputOutputProtocol("protocol_a")
    protocol_a.input_value = expected_value
    protocol_b = DummyInputOutputProtocol("protocol_b")
    protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

    schema = WorkflowSchema()
    schema.protocol_schemas = [protocol_a.schema, protocol_b.schema]
    schema.final_value_source = ProtocolPath("output_value", protocol_b.id)
    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema

    workflow_graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        if calculation_backend is not None:

            # A new cluster is used for the execution; it is bound to its
            # own name so that the test parameter is not shadowed.
            with DaskLocalCluster() as local_cluster:

                if exception:

                    with pytest.raises(AssertionError):

                        workflow_graph.execute(directory, local_cluster,
                                               compute_resources)

                    return

                results_futures = workflow_graph.execute(
                    directory, local_cluster, compute_resources)

                assert len(results_futures) == 1
                result = results_futures[0].result()

        else:

            # The expected failure must be checked *before* any unguarded
            # execution, otherwise the error would be raised outside of
            # the `pytest.raises` context and fail the test.
            if exception:

                with pytest.raises(AssertionError):

                    workflow_graph.execute(directory, calculation_backend,
                                           compute_resources)

                return

            result = workflow_graph.execute(directory, calculation_backend,
                                            compute_resources)[0]

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
Example #6
0
def test_nested_input():
    """Runs a workflow in which one protocol takes as its input an
    attribute of a dictionary entry output by another protocol."""

    source_protocol = DummyInputOutputProtocol("dict_protocol")
    source_protocol.input_value = {"a": ThermodynamicState(1.0 * unit.kelvin)}

    consumer_protocol = DummyInputOutputProtocol("quantity_protocol")
    # The path indexes into the dictionary and then reads an attribute.
    nested_path = ProtocolPath("output_value[a].temperature",
                               source_protocol.id)
    consumer_protocol.input_value = nested_path

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [
        source_protocol.schema,
        consumer_protocol.schema,
    ]
    workflow_schema.validate()

    workflow = Workflow({})
    workflow.schema = workflow_schema

    graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as temporary_directory:

        with DaskLocalCluster() as calculation_backend:

            futures = graph.execute(temporary_directory, calculation_backend)

            assert len(futures) == 1
            result = futures[0].result()

    assert isinstance(result, WorkflowResult)
    def _get_unnested_protocol_path(protocol_path):
        """Strips any nested attribute or index access from the property
        name of a protocol path, leaving only the top level name, e.g:

        `some_protocol_id.value.error` becomes `some_protocol_id.value`

        and

        `some_protocol_id.value[1]` becomes `some_protocol_id.value`

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to truncate.

        Returns
        -------
        ProtocolPath
            A new path with the same protocol ids and the truncated
            property name.
        """
        # Keep only the text before the first '.' (nested attribute) ...
        top_level_name, _, _ = protocol_path.property_name.partition(".")
        # ... and then only the text before the first '[' (array index).
        top_level_name, _, _ = top_level_name.partition("[")

        return ProtocolPath(top_level_name, *protocol_path.protocol_ids)
def test_protocol_graph_execution(calculation_backend, compute_resources):
    """Executes a graph of two chained protocols and checks that the
    output stored for the second protocol equals the input of the first.

    Parameters
    ----------
    calculation_backend: object, optional
        If not ``None`` the backend is started before the graph is
        executed, the graph results are treated as futures, and the
        backend is stopped when the test finishes (even on failure).
    compute_resources: object, optional
        Passed straight through to ``protocol_graph.execute``.
    """

    if calculation_backend is not None:
        calculation_backend.start()

    # Guarantee that a started backend is stopped again even when the
    # execution or one of the assertions below fails.
    try:

        protocol_a = DummyInputOutputProtocol("protocol_a")
        protocol_a.input_value = 1
        protocol_b = DummyInputOutputProtocol("protocol_b")
        protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

        protocol_graph = ProtocolGraph()
        protocol_graph.add_protocols(protocol_a, protocol_b)

        with tempfile.TemporaryDirectory() as directory:

            results = protocol_graph.execute(directory, calculation_backend,
                                             compute_resources)

            final_result = results[protocol_b.id]

            if calculation_backend is not None:
                final_result = final_result.result()

            # The second entry of the result is opened as a JSON file
            # holding the outputs of the protocol.
            with open(final_result[1]) as file:
                results_b = json.load(file, cls=TypedJSONDecoder)

        assert results_b[".output_value"] == protocol_a.input_value

        if compute_resources is not None:
            assert protocol_b.output_value == protocol_a.input_value

    finally:

        if calculation_backend is not None:
            calculation_backend.stop()
Example #9
0
def test_index_replicated_protocol():
    """Builds a workflow in which un-replicated protocols each refer, by
    index, to one of the protocols which a replicator will generate."""

    replicator = ProtocolReplicator("replicator")
    replicator.template_values = ["a", "b", "c", "d"]

    template_protocol = DummyInputOutputProtocol(
        f"protocol_{replicator.placeholder_id}")
    template_protocol.input_value = ReplicatorValue(replicator.id)

    schema = WorkflowSchema()
    schema.protocol_replicators = [replicator]
    schema.protocol_schemas = [template_protocol.schema]

    # Add one indexing protocol per template value, each pointing at the
    # id which the corresponding replicated protocol is expected to have.
    for index, _ in enumerate(replicator.template_values):

        indexing_protocol = DummyInputOutputProtocol(
            f"indexing_protocol_{index}")
        indexing_protocol.input_value = ProtocolPath(
            "output_value", f"protocol_{index}")

        schema.protocol_schemas.append(indexing_protocol.schema)

    schema.validate()

    workflow = Workflow({})
    workflow.schema = schema
def test_protocol_group_resume():
    """A test that protocol groups can recover after being killed
    (e.g. by a worker being killed due to hitting a wallclock limit)
    """

    compute_resources = ComputeResources()

    # Run inside of a temporary directory so that no `graph_a` directory is
    # left behind in the working directory, where it could otherwise leak
    # state into later runs of this test.
    with tempfile.TemporaryDirectory() as directory:

        graph_directory = os.path.join(directory, "graph_a")

        # Fake a protocol group which executes the first
        # two protocols and then 'gets killed'.
        protocol_a = DummyInputOutputProtocol("protocol_a")
        protocol_a.input_value = 1
        protocol_b = DummyInputOutputProtocol("protocol_b")
        protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)

        protocol_group_a = ProtocolGroup("group_a")
        protocol_group_a.add_protocols(protocol_a, protocol_b)

        protocol_graph = ProtocolGraph()
        protocol_graph.add_protocols(protocol_group_a)
        protocol_graph.execute(graph_directory,
                               compute_resources=compute_resources)

        # Remove the output file so it appears that the protocol group had
        # not completed.
        os.unlink(
            os.path.join(graph_directory, protocol_group_a.id,
                         f"{protocol_group_a.id}_output.json"))

        # Build the 'full' group with the last two protocols which
        # 'had not been executed' before the group was 'killed'.
        protocol_a = DummyInputOutputProtocol("protocol_a")
        protocol_a.input_value = 1
        protocol_b = DummyInputOutputProtocol("protocol_b")
        protocol_b.input_value = ProtocolPath("output_value", protocol_a.id)
        protocol_c = DummyInputOutputProtocol("protocol_c")
        protocol_c.input_value = ProtocolPath("output_value", protocol_b.id)
        protocol_d = DummyInputOutputProtocol("protocol_d")
        protocol_d.input_value = ProtocolPath("output_value", protocol_c.id)

        protocol_group_a = ProtocolGroup("group_a")
        protocol_group_a.add_protocols(protocol_a, protocol_b, protocol_c,
                                       protocol_d)

        # Execute again in the same directory as the 'killed' run.
        protocol_graph = ProtocolGraph()
        protocol_graph.add_protocols(protocol_group_a)
        protocol_graph.execute(graph_directory,
                               compute_resources=compute_resources)

        assert all(x != UNDEFINED for x in protocol_group_a.outputs.values())
    def build_graph(prefix):
        """Combines the protocols of `build_merge` and `build_fork` into
        a single list, feeding the first fork protocol from the
        `protocol_f` created with the same prefix.

        Parameters
        ----------
        prefix: str
            The prefix passed to both builders.

        Returns
        -------
        list
            The merge protocols followed by the fork protocols.
        """
        merge_protocols = build_merge(prefix)
        fork_protocols = build_fork(prefix)

        # Re-wire the root of the fork to take the output of the merge.
        fork_root = fork_protocols[0]
        fork_root.input_value = ProtocolPath(
            "output_value", prefix + "protocol_f"
        )

        return merge_protocols + fork_protocols
def test_protocol_group_execution():
    """Executes a group of two chained protocols and checks that the
    value read back through the group equals the input of the first."""

    first_protocol = DummyInputOutputProtocol("protocol_a")
    first_protocol.input_value = 1

    second_protocol = DummyInputOutputProtocol("protocol_b")
    second_protocol.input_value = ProtocolPath("output_value",
                                               first_protocol.id)

    group = ProtocolGroup("protocol_group")
    group.add_protocols(first_protocol, second_protocol)

    with tempfile.TemporaryDirectory() as directory:
        group.execute(directory, ComputeResources())

    # The value is looked up after the temporary directory has been removed.
    final_value = group.get_value(
        ProtocolPath("output_value", group.id, second_protocol.id))

    assert final_value == first_protocol.input_value
Example #13
0
def test_workflow_with_groups():
    """Executes, on a local dask cluster, a workflow made up of a single
    conditional group and checks the value of the final result."""

    expected_value = (1 * unit.kelvin).plus_minus(0.1 * unit.kelvin)

    first_protocol = DummyInputOutputProtocol("protocol_a")
    first_protocol.input_value = expected_value

    second_protocol = DummyInputOutputProtocol("protocol_b")
    second_protocol.input_value = ProtocolPath("output_value",
                                               first_protocol.id)

    group = ConditionalGroup("conditional_group")
    group.add_protocols(first_protocol, second_protocol)

    # The condition compares the `value` of the second protocol's output
    # against 2 K.
    condition = ConditionalGroup.Condition()
    condition.right_hand_value = 2 * unit.kelvin
    condition.type = ConditionalGroup.Condition.Type.LessThan
    condition.left_hand_value = ProtocolPath(
        "output_value.value", group.id, second_protocol.id)

    group.add_condition(condition)

    workflow_schema = WorkflowSchema()
    workflow_schema.protocol_schemas = [group.schema]
    workflow_schema.final_value_source = ProtocolPath(
        "output_value", group.id, second_protocol.id)
    workflow_schema.validate()

    workflow = Workflow({})
    workflow.schema = workflow_schema

    graph = workflow.to_graph()

    with tempfile.TemporaryDirectory() as directory:

        with DaskLocalCluster() as calculation_backend:

            futures = graph.execute(directory, calculation_backend)
            assert len(futures) == 1

            result = futures[0].result()

        assert isinstance(result, WorkflowResult)
        assert result.value.value == expected_value.value
def build_fork(prefix):
    """Create six dummy protocols in which one chain forks into two.

    Parameters
    ----------
    prefix: str
        The text to prepend to the id of each protocol.

    Returns
    -------
    list of DummyInputOutputProtocol
        The protocols ordered as g, h, i, j, k, l.
    """

    # Dependency layout:
    #
    #          / i - j
    # g - h - |
    #          \ k - l

    def make_protocol(name, input_value):
        # Create a prefixed protocol and assign its input in one step.
        new_protocol = DummyInputOutputProtocol(prefix + name)
        new_protocol.input_value = input_value
        return new_protocol

    def output_of(upstream):
        # Reference the `output_value` of an already created protocol.
        return ProtocolPath("output_value", upstream.id)

    root = make_protocol("protocol_g", 3)
    trunk = make_protocol("protocol_h", output_of(root))

    # Both branches start from the output of `h`.
    upper_head = make_protocol("protocol_i", output_of(trunk))
    upper_tail = make_protocol("protocol_j", output_of(upper_head))
    lower_head = make_protocol("protocol_k", output_of(trunk))
    lower_tail = make_protocol("protocol_l", output_of(lower_head))

    return [root, trunk, upper_head, upper_tail, lower_head, lower_tail]
    def build_protocols(prefix):
        """Builds a protocol which feeds a group of forked protocols,
        whose two branch ends are each consumed by a further protocol.

        Parameters
        ----------
        prefix: str
            The text to prepend to the id of each protocol and group.

        Returns
        -------
        list
            The protocol `a`, the protocol group, and the protocols `b`
            and `c`, in that order.
        """

        #     .-------------------.
        #     |          / i - j -|- b
        # a - | g - h - |         |
        #     |          \ k - l -|- c
        #     .-------------------.
        protocol_a = DummyInputOutputProtocol(prefix + "protocol_a")
        protocol_a.input_value = 1
        fork_protocols = build_fork(prefix)
        fork_protocols[0].input_value = ProtocolPath("output_value",
                                                     protocol_a.id)
        protocol_group = ProtocolGroup(prefix + "protocol_group")
        protocol_group.add_protocols(*fork_protocols)

        # The protocols inside of the group were created with the prefix, so
        # the prefix must also be part of the ids referenced here - without
        # it the paths would point at protocols which do not exist.
        protocol_b = DummyInputOutputProtocol(prefix + "protocol_b")
        protocol_b.input_value = ProtocolPath("output_value",
                                              protocol_group.id,
                                              prefix + "protocol_j")
        protocol_c = DummyInputOutputProtocol(prefix + "protocol_c")
        protocol_c.input_value = ProtocolPath("output_value",
                                              protocol_group.id,
                                              prefix + "protocol_l")

        return [protocol_a, protocol_group, protocol_b, protocol_c]
Example #16
0
    def _get_unreplicated_path(self, protocol_path):
        """Converts a path which refers to protocols by their replicated
        ids (i.e. with a replicator placeholder replaced by an index) into
        a path which refers to the original, templated protocol ids.

        Replicators whose placeholder already appears in the path are
        skipped.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to convert to an unreplicated path.

        Returns
        -------
        ProtocolPath
            A copy of the input path if no replicators are defined,
            otherwise a new path built from the converted full path.
        """
        replicators = self.protocol_replicators

        if replicators == UNDEFINED:
            return protocol_path.copy()

        path_string = str(protocol_path.full_path)

        # The regex fragment which stands in for a replica index, both in
        # its raw form and as it appears after being passed to `re.escape`.
        index_pattern = r"\d+"
        escaped_index_pattern = re.escape(index_pattern)

        for replicator in replicators:

            placeholder = replicator.placeholder_id

            if placeholder in path_string:
                continue

            for template_id in self._find_protocols_to_be_replicated(
                    replicator):

                # Escape the whole id so that it is matched literally, then
                # restore the index fragment which the escaping mangled.
                pattern = re.escape(
                    template_id.replace(placeholder, index_pattern))
                pattern = pattern.replace(escaped_index_pattern,
                                          index_pattern)

                path_string = re.sub(pattern, template_id, path_string)

        return ProtocolPath.from_string(path_string)
def test_advanced_nested_replicators():
    """Checks the protocols generated by two nested replicators, where
    the template values of the inner replicator are indexed by the
    placeholder of the outer one."""

    dummy_schema = WorkflowSchema()

    outer_replicator = ProtocolReplicator(replicator_id="replicator_a")
    outer_replicator.template_values = ["a", "b"]

    outer_placeholder = outer_replicator.placeholder_id

    # The id and the template values of the inner replicator both depend
    # on the index of the outer replicator.
    inner_replicator = ProtocolReplicator(
        replicator_id=f"replicator_b_{outer_placeholder}")
    inner_replicator.template_values = ProtocolPath(
        f"dummy_list[{outer_placeholder}]", "global")

    dummy_protocol = DummyReplicableProtocol(
        f"dummy_{outer_placeholder}_{inner_replicator.placeholder_id}")

    dummy_protocol.replicated_value_a = ReplicatorValue(outer_replicator.id)
    dummy_protocol.replicated_value_b = ReplicatorValue(inner_replicator.id)

    dummy_schema.protocol_schemas = [dummy_protocol.schema]
    dummy_schema.protocol_replicators = [outer_replicator, inner_replicator]

    dummy_schema.validate()

    dummy_property = create_dummy_property(Density)
    dummy_metadata = Workflow.generate_default_metadata(
        dummy_property, "smirnoff99Frosst-1.1.0.offxml", [])
    dummy_metadata["dummy_list"] = [[1], [2]]

    dummy_workflow = Workflow(dummy_metadata, "")
    dummy_workflow.schema = dummy_schema

    assert len(dummy_workflow.protocols) == 2

    # Maps each expected protocol id onto its expected replicated values.
    expected_values = {
        "dummy_0_0": ("a", 1),
        "dummy_1_0": ("b", 2),
    }

    for protocol_id, (value_a, value_b) in expected_values.items():

        replicated_protocol = dummy_workflow.protocols[protocol_id]

        assert replicated_protocol.replicated_value_a == value_a
        assert replicated_protocol.replicated_value_b == value_b
Example #18
0
    def _get_reweighting_protocols(
        id_suffix,
        gradient_replicator_id,
        data_replicator_id,
        replicator_id=None,
        weight_by_mole_fraction=False,
        substance_reference=None,
        n_effective_samples=50,
    ):

        """Returns the set of protocols which when combined in a workflow
        will yield the molar volume of a substance by reweighting cached data.

        Parameters
        ----------
        id_suffix: str
            A suffix to append to the id of each of the returned protocols.
        gradient_replicator_id: str
            The id of the replicator which will clone those protocols which will
            estimate the gradient of the molar volume with respect to a given parameter.
        data_replicator_id: str
            The id of the replicator which will be used to clone these protocols
            for each cached simulation data.
        replicator_id: str, optional
            The optional id of the replicator which will be used to clone these
            protocols, e.g. for each component in the system.
        weight_by_mole_fraction: bool
            If true, an extra protocol will be added to weight the calculated
            molar volume by the mole fraction of the component.
        substance_reference: ProtocolPath or PlaceholderValue, optional
            An optional protocol path (or replicator reference) to the substance
            whose molar volume is being estimated. If `None`, a path to the
            global `substance` is used.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        tuple
            The protocols used to estimate the molar volume of a substance:
            the base reweighting protocols, followed by the
            `number_of_molar_molecules` and `divide_by_molecules` protocols
            and, if `weight_by_mole_fraction` is true, the `weight_volume`
            protocol.
        ProtocolPath
            A reference to the estimated molar volume.
        ProtocolReplicator
            The replicator which will replicate each protocol for each
            cached simulation datum.
        ProtocolGroup
            The group of protocols which will calculate the gradient of the reduced potential
            with respect to a given property.
        ProtocolPath
            A reference to the value of the gradient.
        """

        # Append the replicator placeholders to the suffix. Only
        # `full_id_suffix` includes the data replicator placeholder.
        if replicator_id is not None:
            id_suffix = f"{id_suffix}_$({replicator_id})"

        full_id_suffix = id_suffix

        if data_replicator_id is not None:
            full_id_suffix = f"{id_suffix}_$({data_replicator_id})"

        if substance_reference is None:
            substance_reference = ProtocolPath("substance", "global")

        extract_volume = analysis.ExtractAverageStatistic(
            f"extract_volume{full_id_suffix}"
        )
        extract_volume.statistics_type = ObservableType.Volume
        reweight_volume = reweighting.ReweightStatistics(f"reweight_volume{id_suffix}")
        reweight_volume.statistics_type = ObservableType.Volume
        reweight_volume.required_effective_samples = n_effective_samples

        (protocols, data_replicator) = generate_base_reweighting_protocols(
            analysis_protocol=extract_volume,
            mbar_protocol=reweight_volume,
            replicator_id=data_replicator_id,
            id_suffix=id_suffix,
        )

        # Make sure to use the correct substance.
        protocols.build_target_system.substance = substance_reference

        value_source = ProtocolPath("value", protocols.mbar_protocol.id)

        # Optionally weight the reweighted value by the mole fraction of the
        # component within the full (global) substance.
        weight_volume = None

        if weight_by_mole_fraction is True:
            weight_volume = miscellaneous.WeightByMoleFraction(
                f"weight_volume{id_suffix}"
            )
            weight_volume.value = ProtocolPath("value", protocols.mbar_protocol.id)
            weight_volume.full_substance = ProtocolPath("substance", "global")
            weight_volume.component = substance_reference

            value_source = ProtocolPath("weighted_value", weight_volume.id)

        # Divide the (optionally weighted) value by the number of molecules in
        # the system, expressed in moles. The number of molecules is read from
        # the unpack protocol whose data replicator placeholder has been
        # replaced by the index 0.
        number_of_molecules = ProtocolPath(
            "total_number_of_molecules",
            protocols.unpack_stored_data.id.replace(f"$({data_replicator_id})", "0"),
        )

        number_of_molar_molecules = miscellaneous.MultiplyValue(
            f"number_of_molar_molecules{id_suffix}"
        )
        number_of_molar_molecules.value = (1.0 / unit.avogadro_constant).to(unit.mole)
        number_of_molar_molecules.multiplier = number_of_molecules

        divide_by_molecules = miscellaneous.DivideValue(
            f"divide_by_molecules{id_suffix}"
        )
        divide_by_molecules.value = value_source
        divide_by_molecules.divisor = ProtocolPath(
            "result", number_of_molar_molecules.id
        )

        value_source = ProtocolPath("result", divide_by_molecules.id)

        # Set up the gradient calculations. A deep copy of the reweighting
        # protocol is handed over as the template.
        reweight_volume_template = copy.deepcopy(reweight_volume)

        coordinate_path = ProtocolPath(
            "output_coordinate_path", protocols.concatenate_trajectories.id
        )
        trajectory_path = ProtocolPath(
            "output_trajectory_path", protocols.concatenate_trajectories.id
        )
        statistics_path = ProtocolPath(
            "statistics_file_path", protocols.reduced_target_potential.id
        )

        gradient_group, _, gradient_source = generate_gradient_protocol_group(
            reweight_volume_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_path,
            trajectory_path,
            statistics_path,
            replicator_id=gradient_replicator_id,
            id_suffix=id_suffix,
            substance_source=substance_reference,
            effective_sample_indices=ProtocolPath(
                "effective_sample_indices", protocols.mbar_protocol.id
            ),
        )

        # Remove the group id from the path. The protocols which consume this
        # path below are themselves added to the gradient group.
        gradient_source.pop_next_in_path()

        if weight_by_mole_fraction is True:
            # The component workflows need an extra step to multiply their gradients by their
            # relative mole fraction.
            weight_gradient = miscellaneous.WeightByMoleFraction(
                f"weight_gradient_$({gradient_replicator_id})_"
                f"by_mole_fraction{id_suffix}"
            )
            weight_gradient.value = gradient_source
            weight_gradient.full_substance = ProtocolPath("substance", "global")
            weight_gradient.component = substance_reference

            gradient_group.add_protocols(weight_gradient)
            gradient_source = ProtocolPath("weighted_value", weight_gradient.id)

        # Scale the gradient by the same divisor which is applied to the value.
        scale_gradient = miscellaneous.DivideValue(
            f"scale_gradient_$({gradient_replicator_id}){id_suffix}"
        )
        scale_gradient.value = gradient_source
        scale_gradient.divisor = ProtocolPath("result", number_of_molar_molecules.id)

        gradient_group.add_protocols(scale_gradient)
        # The returned gradient path includes the id of the group again.
        gradient_source = ProtocolPath("result", gradient_group.id, scale_gradient.id)

        all_protocols = (*protocols, number_of_molar_molecules, divide_by_molecules)

        if weight_volume is not None:
            all_protocols = (*all_protocols, weight_volume)

        return (
            all_protocols,
            value_source,
            data_replicator,
            gradient_group,
            gradient_source,
        )
Example #19
0
    def default_simulation_schema(existing_schema=None):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        existing_schema: SimulationSchema, optional
            An existing schema whose settings to use. If set,
            the schema's `workflow_schema` will be overwritten
            by this method.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """

        calculation_schema = SimulationSchema()

        if existing_schema is not None:

            assert isinstance(existing_schema, SimulationSchema)
            calculation_schema = copy.deepcopy(existing_schema)

        schema = WorkflowSchema(
            property_type=HostGuestBindingAffinity.__name__)
        schema.id = "{}{}".format(HostGuestBindingAffinity.__name__, "Schema")

        # Initial coordinate and topology setup.
        filter_ligand = miscellaneous.FilterSubstanceByRole("filter_ligand")
        filter_ligand.input_substance = ProtocolPath("substance", "global")

        filter_ligand.component_roles = [Component.Role.Ligand]
        # We only support substances with a single guest ligand.
        filter_ligand.expected_components = 1

        schema.protocols[filter_ligand.id] = filter_ligand.schema

        # Construct the protocols which will (for now) take as input a set of host coordinates,
        # and generate a set of charges for them.
        filter_receptor = miscellaneous.FilterSubstanceByRole(
            "filter_receptor")
        filter_receptor.input_substance = ProtocolPath("substance", "global")

        filter_receptor.component_roles = [Component.Role.Receptor]
        # We only support substances with a single host receptor.
        filter_receptor.expected_components = 1

        schema.protocols[filter_receptor.id] = filter_receptor.schema

        # Perform docking to position the guest within the host.
        perform_docking = coordinates.BuildDockedCoordinates("perform_docking")

        perform_docking.ligand_substance = ProtocolPath(
            "filtered_substance", filter_ligand.id)
        perform_docking.receptor_coordinate_file = ProtocolPath(
            "receptor_mol2", "global")

        schema.protocols[perform_docking.id] = perform_docking.schema

        # Solvate the docked structure using packmol
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_roles = [Component.Role.Solvent]

        schema.protocols[filter_solvent.id] = filter_solvent.schema

        solvate_complex = coordinates.SolvateExistingStructure(
            "solvate_complex")
        solvate_complex.max_molecules = 1000

        solvate_complex.substance = ProtocolPath("filtered_substance",
                                                 filter_solvent.id)
        solvate_complex.solute_coordinate_file = ProtocolPath(
            "docked_complex_coordinate_path", perform_docking.id)

        schema.protocols[solvate_complex.id] = solvate_complex.schema

        # Assign force field parameters to the solvated complex system.
        build_solvated_complex_system = forcefield.BaseBuildSystem(
            "build_solvated_complex_system")

        build_solvated_complex_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_complex_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        build_solvated_complex_system.substance = ProtocolPath(
            "substance", "global")

        build_solvated_complex_system.charged_molecule_paths = [
            ProtocolPath("receptor_mol2", "global")
        ]

        schema.protocols[build_solvated_complex_system.
                         id] = build_solvated_complex_system.schema

        # Solvate the ligand using packmol
        solvate_ligand = coordinates.SolvateExistingStructure("solvate_ligand")
        solvate_ligand.max_molecules = 1000

        solvate_ligand.substance = ProtocolPath("filtered_substance",
                                                filter_solvent.id)
        solvate_ligand.solute_coordinate_file = ProtocolPath(
            "docked_ligand_coordinate_path", perform_docking.id)

        schema.protocols[solvate_ligand.id] = solvate_ligand.schema

        # Assign force field parameters to the solvated ligand system.
        build_solvated_ligand_system = forcefield.BaseBuildSystem(
            "build_solvated_ligand_system")

        build_solvated_ligand_system.force_field_path = ProtocolPath(
            "force_field_path", "global")

        build_solvated_ligand_system.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        build_solvated_ligand_system.substance = ProtocolPath(
            "substance", "global")

        schema.protocols[build_solvated_ligand_system.
                         id] = build_solvated_ligand_system.schema

        # Employ YANK to estimate the binding free energy.
        yank_protocol = yank.LigandReceptorYankProtocol("yank_protocol")

        yank_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")

        yank_protocol.number_of_iterations = 2000
        yank_protocol.steps_per_iteration = 500
        yank_protocol.checkpoint_interval = 10

        yank_protocol.verbose = True

        yank_protocol.force_field_path = ProtocolPath("force_field_path",
                                                      "global")

        yank_protocol.ligand_residue_name = ProtocolPath(
            "ligand_residue_name", perform_docking.id)
        yank_protocol.receptor_residue_name = ProtocolPath(
            "receptor_residue_name", perform_docking.id)

        yank_protocol.solvated_ligand_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_ligand.id)
        yank_protocol.solvated_ligand_system = ProtocolPath(
            "system_path", build_solvated_ligand_system.id)

        yank_protocol.solvated_complex_coordinates = ProtocolPath(
            "coordinate_file_path", solvate_complex.id)
        yank_protocol.solvated_complex_system = ProtocolPath(
            "system_path", build_solvated_complex_system.id)

        schema.protocols[yank_protocol.id] = yank_protocol.schema

        # Define where the final values come from.
        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 yank_protocol.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Example #20
0
    def update_references(self, protocols, replication_map, template_values):
        """Re-points any protocol inputs which reference this replicator's
        placeholder so that they reference the replicated protocols instead.

        Parameters
        ----------
        protocols: dict of str and Protocol
            The protocols which have had this replicator applied
            to them.
        replication_map: dict of ProtocolPath and list of tuple of ProtocolPath and int
            Maps the path of each original protocol to a list of
            (replicated protocol path, index into `template_values`) pairs.
        template_values: List of Any
            A list of the values which will be inserted
            into the newly replicated protocols. Only its length is
            used here.
        """

        # Flip the replication map around so that it is keyed by the path of
        # each *replicated* protocol.
        replica_lookup = {}

        for original_path, replicas in replication_map.items():
            replica_lookup.update(
                (replica_path, (original_path, replica_index))
                for replica_path, replica_index in replicas
            )

        for protocol_id, protocol in protocols.items():

            for required_input in protocol.required_inputs:

                # Only those references which still contain the placeholder
                # need to be redirected. These are collected up front, before
                # any value on the protocol is modified.
                references_to_update = {
                    source_path: reference
                    for source_path, reference in protocol.get_value_references(
                        required_input
                    ).items()
                    if self.placeholder_id in reference.full_path
                }

                for source_path, reference in references_to_update.items():

                    owning_path = source_path.copy()
                    owning_path.prepend_protocol_id(protocol_id)

                    # By default (i.e. when the protocol which owns the input
                    # was not itself replicated) the input takes a list with
                    # one reference per template value...
                    new_value = [
                        ProtocolPath.from_string(
                            reference.full_path.replace(
                                self.placeholder_id, str(index)
                            )
                        )
                        for index in range(len(template_values))
                    ]

                    # ...whereas an input owned by a replicated protocol takes
                    # only the single reference with the matching index.
                    for replica_path, (_, replica_index) in replica_lookup.items():

                        if owning_path.protocol_path != replica_path.protocol_path:
                            continue

                        new_value = ProtocolPath.from_string(
                            reference.full_path.replace(
                                self.placeholder_id, str(replica_index)
                            )
                        )
                        break

                    protocol.set_value(source_path, new_value)
Example #21
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Builds the default schema used to estimate this class of
        property directly from simulations.

        Two sets of protocols are generated through
        `ExcessMolarVolume._get_simulation_protocols`: one for the full
        system and one, replicated per component, for the individual
        components. The component volumes are summed by an `AddValues`
        protocol and combined with the full system volume by a
        `SubtractValues` protocol. The gradients are combined in the
        same manner.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
            May not be set at the same time as `relative_tolerance`.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within. May not be
            set at the same time as `absolute_tolerance`.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        # A target uncertainty is only used when one of the tolerances is set.
        tolerance_requested = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # The ids of the replicators which clone the gradient protocols for
        # each gradient key, and the component protocols for each component.
        gradient_repl_id = "gradient_replicator"
        component_repl_id = "component_replicator"

        # Protocols which estimate the molar volume of the full, mixed system.
        (
            full_protocols,
            full_molar_molecules,
            full_volume,
            full_output,
            full_gradient_group,
            _,
            full_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_full",
            gradient_repl_id,
            use_target_uncertainty=tolerance_requested,
            n_molecules=n_molecules,
        )

        # Template protocols which estimate the molar volume of one component.
        # The mole fraction weights are taken from the substance actually
        # built for the full system, as these may differ slightly from the
        # mole fractions of the measured property due to rounding.
        (
            component_protocols,
            component_molar_molecules,
            component_volumes,
            component_output,
            component_gradient_group,
            _,
            component_gradient,
        ) = ExcessMolarVolume._get_simulation_protocols(
            "_component",
            gradient_repl_id,
            replicator_id=component_repl_id,
            weight_by_mole_fraction=True,
            component_substance_reference=ReplicatorValue(component_repl_id),
            full_substance_reference=ProtocolPath(
                "output_substance", full_protocols.build_coordinates.id
            ),
            use_target_uncertainty=tolerance_requested,
            n_molecules=n_molecules,
        )

        # Sum the component molar volumes and combine the total with the
        # molar volume of the mixed system.
        sum_component_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_component_volumes.values = component_volumes

        excess_volume = miscellaneous.SubtractValues("calculate_excess_volume")
        excess_volume.value_b = full_volume
        excess_volume.value_a = ProtocolPath("result", sum_component_volumes.id)

        # The component protocols are replicated once per component.
        component_replicator = ProtocolReplicator(replicator_id=component_repl_id)
        component_replicator.template_values = ProtocolPath("components", "global")

        # Combine the gradients in the same way as the volumes.
        sum_component_gradients = miscellaneous.AddValues(
            f"add_component_gradients_$({gradient_repl_id})"
        )
        sum_component_gradients.values = component_gradient

        excess_gradient = miscellaneous.SubtractValues(
            f"combine_gradients_$({gradient_repl_id})"
        )
        excess_gradient.value_b = full_gradient
        excess_gradient.value_a = ProtocolPath("result", sum_component_gradients.id)

        # The gradient protocols are replicated once per gradient key.
        gradient_replicator = ProtocolReplicator(replicator_id=gradient_repl_id)
        gradient_replicator.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )

        # Assemble the final workflow schema.
        workflow_schema = WorkflowSchema()

        workflow_schema.protocol_schemas = [
            component_protocols.build_coordinates.schema,
            component_protocols.assign_parameters.schema,
            component_protocols.energy_minimisation.schema,
            component_protocols.equilibration_simulation.schema,
            component_protocols.converge_uncertainty.schema,
            component_molar_molecules.schema,
            full_protocols.build_coordinates.schema,
            full_protocols.assign_parameters.schema,
            full_protocols.energy_minimisation.schema,
            full_protocols.equilibration_simulation.schema,
            full_protocols.converge_uncertainty.schema,
            full_molar_molecules.schema,
            component_protocols.extract_uncorrelated_trajectory.schema,
            component_protocols.extract_uncorrelated_statistics.schema,
            full_protocols.extract_uncorrelated_trajectory.schema,
            full_protocols.extract_uncorrelated_statistics.schema,
            sum_component_volumes.schema,
            excess_volume.schema,
            component_gradient_group.schema,
            full_gradient_group.schema,
            sum_component_gradients.schema,
            excess_gradient.schema,
        ]

        workflow_schema.protocol_replicators = [
            gradient_replicator,
            component_replicator,
        ]

        # Point the schema at the protocols which yield the final values.
        workflow_schema.gradients_sources = [
            ProtocolPath("result", excess_gradient.id)
        ]
        workflow_schema.final_value_source = ProtocolPath("result", excess_volume.id)

        workflow_schema.outputs_to_store = {
            "full_system": full_output,
            f"component_$({component_repl_id})": component_output,
        }

        calculation_schema.workflow_schema = workflow_schema
        return calculation_schema
Example #22
0
    def apply(self,
              protocols,
              template_values=None,
              template_index=-1,
              template_value=None):
        """Applies this replicator to the provided set of protocols and any of
        their children.

        This method should be followed by a call to `update_references`
        to ensure that all protocols which take their input from a replicated
        protocol get correctly updated.

        Parameters
        ----------
        protocols: dict of str and Protocol
            The protocols to apply the replicator to.
        template_values: list of Any, optional
            A list of the values which will be inserted
            into the newly replicated protocols.

            This parameter is mutually exclusive with
            `template_index` and `template_value`
        template_index: int, optional
            A specific index which should be used for any
            protocols flagged as to be replicated by this
            replicator. This option is mainly used when
            replicating children of an already replicated
            protocol. A negative index, or `None`, is treated
            as the index not having been set.

            This parameter is mutually exclusive with
            `template_values` and must be set along with
            a `template_value`.
        template_value: Any, optional
            A specific value which should be used for any
            protocols flagged as to be replicated by this
            replicator. This option is mainly used when
            replicating children of an already replicated
            protocol.

            This parameter is mutually exclusive with
            `template_values` and must be set along with
            a `template_index`.

        Returns
        -------
        dict of str and Protocol
            The replicated protocols.
        dict of ProtocolPath and list of tuple of ProtocolPath and int
            A dictionary of references to all of the protocols which have
            been replicated, with keys of original protocol ids. Each value
            is comprised of a list of the replicated protocol ids, and their
            index into the `template_values` array.

        Raises
        ------
        ValueError
            If `template_values` is combined with a `template_index` or
            `template_value`, or if `template_values` is not set and a
            `template_index` and `template_value` are not both provided.
        """

        has_template_values = template_values is not None
        # The index may be flagged as unset by either `None` or a negative
        # number. The `None` check must come first, as comparing `None`
        # with an integer raises a `TypeError`.
        has_template_index = template_index is not None and template_index >= 0
        has_template_value = template_value is not None

        if (has_template_values and
            (has_template_index or has_template_value)) or (
                not has_template_values and
                not (has_template_index and has_template_value)):

            raise ValueError(
                "Either the template values array must be set, or a specific "
                "template index and value must be passed.")

        replicated_protocols = {}
        replicated_protocol_map = {}

        for protocol_id, protocol in protocols.items():

            if self.placeholder_id in protocol_id:

                # This protocol is flagged for replication by this replicator.
                replicated_protocols.update(
                    self._apply_to_protocol(
                        protocol,
                        replicated_protocol_map,
                        template_values,
                        template_index,
                        template_value,
                    ))

                continue

            # Otherwise the protocol itself is kept as is, and only its
            # children are considered for replication.
            replicated_protocols[protocol_id] = protocol

            if has_template_index:
                # Make sure to include children of replicated protocols in the
                # map to ensure correct behaviour when updating children of replicated
                # protocols which have the replicator id in their name, and take input
                # from another child protocol which doesn't have the replicator id in
                # its name. The key and the stored path are deliberately kept as
                # separate objects.
                replicated_protocol_map.setdefault(
                    ProtocolPath("", protocol_id), []).append(
                        (ProtocolPath("", protocol_id), template_index))

            self._apply_to_protocol_children(
                protocol,
                replicated_protocol_map,
                template_values,
                template_index,
                template_value,
            )

        return replicated_protocols, replicated_protocol_map
Example #23
0
    def _apply_to_protocol(
        self,
        protocol,
        replicated_protocol_map,
        template_values=None,
        template_index=-1,
        template_value=None,
    ):
        """Creates one copy of `protocol` for each template value, replacing
        this replicator's placeholder in the id of each copy with the index
        of the corresponding value.

        Parameters
        ----------
        protocol: Protocol
            The protocol to replicate.
        replicated_protocol_map: dict of ProtocolPath and list of tuple of ProtocolPath and int
            The map which records the replicated protocols. The entry for
            `protocol` is reset, and then receives one
            (replicated path, index) pair per created copy.
        template_values: list of Any, optional
            The values to create copies for. When `None`, a single copy
            is created from `template_index` and `template_value`.
        template_index: int, optional
            The index to use when `template_values` is `None`.
        template_value: Any, optional
            The value to use when `template_values` is `None`.

        Returns
        -------
        dict of str and Protocol
            The newly created protocols, stored by their ids.
        """

        replicated_protocol_map[ProtocolPath("", protocol.id)] = []
        new_protocols = {}

        # Either every template value is used, or only the single
        # explicitly provided index / value pair.
        if template_values is None:
            values_by_index = {template_index: template_value}
        else:
            values_by_index = dict(enumerate(template_values))

        for index, value in values_by_index.items():

            # Build the copy from the original's schema, with the
            # placeholder in the id swapped for the index.
            schema = protocol.schema
            schema.id = schema.id.replace(self.placeholder_id, str(index))

            protocol_class = registered_workflow_protocols[schema.type]

            replica = protocol_class(schema.id)
            replica.schema = schema

            replicated_protocol_map[ProtocolPath("", protocol.id)].append(
                (ProtocolPath("", replica.id), index)
            )

            # Pass the template value to any inputs which require it.
            for required_input in replica.required_inputs:

                input_value = replica.get_value(required_input)

                if not isinstance(input_value, ReplicatorValue):
                    continue

                if input_value.replicator_id != self.id:
                    # The value belongs to a different replicator, so only
                    # the placeholder within its replicator id is resolved.
                    input_value.replicator_id = input_value.replicator_id.replace(
                        self.placeholder_id, str(index)
                    )
                else:
                    replica.set_value(required_input, value)

            self._apply_to_protocol_children(
                replica, replicated_protocol_map, None, index, value,
            )

            new_protocols[replica.id] = replica

        return new_protocols
Example #24
0
def generate_base_reweighting_protocols(
    analysis_protocol, mbar_protocol, replicator_id="data_repl", id_suffix="",
):
    """Builds the set of protocols which, when combined in a workflow schema,
    reweight a set of existing data to estimate a particular property. The
    observable to reweight is produced by the passed in `analysis_protocol`.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will take input from the stored data,
        and generate a set of observables to reweight. Its id must
        contain the placeholder of the data replicator.
    mbar_protocol: BaseReweightingProtocol
        A template mbar reweighting protocol, which has its reference
        observables already set. This method will automatically set the
        reduced potentials on this object. Its id must not contain the
        placeholder of the data replicator.
    replicator_id: str
        The id to use for the data replicator.
    id_suffix: str
        A string suffix to append to each of the protocol ids.

    Returns
    -------
    BaseReweightingProtocols:
        A named tuple of the protocol which should form the bulk of
        a property estimation workflow.
    ProtocolReplicator:
        A replicator which will clone the workflow for each piece of
        stored data.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    placeholder = f"$({replicator_id})"

    assert placeholder in analysis_protocol.id
    assert placeholder not in mbar_protocol.id

    replicated_suffix = f"_$({replicator_id}){id_suffix}"

    # Unpack each piece of the stored data.
    unpack_stored_data = storage.UnpackStoredSimulationData(
        f"unpack_data{replicated_suffix}"
    )
    unpack_stored_data.simulation_data_path = ReplicatorValue(replicator_id)

    def from_stored_data(attribute):
        # A reference to an output of the unpack protocol.
        return ProtocolPath(attribute, unpack_stored_data.id)

    def from_analysis(attribute):
        # A reference to an output of the analysis protocol.
        return ProtocolPath(attribute, analysis_protocol.id)

    # Feed the stored data into the analysis protocol, which provides the
    # statistical inefficiency and equilibration index used below.
    if isinstance(analysis_protocol, analysis.ExtractAverageStatistic):

        analysis_protocol.statistics_path = from_stored_data("statistics_file_path")

    elif isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):

        analysis_protocol.input_coordinate_file = from_stored_data(
            "coordinate_file_path"
        )
        analysis_protocol.trajectory_path = from_stored_data("trajectory_file_path")

    # Decorrelate the frames of the stored statistics array...
    decorrelate_statistics = analysis.ExtractUncorrelatedStatisticsData(
        f"decorrelate_stats{replicated_suffix}"
    )
    decorrelate_statistics.statistical_inefficiency = from_analysis(
        "statistical_inefficiency"
    )
    decorrelate_statistics.equilibration_index = from_analysis("equilibration_index")
    decorrelate_statistics.input_statistics_path = from_stored_data(
        "statistics_file_path"
    )

    # ...and of the stored trajectory.
    decorrelate_trajectory = analysis.ExtractUncorrelatedTrajectoryData(
        f"decorrelate_traj{replicated_suffix}"
    )
    decorrelate_trajectory.statistical_inefficiency = from_analysis(
        "statistical_inefficiency"
    )
    decorrelate_trajectory.equilibration_index = from_analysis("equilibration_index")
    decorrelate_trajectory.input_coordinate_file = from_stored_data(
        "coordinate_file_path"
    )
    decorrelate_trajectory.input_trajectory_path = from_stored_data(
        "trajectory_file_path"
    )

    # Stitch together all of the decorrelated trajectories and statistics.
    join_trajectories = reweighting.ConcatenateTrajectories("concat_traj" + id_suffix)
    join_trajectories.input_coordinate_paths = from_stored_data("coordinate_file_path")
    join_trajectories.input_trajectory_paths = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )

    join_statistics = reweighting.ConcatenateStatistics("concat_stats" + id_suffix)
    join_statistics.input_statistics_paths = ProtocolPath(
        "output_statistics_path", decorrelate_statistics.id
    )

    joined_trajectory = ("output_trajectory_path", join_trajectories.id)
    joined_coordinates = ("output_coordinate_path", join_trajectories.id)
    joined_statistics = ("output_statistics_path", join_statistics.id)

    # Calculate the reduced potentials for each of the reference states.
    build_reference_system = forcefield.BaseBuildSystem(
        f"build_system{replicated_suffix}"
    )
    build_reference_system.force_field_path = from_stored_data("force_field_path")
    build_reference_system.substance = from_stored_data("substance")
    build_reference_system.coordinate_file_path = from_stored_data(
        "coordinate_file_path"
    )

    reduced_reference_potential = openmm.OpenMMReducedPotentials(
        f"reduced_potential{replicated_suffix}"
    )
    reduced_reference_potential.system_path = ProtocolPath(
        "system_path", build_reference_system.id
    )
    reduced_reference_potential.thermodynamic_state = from_stored_data(
        "thermodynamic_state"
    )
    reduced_reference_potential.coordinate_file_path = from_stored_data(
        "coordinate_file_path"
    )
    reduced_reference_potential.trajectory_file_path = ProtocolPath(
        *joined_trajectory
    )
    reduced_reference_potential.kinetic_energies_path = ProtocolPath(
        *joined_statistics
    )

    # Calculate the reduced potential of the target state.
    build_target_system = forcefield.BaseBuildSystem("build_system_target" + id_suffix)
    build_target_system.force_field_path = ProtocolPath("force_field_path", "global")
    build_target_system.substance = ProtocolPath("substance", "global")
    build_target_system.coordinate_file_path = ProtocolPath(*joined_coordinates)

    reduced_target_potential = openmm.OpenMMReducedPotentials(
        "reduced_potential_target" + id_suffix
    )
    reduced_target_potential.thermodynamic_state = ProtocolPath(
        "thermodynamic_state", "global"
    )
    reduced_target_potential.system_path = ProtocolPath(
        "system_path", build_target_system.id
    )
    reduced_target_potential.coordinate_file_path = ProtocolPath(*joined_coordinates)
    reduced_target_potential.trajectory_file_path = ProtocolPath(*joined_trajectory)
    reduced_target_potential.kinetic_energies_path = ProtocolPath(*joined_statistics)

    # Finally, apply MBAR to get the reweighted value.
    mbar_protocol.reference_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_reference_potential.id
    )
    mbar_protocol.target_reduced_potentials = ProtocolPath(
        "statistics_file_path", reduced_target_potential.id
    )

    if isinstance(mbar_protocol, reweighting.ReweightStatistics):

        statistics_type = mbar_protocol.statistics_type

        is_not_an_energy = (
            statistics_type != ObservableType.PotentialEnergy
            and statistics_type != ObservableType.TotalEnergy
            and statistics_type != ObservableType.Enthalpy
            and statistics_type != ObservableType.ReducedPotential
        )

        if is_not_an_energy:

            # Reweight the decorrelated statistics of the stored data.
            mbar_protocol.statistics_paths = ProtocolPath(
                "output_statistics_path", decorrelate_statistics.id
            )

        else:

            # Energy-type observables are instead read from the statistics
            # generated when evaluating the target reduced potential.
            mbar_protocol.statistics_paths = [
                ProtocolPath("statistics_file_path", reduced_target_potential.id)
            ]
            mbar_protocol.frame_counts = ProtocolPath(
                "number_of_uncorrelated_samples", decorrelate_statistics.id
            )

    base_protocols = BaseReweightingProtocols(
        unpack_stored_data,
        analysis_protocol,
        decorrelate_statistics,
        decorrelate_trajectory,
        join_trajectories,
        join_statistics,
        build_reference_system,
        reduced_reference_potential,
        build_target_system,
        reduced_target_potential,
        mbar_protocol,
    )

    # Create the replicator which clones the protocols per piece of stored data.
    data_replicator = ProtocolReplicator(replicator_id=replicator_id)
    data_replicator.template_values = ProtocolPath("full_system_data", "global")

    return base_protocols, data_replicator
Example #25
0
    def default_simulation_schema(absolute_tolerance=UNDEFINED,
                                  relative_tolerance=UNDEFINED,
                                  n_molecules=2000):
        """Returns the default calculation schema to use when estimating
        this class of property from direct simulations.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        calculation_schema = SimulationSchema()
        calculation_schema.absolute_tolerance = absolute_tolerance
        calculation_schema.relative_tolerance = relative_tolerance

        use_target_uncertainty = (absolute_tolerance != UNDEFINED
                                  or relative_tolerance != UNDEFINED)

        # Setup the fully solvated systems.
        build_full_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_solvated_coordinates")
        build_full_coordinates.substance = ProtocolPath("substance", "global")
        build_full_coordinates.max_molecules = n_molecules

        assign_full_parameters = forcefield.BaseBuildSystem(
            f"assign_solvated_parameters")
        assign_full_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_full_parameters.substance = ProtocolPath("substance", "global")
        assign_full_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        # Perform a quick minimisation of the full system to give
        # YANK a better starting point for its minimisation.
        energy_minimisation = openmm.OpenMMEnergyMinimisation(
            "energy_minimisation")
        energy_minimisation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        energy_minimisation.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", build_full_coordinates.id)

        equilibration_simulation = openmm.OpenMMSimulation(
            "equilibration_simulation")
        equilibration_simulation.ensemble = Ensemble.NPT
        equilibration_simulation.steps_per_iteration = 100000
        equilibration_simulation.output_frequency = 10000
        equilibration_simulation.timestep = 2.0 * unit.femtosecond
        equilibration_simulation.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global")
        equilibration_simulation.system_path = ProtocolPath(
            "system_path", assign_full_parameters.id)
        equilibration_simulation.input_coordinate_file = ProtocolPath(
            "output_coordinate_file", energy_minimisation.id)

        # Create a substance which only contains the solute (e.g. for the
        # vacuum phase simulations).
        filter_solvent = miscellaneous.FilterSubstanceByRole("filter_solvent")
        filter_solvent.input_substance = ProtocolPath("substance", "global")
        filter_solvent.component_role = Component.Role.Solvent

        filter_solute = miscellaneous.FilterSubstanceByRole("filter_solute")
        filter_solute.input_substance = ProtocolPath("substance", "global")
        filter_solute.component_role = Component.Role.Solute

        # Setup the solute in vacuum system.
        build_vacuum_coordinates = coordinates.BuildCoordinatesPackmol(
            "build_vacuum_coordinates")
        build_vacuum_coordinates.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        build_vacuum_coordinates.max_molecules = 1

        assign_vacuum_parameters = forcefield.BaseBuildSystem(
            f"assign_parameters")
        assign_vacuum_parameters.force_field_path = ProtocolPath(
            "force_field_path", "global")
        assign_vacuum_parameters.substance = ProtocolPath(
            "filtered_substance", filter_solute.id)
        assign_vacuum_parameters.coordinate_file_path = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)

        # Set up the protocol to run yank.
        run_yank = yank.SolvationYankProtocol("run_solvation_yank")
        run_yank.solute = ProtocolPath("filtered_substance", filter_solute.id)
        run_yank.solvent_1 = ProtocolPath("filtered_substance",
                                          filter_solvent.id)
        run_yank.solvent_2 = Substance()
        run_yank.thermodynamic_state = ProtocolPath("thermodynamic_state",
                                                    "global")
        run_yank.steps_per_iteration = 500
        run_yank.checkpoint_interval = 50
        run_yank.solvent_1_coordinates = ProtocolPath(
            "output_coordinate_file", equilibration_simulation.id)
        run_yank.solvent_1_system = ProtocolPath("system_path",
                                                 assign_full_parameters.id)
        run_yank.solvent_2_coordinates = ProtocolPath(
            "coordinate_file_path", build_vacuum_coordinates.id)
        run_yank.solvent_2_system = ProtocolPath("system_path",
                                                 assign_vacuum_parameters.id)

        # Set up the group which will run yank until the free energy has been determined to within
        # a given uncertainty
        conditional_group = groups.ConditionalGroup(f"conditional_group")
        conditional_group.max_iterations = 20

        if use_target_uncertainty:

            condition = groups.ConditionalGroup.Condition()
            condition.type = groups.ConditionalGroup.Condition.Type.LessThan
            condition.right_hand_value = ProtocolPath("target_uncertainty",
                                                      "global")
            condition.left_hand_value = ProtocolPath(
                "estimated_free_energy.error", conditional_group.id,
                run_yank.id)

            conditional_group.add_condition(condition)

        # Define the total number of iterations that yank should run for.
        total_iterations = miscellaneous.MultiplyValue("total_iterations")
        total_iterations.value = 2000
        total_iterations.multiplier = ProtocolPath("current_iteration",
                                                   conditional_group.id)

        # Make sure the simulations gets extended after each iteration.
        run_yank.number_of_iterations = ProtocolPath("result",
                                                     total_iterations.id)

        conditional_group.add_protocols(total_iterations, run_yank)

        # Define the full workflow schema.
        schema = WorkflowSchema()

        schema.protocol_schemas = [
            build_full_coordinates.schema,
            assign_full_parameters.schema,
            energy_minimisation.schema,
            equilibration_simulation.schema,
            filter_solvent.schema,
            filter_solute.schema,
            build_vacuum_coordinates.schema,
            assign_vacuum_parameters.schema,
            conditional_group.schema,
        ]

        schema.final_value_source = ProtocolPath("estimated_free_energy",
                                                 conditional_group.id,
                                                 run_yank.id)

        calculation_schema.workflow_schema = schema
        return calculation_schema
Example #26
0
def generate_gradient_protocol_group(
    template_reweighting_protocol,
    force_field_path,
    coordinate_file_path,
    trajectory_file_path,
    statistics_file_path,
    replicator_id="repl",
    substance_source=None,
    id_suffix="",
    enable_pbc=True,
    effective_sample_indices=None,
):
    """Builds the protocols which estimate the gradient of a reweighted
    observable with respect to a force field parameter using the central
    finite difference method.

    The returned group contains, in order: a protocol which evaluates the
    reduced potentials of the perturbed states, a `reverse` and a `forward`
    copy of the template reweighting protocol, and the protocol which combines
    their values into the gradient.

    Notes
    -----
    The passed template is modified in place: its `bootstrap_iterations` is
    set to 1 and its `required_effective_samples` to 0 before its schema is
    copied. Pass in a copy if the original must remain untouched.

    Parameters
    ----------
    template_reweighting_protocol: BaseMBARProtocol
        The template used to create the protocols which reweight the
        observable to small perturbations of the parameter of interest.

        The template *must* have its `reference_reduced_potentials` input
        set. The `target_reduced_potentials` input of each copy is set by
        this function.

        If the template is a `ReweightStatistics` protocol whose
        `statistics_type` is an energy (potential energy, reduced potential,
        total energy or enthalpy), the `statistics_paths` of each copy are
        pointed at the energies evaluated using the perturbed parameter
        rather than those measured during the reference simulation.
    force_field_path: ProtocolPath
        The path to the force field parameters which the observables are
        being estimated at.
    coordinate_file_path: ProtocolPath
        A path to the initial coordinates of the simulation trajectory which
        was used to estimate the observable of interest.
    trajectory_file_path: ProtocolPath
        A path to the simulation trajectory which was used to estimate the
        observable of interest.
    statistics_file_path: ProtocolPath
        A path to the statistics which were generated alongside the
        trajectory passed to `trajectory_file_path`. These should have been
        generated using the passed `force_field_path`.
    replicator_id: str
        The id of the replicator which will replicate the group for every
        parameter of interest.
    substance_source: PlaceholderValue, optional
        The source of the substance whose gradient is being estimated. If
        `None`, the global `substance` is used.
    id_suffix: str
        An optional string to append to the end of each protocol id, after
        the replicator placeholder.
    enable_pbc: bool
        If true, periodic boundary conditions are employed when recalculating
        the reduced potentials.
    effective_sample_indices: ProtocolPath, optional
        A placeholder variable which in future will ensure that only samples
        with a non-zero weight are included in the gradient calculation. If
        `None`, an empty list is used.

    Returns
    -------
    ProtocolGroup
        The protocol group which will estimate the gradient of an observable
        with respect to one parameter.
    ProtocolReplicator
        The replicator which will copy the gradient group for every parameter
        of interest.
    ProtocolPath
        A protocol path which points to the final gradient value.
    """

    template = template_reweighting_protocol

    assert isinstance(template, reweighting.BaseMBARProtocol)
    assert template.reference_reduced_potentials is not None
    assert template.reference_reduced_potentials != UNDEFINED

    # Every id created below carries the replicator placeholder followed by
    # the caller supplied suffix.
    suffix = f"_$({replicator_id}){id_suffix}"

    # Fall back to the defaults for any optional inputs which were not given.
    if substance_source is None:
        substance_source = ProtocolPath("substance", "global")

    if effective_sample_indices is None:
        effective_sample_indices = []

    # The protocol which evaluates the reduced potentials of the reference,
    # forward and reverse states using only a subset of the full force field.
    potentials = openmm.OpenMMGradientPotentials(
        f"gradient_reduced_potentials{suffix}"
    )
    potentials.substance = substance_source
    potentials.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    potentials.force_field_path = force_field_path
    potentials.statistics_path = statistics_file_path
    potentials.trajectory_file_path = trajectory_file_path
    potentials.coordinate_file_path = coordinate_file_path
    potentials.parameter_key = ReplicatorValue(replicator_id)
    potentials.enable_pbc = enable_pbc
    potentials.effective_sample_indices = effective_sample_indices

    # These settings are applied to the template itself so that they are
    # picked up by the schema which is copied below.
    template.bootstrap_iterations = 1
    template.required_effective_samples = 0

    # If the observable itself depends on the parameter being perturbed (i.e.
    # it is an energy), the observable evaluated at the target state must be
    # used in place of the one recorded during the reference simulation.
    use_target_state_energies = False

    if isinstance(template, reweighting.ReweightStatistics):

        statistics_type = template.statistics_type

        use_target_state_energies = any(
            statistics_type == energy_type
            for energy_type in (
                ObservableType.PotentialEnergy,
                ObservableType.ReducedPotential,
                ObservableType.TotalEnergy,
                ObservableType.Enthalpy,
            )
        )

    template_schema = template.schema

    def _reweight_from_template(protocol_id, potentials_output):
        """Creates a reweighting protocol from a copy of the template schema
        whose target potentials are the named output of `potentials`."""
        schema = copy.deepcopy(template_schema)
        schema.id = protocol_id

        protocol = registered_workflow_protocols[schema.type](schema.id)
        protocol.schema = schema
        protocol.target_reduced_potentials = ProtocolPath(
            potentials_output, potentials.id
        )

        return protocol

    # The protocols which reweight the observable to the two perturbed states.
    reverse_mbar = _reweight_from_template(
        f"reverse_reweight{suffix}", "reverse_potentials_path"
    )
    forward_mbar = _reweight_from_template(
        f"forward_reweight{suffix}", "forward_potentials_path"
    )

    if use_target_state_energies:

        reverse_mbar.statistics_paths = [
            ProtocolPath("reverse_potentials_path", potentials.id)
        ]
        forward_mbar.statistics_paths = [
            ProtocolPath("forward_potentials_path", potentials.id)
        ]

    # The protocol which combines the two reweighted values into a gradient
    # using the central difference method.
    finite_difference = gradients.CentralDifferenceGradient(
        f"central_difference{suffix}"
    )
    finite_difference.parameter_key = ReplicatorValue(replicator_id)
    finite_difference.reverse_observable_value = ProtocolPath(
        "value", reverse_mbar.id
    )
    finite_difference.forward_observable_value = ProtocolPath(
        "value", forward_mbar.id
    )
    finite_difference.reverse_parameter_value = ProtocolPath(
        "reverse_parameter_value", potentials.id
    )
    finite_difference.forward_parameter_value = ProtocolPath(
        "forward_parameter_value", potentials.id
    )

    # Gather everything into a single group.
    gradient_group = groups.ProtocolGroup(f"gradient_group{suffix}")
    gradient_group.add_protocols(
        potentials, reverse_mbar, forward_mbar, finite_difference
    )

    # The replicator which copies the group once per parameter gradient key.
    parameter_replicator = ProtocolReplicator(replicator_id=replicator_id)
    parameter_replicator.template_values = ProtocolPath(
        "parameter_gradient_keys", "global"
    )

    gradient_source = ProtocolPath(
        "gradient", gradient_group.id, finite_difference.id
    )

    return gradient_group, parameter_replicator, gradient_source
Example #27
0
def generate_base_simulation_protocols(
    analysis_protocol,
    use_target_uncertainty,
    id_suffix="",
    conditional_group=None,
    n_molecules=1000,
):
    """Builds the protocols which, when combined in a workflow schema, run a
    single simulation from which a property is estimated. The observable to
    extract from the simulation is determined by the passed
    `analysis_protocol`.

    The protocols returned will:

        1) Build a set of liquid coordinates for the property substance
           using packmol.

        2) Assign force field parameters to the system.

        3) Perform an energy minimisation on the system.

        4) Run a short NPT equilibration simulation for 100000 steps using
           a timestep of 2fs.

        5) Within a conditional group (the default group allows a maximum
           of 100 iterations):

            5a) Run a longer NPT production simulation for 1000000 steps
                using a timestep of 2fs.

            5b) Extract the average value of an observable and its
                uncertainty.

            5c) If `use_target_uncertainty` is set and the default group is
                used, check if the target uncertainty has been met. If not,
                repeat steps 5a), 5b) and 5c).

        6) Extract uncorrelated configurations from the production
           simulation.

        7) Extract uncorrelated statistics from the production simulation.

    Parameters
    ----------
    analysis_protocol: AveragePropertyProtocol
        The protocol which will extract the observable of interest from the
        generated simulation data. It must be either an
        `AverageTrajectoryProperty` or an `ExtractAverageStatistic`; its data
        source inputs are set by this function.
    use_target_uncertainty: bool
        Whether to run the simulation until the observable is estimated to
        within the target uncertainty. Only used when `conditional_group`
        is `None`.
    id_suffix: str
        A string suffix to append to each of the protocol ids.
    conditional_group: ProtocolGroup, optional
        A custom group to wrap the production simulation and analysis
        protocols within. It is up to the caller to add any convergence
        conditions to this group. If `None`, a default group is constructed,
        with an uncertainty convergence condition when
        `use_target_uncertainty` is set.
    n_molecules: int
        The maximum number of molecules to build the coordinates with.

    Returns
    -------
    BaseSimulationProtocols
        A named tuple of the generated protocols.
    ProtocolPath
        A reference to the final value of the estimated observable
        and its uncertainty (a `pint.Measurement`).
    StoredSimulationData
        An object which describes the default data from a simulation to
        store, such as the uncorrelated statistics and configurations.

    Raises
    ------
    ValueError
        If the analysis protocol is neither an `AverageTrajectoryProperty`
        nor an `ExtractAverageStatistic`.
    """

    assert isinstance(analysis_protocol, analysis.AveragePropertyProtocol)

    # System set up: coordinates followed by parameters.
    packmol = coordinates.BuildCoordinatesPackmol(f"build_coordinates{id_suffix}")
    packmol.substance = ProtocolPath("substance", "global")
    packmol.max_molecules = n_molecules

    parameterise = forcefield.BaseBuildSystem(f"assign_parameters{id_suffix}")
    parameterise.force_field_path = ProtocolPath("force_field_path", "global")
    parameterise.coordinate_file_path = ProtocolPath(
        "coordinate_file_path", packmol.id
    )
    parameterise.substance = ProtocolPath("output_substance", packmol.id)

    # Equilibration: a minimisation followed by a short NPT simulation.
    minimise = openmm.OpenMMEnergyMinimisation(f"energy_minimisation{id_suffix}")
    minimise.input_coordinate_file = ProtocolPath("coordinate_file_path", packmol.id)
    minimise.system_path = ProtocolPath("system_path", parameterise.id)

    equilibrate = openmm.OpenMMSimulation(f"equilibration_simulation{id_suffix}")
    equilibrate.ensemble = Ensemble.NPT
    equilibrate.steps_per_iteration = 100000
    equilibrate.output_frequency = 5000
    equilibrate.timestep = 2.0 * unit.femtosecond
    equilibrate.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    equilibrate.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", minimise.id
    )
    equilibrate.system_path = ProtocolPath("system_path", parameterise.id)

    # Production: a longer NPT simulation started from the equilibrated
    # coordinates.
    production = openmm.OpenMMSimulation(f"production_simulation{id_suffix}")
    production.ensemble = Ensemble.NPT
    production.steps_per_iteration = 1000000
    production.output_frequency = 2000
    production.timestep = 2.0 * unit.femtosecond
    production.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    production.input_coordinate_file = ProtocolPath(
        "output_coordinate_file", equilibrate.id
    )
    production.system_path = ProtocolPath("system_path", parameterise.id)

    # Only a group which is created here has its convergence behaviour
    # configured here; a caller supplied group is used as is.
    build_default_group = conditional_group is None

    if build_default_group:

        conditional_group = groups.ConditionalGroup(f"conditional_group{id_suffix}")
        conditional_group.max_iterations = 100

    if build_default_group and use_target_uncertainty:

        uncertainty_condition = groups.ConditionalGroup.Condition()
        uncertainty_condition.right_hand_value = ProtocolPath(
            "target_uncertainty", "global"
        )
        uncertainty_condition.type = groups.ConditionalGroup.Condition.Type.LessThan
        uncertainty_condition.left_hand_value = ProtocolPath(
            "value.error", conditional_group.id, analysis_protocol.id
        )

        conditional_group.add_condition(uncertainty_condition)

        # Tie the length of the simulation to the iteration of the group so
        # that it gets extended after each iteration.
        production.total_number_of_iterations = ProtocolPath(
            "current_iteration", conditional_group.id
        )

    conditional_group.add_protocols(production, analysis_protocol)

    # Hook the analysis protocol up to the output of the production run.
    supported_analysis_types = (
        analysis.AverageTrajectoryProperty,
        analysis.ExtractAverageStatistic,
    )

    if not isinstance(analysis_protocol, supported_analysis_types):
        raise ValueError(
            "The analysis protocol must inherit from either the "
            "AverageTrajectoryProperty or ExtractAverageStatistic "
            "protocols."
        )

    if isinstance(analysis_protocol, analysis.AverageTrajectoryProperty):
        analysis_protocol.input_coordinate_file = ProtocolPath(
            "coordinate_file_path", packmol.id
        )
        analysis_protocol.trajectory_path = ProtocolPath(
            "trajectory_file_path", production.id
        )
    else:
        analysis_protocol.statistics_path = ProtocolPath(
            "statistics_file_path", production.id
        )

    # The outputs of the group which the decorrelation protocols consume.
    group_id = conditional_group.id

    inefficiency_source = ProtocolPath(
        "statistical_inefficiency", group_id, analysis_protocol.id
    )
    equilibration_source = ProtocolPath(
        "equilibration_index", group_id, analysis_protocol.id
    )
    coordinate_source = ProtocolPath("output_coordinate_file", group_id, production.id)
    trajectory_source = ProtocolPath("trajectory_file_path", group_id, production.id)
    statistics_source = ProtocolPath("statistics_file_path", group_id, production.id)

    decorrelate_trajectory = analysis.ExtractUncorrelatedTrajectoryData(
        f"extract_traj{id_suffix}"
    )
    decorrelate_trajectory.statistical_inefficiency = inefficiency_source
    decorrelate_trajectory.equilibration_index = equilibration_source
    decorrelate_trajectory.input_coordinate_file = coordinate_source
    decorrelate_trajectory.input_trajectory_path = trajectory_source

    decorrelate_statistics = analysis.ExtractUncorrelatedStatisticsData(
        f"extract_stats{id_suffix}"
    )
    decorrelate_statistics.statistical_inefficiency = inefficiency_source
    decorrelate_statistics.equilibration_index = equilibration_source
    decorrelate_statistics.input_statistics_path = statistics_source

    # Describe which pieces of the simulation output should be stored.
    stored_data = StoredSimulationData()

    stored_data.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")
    stored_data.property_phase = PropertyPhase.Liquid

    stored_data.force_field_id = PlaceholderValue()

    stored_data.number_of_molecules = ProtocolPath(
        "output_number_of_molecules", packmol.id
    )
    stored_data.substance = ProtocolPath("output_substance", packmol.id)
    stored_data.statistical_inefficiency = inefficiency_source
    stored_data.statistics_file_name = ProtocolPath(
        "output_statistics_path", decorrelate_statistics.id
    )
    stored_data.trajectory_file_name = ProtocolPath(
        "output_trajectory_path", decorrelate_trajectory.id
    )
    stored_data.coordinate_file_name = coordinate_source

    stored_data.source_calculation_id = PlaceholderValue()

    # The estimated value is read from the analysis protocol inside the group.
    value_source = ProtocolPath("value", group_id, analysis_protocol.id)

    generated_protocols = BaseSimulationProtocols(
        packmol,
        parameterise,
        minimise,
        equilibrate,
        production,
        analysis_protocol,
        conditional_group,
        decorrelate_trajectory,
        decorrelate_statistics,
    )

    return generated_protocols, value_source, stored_data
Example #28
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Builds the default calculation schema to follow when estimating
        this property by reweighting existing data.

        At most one of `absolute_tolerance` and `relative_tolerance` may be
        set.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        data_replicator_id = "data_replicator"

        # The protocol which extracts the dielectric constant from each piece
        # of stored data; its id carries the data replicator placeholder.
        extraction = ExtractAverageDielectric(
            f"calc_dielectric_$({data_replicator_id})"
        )

        # The dielectric constant is a fluctuation property, and so a
        # specialised reweighting protocol is employed.
        reweighting_protocol = ReweightDielectricConstant("reweight_dielectric")
        reweighting_protocol.reference_dipole_moments = ProtocolPath(
            "uncorrelated_values", extraction.id
        )
        reweighting_protocol.reference_volumes = ProtocolPath(
            "uncorrelated_volumes", extraction.id
        )
        reweighting_protocol.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )
        reweighting_protocol.bootstrap_uncertainties = True
        reweighting_protocol.bootstrap_iterations = 200
        reweighting_protocol.required_effective_samples = n_effective_samples

        base_protocols = generate_base_reweighting_protocols(
            extraction, reweighting_protocol, data_replicator_id
        )
        protocols, data_replicator = base_protocols

        # Hook the remaining inputs of the extraction protocol up to the
        # outputs of the generated protocols.
        extraction.system_path = ProtocolPath(
            "system_path", protocols.build_reference_system.id
        )
        extraction.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", protocols.unpack_stored_data.id
        )

        # The data sources consumed by the gradient calculations.
        gradient_coordinates = ProtocolPath(
            "output_coordinate_path", protocols.concatenate_trajectories.id
        )
        gradient_trajectory = ProtocolPath(
            "output_trajectory_path", protocols.concatenate_trajectories.id
        )
        gradient_statistics = ProtocolPath(
            "statistics_file_path", protocols.reduced_target_potential.id
        )

        # A copy is handed over as the template so that the protocol used in
        # the main workflow is not the object the gradient generator receives.
        gradient_template = copy.deepcopy(reweighting_protocol)

        gradient_protocols = generate_gradient_protocol_group(
            gradient_template,
            ProtocolPath("force_field_path", "global"),
            gradient_coordinates,
            gradient_trajectory,
            gradient_statistics,
            replicator_id="grad",
            effective_sample_indices=ProtocolPath(
                "effective_sample_indices", reweighting_protocol.id
            ),
        )
        gradient_group, gradient_replicator, gradient_source = gradient_protocols

        # Assemble the workflow schema.
        protocol_schemas = [protocol.schema for protocol in protocols]
        protocol_schemas.append(gradient_group.schema)

        workflow_schema = WorkflowSchema()
        workflow_schema.protocol_schemas = protocol_schemas
        workflow_schema.protocol_replicators = [data_replicator, gradient_replicator]
        workflow_schema.gradients_sources = [gradient_source]
        workflow_schema.final_value_source = ProtocolPath(
            "value", protocols.mbar_protocol.id
        )

        reweighting_schema.workflow_schema = workflow_schema
        return reweighting_schema
Example #29
0
    def default_simulation_schema(
        absolute_tolerance=UNDEFINED, relative_tolerance=UNDEFINED, n_molecules=1000
    ):
        """Builds the default calculation schema to follow when estimating
        this class of property from direct simulations.

        At most one of `absolute_tolerance` and `relative_tolerance` may be
        set. If either is set, the simulation is run until the target
        uncertainty is met or the maximum number of iterations is reached.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float
            The tolerance (as a fraction of the properties reported
            uncertainty) to estimate the property to within.
        n_molecules: int
            The number of molecules to use in the simulation.

        Returns
        -------
        SimulationSchema
            The schema to follow when estimating this property.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        simulation_schema = SimulationSchema()
        simulation_schema.absolute_tolerance = absolute_tolerance
        simulation_schema.relative_tolerance = relative_tolerance

        # The protocol which extracts the average dielectric constant from
        # the output of a simulation.
        dielectric = ExtractAverageDielectric("extract_dielectric")
        dielectric.thermodynamic_state = ProtocolPath("thermodynamic_state", "global")

        # A target uncertainty is only employed if a tolerance was provided.
        use_target_uncertainty = (
            absolute_tolerance != UNDEFINED or relative_tolerance != UNDEFINED
        )

        # The protocols which run the simulation itself.
        simulation_protocols = generate_base_simulation_protocols(
            dielectric, use_target_uncertainty, n_molecules=n_molecules
        )
        protocols, value_source, output_to_store = simulation_protocols

        # Finish hooking up the inputs of the analysis protocol.
        dielectric.system_path = ProtocolPath(
            "system_path", protocols.assign_parameters.id
        )

        # Dielectric constants typically take longer to converge, which is
        # reflected in a larger maximum number of convergence iterations.
        convergence_group = protocols.converge_uncertainty
        convergence_group.max_iterations = 400

        # The data sources consumed by the gradient calculations.
        production = protocols.production_simulation

        coordinate_source = ProtocolPath(
            "output_coordinate_file", protocols.equilibration_simulation.id
        )
        trajectory_source = ProtocolPath(
            "trajectory_file_path", convergence_group.id, production.id
        )
        statistics_source = ProtocolPath(
            "statistics_file_path", convergence_group.id, production.id
        )

        # The gradients of a dielectric constant require a slightly
        # specialised reweighting protocol, which is used as the template.
        gradient_template = ReweightDielectricConstant("gradient_mbar")
        gradient_template.reference_dipole_moments = [
            ProtocolPath("dipole_moments", convergence_group.id, dielectric.id)
        ]
        gradient_template.reference_volumes = [
            ProtocolPath("volumes", convergence_group.id, dielectric.id)
        ]
        gradient_template.thermodynamic_state = ProtocolPath(
            "thermodynamic_state", "global"
        )
        gradient_template.reference_reduced_potentials = statistics_source

        gradient_protocols = generate_gradient_protocol_group(
            gradient_template,
            ProtocolPath("force_field_path", "global"),
            coordinate_source,
            trajectory_source,
            statistics_source,
        )
        gradient_group, gradient_replicator, gradient_source = gradient_protocols

        # Assemble the workflow schema. The production simulation and the
        # analysis protocol are included through the convergence group.
        workflow_schema = WorkflowSchema()

        workflow_schema.protocol_schemas = [
            protocols.build_coordinates.schema,
            protocols.assign_parameters.schema,
            protocols.energy_minimisation.schema,
            protocols.equilibration_simulation.schema,
            convergence_group.schema,
            protocols.extract_uncorrelated_trajectory.schema,
            protocols.extract_uncorrelated_statistics.schema,
            gradient_group.schema,
        ]

        workflow_schema.protocol_replicators = [gradient_replicator]

        workflow_schema.outputs_to_store = {"full_system": output_to_store}

        workflow_schema.gradients_sources = [gradient_source]
        workflow_schema.final_value_source = value_source

        simulation_schema.workflow_schema = workflow_schema
        return simulation_schema
Example #30
0
    def default_reweighting_schema(
        absolute_tolerance=UNDEFINED,
        relative_tolerance=UNDEFINED,
        n_effective_samples=50,
    ):
        """Construct the default schema used to estimate this property by
        reweighting previously cached simulation data.

        The workflow reweights data for the full system and, through a
        replicator over the global ``components``, for each component. The
        component volumes are summed by an ``AddValues`` protocol and then
        combined with the full system volume by a ``SubtractValues``
        protocol; the gradients are combined with the same pair of protocol
        types, once per entry of the gradient replicator.

        Parameters
        ----------
        absolute_tolerance: pint.Quantity, optional
            The absolute tolerance to estimate the property to within.
        relative_tolerance: float, optional
            The tolerance (as a fraction of the property's reported
            uncertainty) to estimate the property to within.
        n_effective_samples: int
            The minimum number of effective samples to require when
            reweighting the cached simulation data.

        Returns
        -------
        ReweightingSchema
            The schema to follow when estimating this property.

        Notes
        -----
        At most one of ``absolute_tolerance`` and ``relative_tolerance`` may
        be set; this is enforced with an ``assert``.
        """
        assert absolute_tolerance == UNDEFINED or relative_tolerance == UNDEFINED

        reweighting_schema = ReweightingSchema()
        reweighting_schema.absolute_tolerance = absolute_tolerance
        reweighting_schema.relative_tolerance = relative_tolerance

        # Queries used to pull the cached data out of storage.
        storage_queries = ExcessMolarVolume._default_reweighting_storage_query()
        reweighting_schema.storage_queries = storage_queries

        # One replica of the component workflow is generated per component
        # of the system.
        component_replicator = ProtocolReplicator(replicator_id="component_replicator")
        component_replicator.template_values = ProtocolPath("components", "global")

        # One replica of the gradient protocols is generated per parameter
        # gradient key.
        gradient_replicator = ProtocolReplicator("gradient")
        gradient_replicator.template_values = ProtocolPath(
            "parameter_gradient_keys", "global"
        )
        gradient_placeholder = gradient_replicator.placeholder_id

        # Protocols which reweight the data of the full system.
        (
            system_protocols,
            system_volume,
            system_data_replicator,
            system_gradient_group,
            system_gradient_source,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_full",
            gradient_replicator.id,
            "full_data_replicator",
            n_effective_samples=n_effective_samples,
        )

        # Protocols which reweight the data of each individual component.
        per_component_data_replicator_id = (
            f"component_{component_replicator.placeholder_id}_data_replicator"
        )

        (
            per_component_protocols,
            per_component_volumes,
            per_component_data_replicator,
            per_component_gradient_group,
            per_component_gradient_source,
        ) = ExcessMolarVolume._get_reweighting_protocols(
            "_component",
            gradient_replicator.id,
            per_component_data_replicator_id,
            replicator_id=component_replicator.id,
            weight_by_mole_fraction=True,
            substance_reference=ReplicatorValue(component_replicator.id),
            n_effective_samples=n_effective_samples,
        )

        # Restrict the component data replicator to the data stored for the
        # component currently being replicated.
        per_component_data_replicator.template_values = ProtocolPath(
            f"component_data[$({component_replicator.id})]", "global"
        )

        # Sum the component volumes, then combine the sum with the volume
        # of the full system.
        sum_component_volumes = miscellaneous.AddValues("add_component_molar_volumes")
        sum_component_volumes.values = per_component_volumes

        excess_volume = miscellaneous.SubtractValues("calculate_excess_potential")
        excess_volume.value_b = system_volume
        excess_volume.value_a = ProtocolPath("result", sum_component_volumes.id)

        # Combine the gradients in the same fashion.
        sum_component_gradients = miscellaneous.AddValues(
            f"add_component_gradients_{gradient_placeholder}"
        )
        sum_component_gradients.values = per_component_gradient_source

        excess_gradients = miscellaneous.SubtractValues(
            f"combine_gradients_{gradient_placeholder}"
        )
        excess_gradients.value_b = system_gradient_source
        excess_gradients.value_a = ProtocolPath("result", sum_component_gradients.id)

        # Collect the schemas of every protocol taking part in the workflow.
        protocol_schemas = [protocol.schema for protocol in system_protocols]
        protocol_schemas += [protocol.schema for protocol in per_component_protocols]
        protocol_schemas += [
            sum_component_volumes.schema,
            excess_volume.schema,
            system_gradient_group.schema,
            per_component_gradient_group.schema,
            sum_component_gradients.schema,
            excess_gradients.schema,
        ]

        # Assemble the final workflow schema.
        workflow = WorkflowSchema()
        workflow.protocol_schemas = protocol_schemas
        workflow.protocol_replicators = [
            system_data_replicator,
            component_replicator,
            per_component_data_replicator,
            gradient_replicator,
        ]
        workflow.gradients_sources = [ProtocolPath("result", excess_gradients.id)]
        workflow.final_value_source = ProtocolPath("result", excess_volume.id)

        reweighting_schema.workflow_schema = workflow
        return reweighting_schema