Example #1
0
def test_protocol_graph_execution(calculation_backend, compute_resources):
    """Execute a two protocol chain and check the downstream protocol
    receives the upstream protocol's output, both via the serialized
    results file and (for local execution) in memory."""

    if calculation_backend is not None:
        calculation_backend.start()

    # Chain two dummy protocols: the sink consumes the source's output.
    source_protocol = DummyInputOutputProtocol("protocol_a")
    source_protocol.input_value = 1
    sink_protocol = DummyInputOutputProtocol("protocol_b")
    sink_protocol.input_value = ProtocolPath("output_value", source_protocol.id)

    graph = ProtocolGraph()
    graph.add_protocols(source_protocol, sink_protocol)

    with tempfile.TemporaryDirectory() as directory:

        results = graph.execute(directory, calculation_backend,
                                compute_resources)

        final_result = results[sink_protocol.id]

        if calculation_backend is not None:
            # Backend execution yields futures which must be resolved.
            final_result = final_result.result()

        # The second element of the result is the path to the serialized
        # protocol outputs.
        with open(final_result[1]) as file:
            results_b = json.load(file, cls=TypedJSONDecoder)

    assert results_b[".output_value"] == source_protocol.input_value

    if compute_resources is not None:
        # Local (in-process) execution also updates the protocol objects.
        assert sink_protocol.output_value == source_protocol.input_value

    if calculation_backend is not None:
        calculation_backend.stop()
Example #2
0
def test_protocol_group_merging():
    """Check that two identically structured (but differently prefixed)
    sets of grouped protocols merge into a single copy whose schema is
    left untouched by the merge."""

    def build_protocols(prefix):
        # Topology under construction:
        #     .-------------------.
        #     |          / i - j -|- b
        # a - | g - h - |         |
        #     |          \ k - l -|- c
        #     .-------------------.
        root = DummyInputOutputProtocol(prefix + "protocol_a")
        root.input_value = 1

        fork_protocols = build_fork(prefix)
        fork_protocols[0].input_value = ProtocolPath("output_value", root.id)

        group = ProtocolGroup(prefix + "protocol_group")
        group.add_protocols(*fork_protocols)

        leaf_b = DummyInputOutputProtocol(prefix + "protocol_b")
        leaf_b.input_value = ProtocolPath(
            "output_value", group.id, "protocol_j")
        leaf_c = DummyInputOutputProtocol(prefix + "protocol_c")
        leaf_c.input_value = ProtocolPath(
            "output_value", group.id, "protocol_l")

        return [root, group, leaf_b, leaf_c]

    protocols_a = build_protocols("a_")
    protocols_b = build_protocols("b_")

    graph = ProtocolGraph()
    graph.add_protocols(*protocols_a)
    graph.add_protocols(*protocols_b)

    # The second, identical set should have merged into the first.
    assert len(graph.protocols) == len(protocols_a)
    assert "a_protocol_group" in graph.protocols

    original_group = protocols_a[1]
    merged_group = graph.protocols["a_protocol_group"]

    # Merging must not have altered the surviving group's schema.
    assert original_group.schema.json() == merged_group.schema.json()
Example #3
0
def test_protocol_group_resume():
    """A test that protocol groups can recover after being killed
    (e.g. by a worker being killed due to hitting a wallclock limit)
    """

    compute_resources = ComputeResources()

    def chain_protocols(names):
        # Build a linear chain of dummy protocols where each protocol
        # consumes the previous one's output; the head gets a constant.
        chain = []
        for name in names:
            protocol = DummyInputOutputProtocol(name)
            if chain:
                protocol.input_value = ProtocolPath(
                    "output_value", chain[-1].id)
            else:
                protocol.input_value = 1
            chain.append(protocol)
        return chain

    # Fake a protocol group which executes the first
    # two protocols and then 'gets killed'.
    partial_group = ProtocolGroup("group_a")
    partial_group.add_protocols(*chain_protocols(["protocol_a",
                                                  "protocol_b"]))

    graph = ProtocolGraph()
    graph.add_protocols(partial_group)
    graph.execute("graph_a", compute_resources=compute_resources)

    # Remove the output file so it appears that the protocol group
    # had not completed.
    os.unlink(
        os.path.join("graph_a", partial_group.id,
                     f"{partial_group.id}_output.json"))

    # Build the 'full' group with the last two protocols which
    # 'had not been executed' after the group was 'killed'
    full_group = ProtocolGroup("group_a")
    full_group.add_protocols(*chain_protocols(
        ["protocol_a", "protocol_b", "protocol_c", "protocol_d"]))

    graph = ProtocolGraph()
    graph.add_protocols(full_group)
    graph.execute("graph_a", compute_resources=compute_resources)

    # Every output should have been populated on resume.
    assert all(x != UNDEFINED for x in full_group.outputs.values())
Example #4
0
def test_protocol_graph_simple(protocols_a, protocols_b):
    """Check the merging behaviour of ``ProtocolGraph`` when identical
    protocol sets are added sequentially versus in a single call."""

    def dependants_count(graph, reduce):
        # Size of the dependants graph built from the graph's protocols.
        built = graph._build_dependants_graph(
            graph.protocols, False, apply_reduction=reduce)
        return len(built)

    # Make sure that the graph can merge simple protocols
    # when they are added one after the other.
    graph = ProtocolGraph()
    graph.add_protocols(*protocols_a)

    assert len(graph.protocols) == len(protocols_a)
    assert dependants_count(graph, True) == len(protocols_a)
    n_root_protocols = len(graph.root_protocols)

    graph.add_protocols(*protocols_b)

    # The second, identical set should have merged away entirely.
    assert len(graph.protocols) == len(protocols_a)
    assert dependants_count(graph, False) == len(protocols_a)
    assert len(graph.root_protocols) == n_root_protocols

    # Currently the graph shouldn't merge with an
    # addition
    graph = ProtocolGraph()
    graph.add_protocols(*protocols_a, *protocols_b)

    expected_total = len(protocols_a) + len(protocols_b)
    assert len(graph.protocols) == expected_total
    assert dependants_count(graph, False) == expected_total
    assert len(graph.root_protocols) == 2 * n_root_protocols
Example #5
0
    def __init__(self):
        """Creates a new, empty workflow graph."""

        super(WorkflowGraph, self).__init__()

        # Maps a workflow's uuid to the workflow awaiting execution.
        self._workflows_to_execute = {}
        # The underlying graph of (possibly merged) protocols.
        self._protocol_graph = ProtocolGraph()
Example #6
0
class WorkflowGraph:
    """A hierarchical structure for storing and submitting the workflows
    which will estimate a set of physical properties.
    """
    @property
    def protocols(self):
        """dict of str and Protocol: The protocols in this graph."""
        return self._protocol_graph.protocols

    @property
    def root_protocols(self):
        """list of str: The ids of the protocols in the group which do not
        take input from the other grouped protocols."""
        return self._protocol_graph.root_protocols

    def __init__(self):
        """Creates a new, empty workflow graph."""

        super(WorkflowGraph, self).__init__()

        # Maps a workflow's uuid to the workflow awaiting execution.
        self._workflows_to_execute = {}
        # The underlying graph of (possibly merged) protocols.
        self._protocol_graph = ProtocolGraph()

    def add_workflows(self, *workflows):
        """Insert a set of workflows into the workflow graph.

        Parameters
        ----------
        workflows: Workflow
            The workflows to insert.

        Raises
        ------
        ValueError
            If two of the given workflows share a uuid, or a workflow
            with the same uuid is already present in the graph.
        """

        workflow_uuids = [x.uuid for x in workflows]

        # Reject duplicate uuids within the incoming set.
        if len(set(workflow_uuids)) != len(workflow_uuids):
            raise ValueError("A number of workflows have the same uuid.")

        existing_uuids = [
            x for x in workflow_uuids if x in self._workflows_to_execute
        ]

        if len(existing_uuids) > 0:

            raise ValueError(
                f"Workflows with the uuids {existing_uuids} are already in the graph."
            )

        original_protocols = []

        for workflow in workflows:

            original_protocols.extend(workflow.protocols.values())
            self._workflows_to_execute[workflow.uuid] = workflow

        # Add the workflow protocols to the graph.
        merged_protocol_ids = self._protocol_graph.add_protocols(
            *original_protocols, allow_external_dependencies=False)

        # Update the workflow to use the possibly merged protocols
        for original_id, new_id in merged_protocol_ids.items():

            original_protocol = original_id
            new_protocol = new_id

            for workflow in workflows:

                # Skip workflows which this protocol does not belong to —
                # ownership is determined by the uuid embedded in the
                # protocol's id.
                if (retrieve_uuid(original_protocol if isinstance(
                        original_protocol, str) else original_protocol.id) !=
                        workflow.uuid):
                    continue

                if original_protocol in workflow.protocols:
                    # Only retrieve the actual protocol if it isn't nested in
                    # a group.
                    original_protocol = workflow.protocols[original_id]
                    new_protocol = self._protocol_graph.protocols[new_id]

                workflow.replace_protocol(original_protocol, new_protocol,
                                          True)

    def execute(self,
                root_directory="",
                calculation_backend=None,
                compute_resources=None):
        """Executes the workflow graph.

        Parameters
        ----------
        root_directory: str
            The directory to execute the graph in.
        calculation_backend: CalculationBackend, optional.
            The backend to execute the graph on. This parameter
            is mutually exclusive with `compute_resources`.
        compute_resources: ComputeResources, optional.
            The compute resources to run using. If None and no
            `calculation_backend` is specified, the workflow will
            be executed on a single CPU thread. This parameter
            is mutually exclusive with `calculation_backend`.

        Returns
        -------
        list of WorkflowResult or list of Future of WorkflowResult:
            The results of executing the graph. If a `calculation_backend`
            is specified, these results will be wrapped in a `Future`.
        """
        # Default to a single local thread when neither execution target
        # was provided.
        if calculation_backend is None and compute_resources is None:
            compute_resources = ComputeResources(number_of_threads=1)

        protocol_outputs = self._protocol_graph.execute(
            root_directory, calculation_backend, compute_resources)

        value_futures = []

        for workflow_id in self._workflows_to_execute:

            workflow = self._workflows_to_execute[workflow_id]
            data_futures = []

            # Make sure we keep track of all of the futures which we
            # will use to populate things such as a final property value
            # or gradient keys.
            if workflow.final_value_source != UNDEFINED:

                protocol_id = workflow.final_value_source.start_protocol
                data_futures.append(protocol_outputs[protocol_id])

            if workflow.gradients_sources != UNDEFINED:

                for gradient_source in workflow.gradients_sources:

                    protocol_id = gradient_source.start_protocol
                    data_futures.append(protocol_outputs[protocol_id])

            if workflow.outputs_to_store != UNDEFINED:

                for output_label, output_to_store in workflow.outputs_to_store.items(
                ):

                    # Only outputs whose attribute values reference a
                    # protocol output need that protocol's result.
                    for attribute_name in output_to_store.get_attributes(
                            StorageAttribute):

                        attribute_value = getattr(output_to_store,
                                                  attribute_name)

                        if not isinstance(attribute_value, ProtocolPath):
                            continue

                        data_futures.append(
                            protocol_outputs[attribute_value.start_protocol])

            # Fall back to depending on every protocol output when no
            # specific outputs were requested.
            if len(data_futures) == 0:
                data_futures = [*protocol_outputs.values()]

            if calculation_backend is None:

                # Local execution: gather results synchronously.
                value_futures.append(
                    WorkflowGraph._gather_results(
                        root_directory,
                        workflow.uuid,
                        workflow.final_value_source,
                        workflow.gradients_sources,
                        workflow.outputs_to_store,
                        *data_futures,
                    ))

            else:

                # Backend execution: submit the gather step as a task so
                # it runs once the protocol futures resolve.
                value_futures.append(
                    calculation_backend.submit_task(
                        WorkflowGraph._gather_results,
                        root_directory,
                        workflow.uuid,
                        workflow.final_value_source,
                        workflow.gradients_sources,
                        workflow.outputs_to_store,
                        *data_futures,
                    ))

        return value_futures

    @staticmethod
    def _gather_results(
        directory,
        workflow_id,
        value_reference,
        gradient_sources,
        outputs_to_store,
        *protocol_result_paths,
        **_,
    ):
        """Gather the data associated with the workflows in this graph.

        Parameters
        ----------
        directory: str
            The directory to store any working files in.
        workflow_id: str
            The id of the workflow associated with this result.
        value_reference: ProtocolPath, optional
            A reference to which property in the output dictionary is the actual value.
        gradient_sources: list of ProtocolPath
            A list of references to those entries in the output dictionaries which correspond
            to parameter gradients.
        outputs_to_store: dict of str and WorkflowOutputToStore
            A list of references to data which should be stored on the storage backend.
        protocol_result_paths: tuple of str and str
            Pairs of (protocol id, path to that protocol's serialized results).

        Returns
        -------
        CalculationLayerResult, optional
            The result of attempting to estimate this property from a workflow graph. `None`
            will be returned if the target uncertainty is set but not met.
        """

        return_object = WorkflowResult()
        return_object.workflow_id = workflow_id

        try:

            results_by_id = {}

            for protocol_id, protocol_result_path in protocol_result_paths:

                with open(protocol_result_path, "r") as file:
                    protocol_results = json.load(file, cls=TypedJSONDecoder)

                # Make sure none of the protocols failed and we actually have a value
                # and uncertainty.
                if isinstance(protocol_results, EvaluatorException):

                    return_object.exceptions.append(protocol_results)
                    return return_object

                # Store the protocol results in a dictionary, with keys of the
                # path to the original protocol output.
                for protocol_path, output_value in protocol_results.items():

                    protocol_path = ProtocolPath.from_string(protocol_path)

                    # Make the stored path fully qualified so it matches the
                    # workflow's own `ProtocolPath` references.
                    if (protocol_path.start_protocol is None
                            or protocol_path.start_protocol != protocol_id):
                        protocol_path.prepend_protocol_id(protocol_id)

                    results_by_id[protocol_path] = output_value

            # NOTE(review): `execute` filters on `!= UNDEFINED` but this
            # checks `is not None` — confirm `value_reference` can never be
            # UNDEFINED here (an UNDEFINED key lookup would be caught by the
            # except block below and recorded as an exception).
            if value_reference is not None:
                return_object.value = results_by_id[value_reference]

            for gradient_source in gradient_sources:
                return_object.gradients.append(results_by_id[gradient_source])

            return_object.data_to_store = []

            for output_to_store in outputs_to_store.values():

                # A fresh unique id per stored output keeps the data files
                # from colliding within the shared directory.
                unique_id = str(uuid.uuid4()).replace("-", "")

                data_object_path = path.join(directory,
                                             f"data_{unique_id}.json")
                data_directory = path.join(directory, f"data_{unique_id}")

                WorkflowGraph._store_output_data(
                    data_object_path,
                    data_directory,
                    output_to_store,
                    results_by_id,
                )

                return_object.data_to_store.append(
                    (data_object_path, data_directory))

        except Exception as e:
            # Record (rather than raise) failures so callers receive a
            # result object describing what went wrong.
            return_object.exceptions.append(
                EvaluatorException.from_exception(e))

        return return_object

    @staticmethod
    def _store_output_data(
        data_object_path,
        data_directory,
        output_to_store,
        results_by_id,
    ):
        """Collects all of the simulation to store, and saves it into a directory
        whose path will be passed to the storage backend to process.

        Parameters
        ----------
        data_object_path: str
            The file path to serialize the data object to.
        data_directory: str
            The path of the directory to store ancillary data in.
        output_to_store: BaseStoredData
            An object which contains `ProtocolPath`s pointing to the
            data to store.
        results_by_id: dict of ProtocolPath and any
            The results of the protocols which formed the property
            estimation workflow.
        """

        makedirs(data_directory, exist_ok=True)

        for attribute_name in output_to_store.get_attributes(StorageAttribute):

            attribute = getattr(output_to_store.__class__, attribute_name)
            attribute_value = getattr(output_to_store, attribute_name)

            # Only attributes referencing protocol outputs get resolved;
            # everything else is left untouched.
            if not isinstance(attribute_value, ProtocolPath):
                continue

            attribute_value = results_by_id[attribute_value]

            # File-valued attributes are copied into the data directory and
            # stored by base name so the directory is self-contained.
            if issubclass(attribute.type_hint, FilePath):
                file_copy(attribute_value, data_directory)
                attribute_value = path.basename(attribute_value)

            setattr(output_to_store, attribute_name, attribute_value)

        with open(data_object_path, "w") as file:
            json.dump(output_to_store, file, cls=TypedJSONEncoder)