예제 #1
0
    def _get_unreplicated_path(self, protocol_path):
        """Checks to see if the protocol pointed to by this path will only
        exist after a replicator has been applied, and if so, returns a
        path to the unreplicated protocol.

        Parameters
        ----------
        protocol_path: ProtocolPath
            The path to convert to an unreplicated path.

        Returns
        -------
        ProtocolPath
            The path which should point to only unreplicated protocols
        """

        if self.protocol_replicators == UNDEFINED:
            return protocol_path.copy()

        full_unreplicated_path = str(protocol_path.full_path)

        for replicator in self.protocol_replicators:

            if replicator.placeholder_id in full_unreplicated_path:
                continue

            protocols_to_replicate = self._find_protocols_to_be_replicated(
                replicator)

            for protocol_id in protocols_to_replicate:

                match_pattern = re.escape(
                    protocol_id.replace(replicator.placeholder_id, r"\d+"))
                match_pattern = match_pattern.replace(re.escape(r"\d+"),
                                                      r"\d+")

                full_unreplicated_path = re.sub(match_pattern, protocol_id,
                                                full_unreplicated_path)

        return ProtocolPath.from_string(full_unreplicated_path)
예제 #2
0
    def update_references(self, protocols, replication_map, template_values):
        """Redirects the input references of protocols to the replicated
        versions.

        Parameters
        ----------
        protocols: dict of str and Protocol
            The protocols which have had this replicator applied
            to them.
        replication_map: dict of ProtocolPath and list of tuple of ProtocolPath and int
            A dictionary of references to all of the protocols which have
            been replicated, with keys of original protocol ids. Each value
            is comprised of a list of the replicated protocol ids, and their
            index into the `template_values` array.
        template_values: List of Any
            A list of the values which will be inserted
            into the newly replicated protocols.
        """

        inverse_replication_map = {}

        for original_id, replicated_ids in replication_map.items():
            for replicated_id, index in replicated_ids:
                inverse_replication_map[replicated_id] = (original_id, index)

        for protocol_id, protocol in protocols.items():

            # Look at each of the protocols inputs and see if its value is either a ProtocolPath,
            # or a list of ProtocolPath's.
            for required_input in protocol.required_inputs:

                all_value_references = protocol.get_value_references(
                    required_input)
                replicated_value_references = {}

                for source_path, value_reference in all_value_references.items(
                ):

                    if self.placeholder_id not in value_reference.full_path:
                        continue

                    replicated_value_references[source_path] = value_reference

                # If this protocol does not take input from one of the replicated protocols,
                # then we are done.
                if len(replicated_value_references) == 0:
                    continue

                for source_path, value_reference in replicated_value_references.items(
                ):

                    full_source_path = source_path.copy()
                    full_source_path.prepend_protocol_id(protocol_id)

                    # If the protocol was not itself replicated by this replicator, its value
                    # is set to a list containing references to all newly replicated protocols.
                    # Otherwise, the value will be set to a reference to just the protocol which
                    # was replicated using the same index.
                    value_source = [
                        ProtocolPath.from_string(
                            value_reference.full_path.replace(
                                self.placeholder_id, str(index)))
                        for index in range(len(template_values))
                    ]

                    for replicated_id, map_tuple in inverse_replication_map.items(
                    ):

                        original_id, replicated_index = map_tuple

                        if (full_source_path.protocol_path !=
                                replicated_id.protocol_path):
                            continue

                        value_source = ProtocolPath.from_string(
                            value_reference.full_path.replace(
                                self.placeholder_id, str(replicated_index)))

                        break

                    # Replace the input value with a list of ProtocolPath's that point to
                    # the newly generated protocols.
                    protocol.set_value(source_path, value_source)
예제 #3
0
    def _apply_replicator_to_replicators(replicator, schema, template_values):
        """Applies a replicator to any replicators which depend upon
        it (e.g. replicators with ids similar to `other_id_$(replicator.id)`).

        Parameters
        ----------
        replicator: ProtocolReplicator
            The replicator being applied.
        schema: WorkflowSchema
            The workflow schema to which the replicator belongs.
        template_values: List of Any
            The values which the replicator is applying.
        """

        # Look over all of the replicators left to apply and update them
        # to point to the newly replicated protocols where appropriate.
        new_indices = [str(index) for index in range(len(template_values))]

        replicators = []

        for original_replicator in schema.protocol_replicators:

            # Check whether this replicator will be replicated.
            if replicator.placeholder_id not in original_replicator.id:

                replicators.append(original_replicator)
                continue

            # Create the replicated replicators
            for template_index in new_indices:

                replicator_id = original_replicator.id.replace(
                    replicator.placeholder_id, template_index)

                new_replicator = ProtocolReplicator(replicator_id)
                new_replicator.template_values = original_replicator.template_values

                # Make sure to replace any reference to the applied replicator
                # with the actual index.
                if isinstance(new_replicator.template_values, ProtocolPath):

                    updated_path = new_replicator.template_values.full_path.replace(
                        replicator.placeholder_id, template_index)

                    new_replicator.template_values = ProtocolPath.from_string(
                        updated_path)

                elif isinstance(new_replicator.template_values, list):

                    updated_values = []

                    for template_value in new_replicator.template_values:

                        if not isinstance(template_value, ProtocolPath):

                            updated_values.append(template_value)
                            continue

                        updated_path = template_value.full_path.replace(
                            replicator.placeholder_id, template_index)
                        updated_values.append(
                            ProtocolPath.from_string(updated_path))

                    new_replicator.template_values = updated_values

                replicators.append(new_replicator)

        schema.protocol_replicators = replicators
예제 #4
0
    def _apply_replicator_to_outputs(replicator, schema, template_values):
        """Applies a replicator to a schema outputs to store.

        Parameters
        ----------
        replicator: ProtocolReplicator
            The replicator to apply.
        schema: WorkflowSchema
            The schema which defines the outputs to store.
        template_values: List of Any
            The values being applied by the replicator.
        """

        outputs_to_replicate = []

        if schema.outputs_to_store != UNDEFINED:

            outputs_to_replicate = [
                label for label in schema.outputs_to_store
                if label.find(replicator.id) >= 0
            ]

        # Check to see if there are any outputs to store pointing to
        # protocols which are being replicated.
        for output_label in outputs_to_replicate:

            output_to_replicate = schema.outputs_to_store.pop(output_label)

            for index, template_value in enumerate(template_values):

                replicated_label = output_label.replace(
                    replicator.placeholder_id, str(index))
                replicated_output = copy.deepcopy(output_to_replicate)

                for attribute_name in replicated_output.get_attributes(
                        StorageAttribute):

                    attribute_value = getattr(replicated_output,
                                              attribute_name)

                    if isinstance(attribute_value, ProtocolPath):

                        attribute_value = ProtocolPath.from_string(
                            attribute_value.full_path.replace(
                                replicator.placeholder_id, str(index)))

                    elif isinstance(attribute_value, ReplicatorValue):

                        if attribute_value.replicator_id != replicator.id:

                            # Make sure to handle nested dependent replicators.
                            attribute_value.replicator_id = attribute_value.replicator_id.replace(
                                replicator.placeholder_id, str(index))

                            continue

                        attribute_value = template_value

                    setattr(replicated_output, attribute_name, attribute_value)

                schema.outputs_to_store[replicated_label] = replicated_output
예제 #5
0
    def _apply_replicator(self, schema, replicator):
        """A method to create a set of protocol schemas based on a ProtocolReplicator,
        and add them to the list of existing schemas.

        Parameters
        ----------
        schema: WorkflowSchema
            The schema which contains the protocol definitions
        replicator: `ProtocolReplicator`
            The replicator which describes which new protocols should
            be created.
        """

        # Get the list of values which will be passed to the newly created protocols.
        template_values = self._get_template_values(replicator)

        # Replicate the protocols.
        protocols = {}

        for protocol_schema in schema.protocol_schemas:

            protocol = protocol_schema.to_protocol()
            protocols[protocol.id] = protocol

        replicated_protocols, replication_map = replicator.apply(
            protocols, template_values)
        replicator.update_references(replicated_protocols, replication_map,
                                     template_values)

        # Update the schema with the replicated protocols.
        schema.protocol_schemas = [
            replicated_protocols[key].schema for key in replicated_protocols
        ]

        # Make sure to correctly replicate gradient sources.
        replicated_gradient_sources = []

        if schema.gradients_sources != UNDEFINED:

            for gradient_source in schema.gradients_sources:

                if replicator.placeholder_id not in gradient_source.full_path:

                    replicated_gradient_sources.append(gradient_source)
                    continue

                for index, template_value in enumerate(template_values):

                    replicated_source = ProtocolPath.from_string(
                        gradient_source.full_path.replace(
                            replicator.placeholder_id, str(index)))

                    replicated_gradient_sources.append(replicated_source)

        schema.gradients_sources = replicated_gradient_sources

        # Replicate any outputs.
        self._apply_replicator_to_outputs(replicator, schema, template_values)
        # Replicate any replicators.
        self._apply_replicator_to_replicators(replicator, schema,
                                              template_values)
예제 #6
0
    def _gather_results(
        directory,
        workflow_id,
        value_reference,
        gradient_sources,
        outputs_to_store,
        *protocol_result_paths,
        **_,
    ):
        """Gather the data associated with the workflows in this graph.

        Parameters
        ----------
        directory: str
            The directory to store any working files in.
        workflow_id: str
            The id of the workflow associated with this result.
        value_reference: ProtocolPath, optional
            A reference to which property in the output dictionary is the actual value.
        gradient_sources: list of ProtocolPath
            A list of references to those entries in the output dictionaries which correspond
            to parameter gradients.
        outputs_to_store: dict of str and WorkflowOutputToStore
            A list of references to data which should be stored on the storage backend.
        protocol_results: dict of str and str
            The result dictionary of the protocol which calculated the value of the property.

        Returns
        -------
        CalculationLayerResult, optional
            The result of attempting to estimate this property from a workflow graph. `None`
            will be returned if the target uncertainty is set but not met.
        """

        return_object = WorkflowResult()
        return_object.workflow_id = workflow_id

        try:

            results_by_id = {}

            for protocol_id, protocol_result_path in protocol_result_paths:

                with open(protocol_result_path, "r") as file:
                    protocol_results = json.load(file, cls=TypedJSONDecoder)

                # Make sure none of the protocols failed and we actually have a value
                # and uncertainty.
                if isinstance(protocol_results, EvaluatorException):

                    return_object.exceptions.append(protocol_results)
                    return return_object

                # Store the protocol results in a dictionary, with keys of the
                # path to the original protocol output.
                for protocol_path, output_value in protocol_results.items():

                    protocol_path = ProtocolPath.from_string(protocol_path)

                    if (protocol_path.start_protocol is None
                            or protocol_path.start_protocol != protocol_id):
                        protocol_path.prepend_protocol_id(protocol_id)

                    results_by_id[protocol_path] = output_value

            if value_reference is not None:
                return_object.value = results_by_id[value_reference]

            for gradient_source in gradient_sources:
                return_object.gradients.append(results_by_id[gradient_source])

            return_object.data_to_store = []

            for output_to_store in outputs_to_store.values():

                unique_id = str(uuid.uuid4()).replace("-", "")

                data_object_path = path.join(directory,
                                             f"data_{unique_id}.json")
                data_directory = path.join(directory, f"data_{unique_id}")

                WorkflowGraph._store_output_data(
                    data_object_path,
                    data_directory,
                    output_to_store,
                    results_by_id,
                )

                return_object.data_to_store.append(
                    (data_object_path, data_directory))

        except Exception as e:
            return_object.exceptions.append(
                EvaluatorException.from_exception(e))

        return return_object