def _get_unreplicated_path(self, protocol_path):
    """Map a path onto the protocol it refers to before replication.

    A path may point at a protocol which will only exist once a replicator
    has been applied, i.e. its id contains a concrete replica index where
    the template protocol id contains the replicator placeholder. Such ids
    are rewritten back to the id of the template protocol.

    Parameters
    ----------
    protocol_path: ProtocolPath
        The path to convert to an unreplicated path.

    Returns
    -------
    ProtocolPath
        The path which should point to only unreplicated protocols. When no
        replicators are defined this is a copy of `protocol_path`.
    """
    replicators = self.protocol_replicators

    if replicators == UNDEFINED:
        # Without replicators there is nothing to undo.
        return protocol_path.copy()

    digits = r"\d+"
    escaped_digits = re.escape(digits)

    unreplicated = str(protocol_path.full_path)

    for replicator in replicators:
        placeholder = replicator.placeholder_id

        # A path which already contains the placeholder is left untouched
        # for this replicator.
        if placeholder in unreplicated:
            continue

        for template_id in self._find_protocols_to_be_replicated(replicator):
            # Escape the template id so that it is matched literally, then
            # restore the `\d+` wildcard which stands in for the index that
            # replaced the placeholder.
            pattern = re.escape(template_id.replace(placeholder, digits))
            pattern = pattern.replace(escaped_digits, digits)

            unreplicated = re.sub(pattern, template_id, unreplicated)

    return ProtocolPath.from_string(unreplicated)
def update_references(self, protocols, replication_map, template_values):
    """Redirect protocol inputs which reference this replicator's placeholder
    so that they point at the replicated protocols instead.

    Parameters
    ----------
    protocols: dict of str and Protocol
        The protocols which have had this replicator applied to them.
    replication_map: dict of ProtocolPath and list of tuple of ProtocolPath and int
        A dictionary of references to all of the protocols which have
        been replicated, with keys of original protocol ids. Each value
        is comprised of a list of the replicated protocol ids, and their
        index into the `template_values` array.
    template_values: List of Any
        A list of the values which will be inserted into the newly
        replicated protocols.
    """
    # Flatten the map into `replicated id -> template index`; the original
    # ids are not needed to resolve a reference.
    index_by_replica = {
        replica_id: replica_index
        for replicas in replication_map.values()
        for replica_id, replica_index in replicas
    }

    for protocol_id, protocol in protocols.items():
        for required_input in protocol.required_inputs:
            # Only references which mention the placeholder need redirecting.
            # A separate dict is built so that `set_value` below never
            # touches the mapping being iterated.
            references = {
                source_path: reference
                for source_path, reference in protocol.get_value_references(
                    required_input
                ).items()
                if self.placeholder_id in reference.full_path
            }

            if not references:
                continue

            for source_path, reference in references.items():
                consumer_path = source_path.copy()
                consumer_path.prepend_protocol_id(protocol_id)

                # Default: the consuming protocol was not itself replicated,
                # so it receives references to every replicated protocol.
                new_value = [
                    ProtocolPath.from_string(
                        reference.full_path.replace(self.placeholder_id, str(index))
                    )
                    for index in range(len(template_values))
                ]

                # If the consumer is one of the replicas, it should instead
                # reference only the protocol created with the same index.
                for replica_id, replica_index in index_by_replica.items():
                    if consumer_path.protocol_path == replica_id.protocol_path:
                        new_value = ProtocolPath.from_string(
                            reference.full_path.replace(
                                self.placeholder_id, str(replica_index)
                            )
                        )
                        break

                protocol.set_value(source_path, new_value)
def _apply_replicator_to_replicators(replicator, schema, template_values):
    """Expand every replicator of the schema whose id depends on the
    replicator being applied (e.g. replicators with ids similar to
    `other_id_$(replicator.id)`).

    One copy of each dependent replicator is created per template value,
    with the placeholder in its id, and in any `ProtocolPath` template
    values, replaced by the index of that value. Replicators which do not
    depend on `replicator` are kept unchanged. The resulting list replaces
    `schema.protocol_replicators`.

    Parameters
    ----------
    replicator: ProtocolReplicator
        The replicator being applied.
    schema: WorkflowSchema
        The workflow schema to which the replicator belongs.
    template_values: List of Any
        The values which the replicator is applying.
    """
    placeholder = replicator.placeholder_id
    index_labels = [str(position) for position in range(len(template_values))]

    expanded = []

    for candidate in schema.protocol_replicators:
        if placeholder not in candidate.id:
            # Independent of the applied replicator - carry it over as is.
            expanded.append(candidate)
            continue

        for label in index_labels:
            clone = ProtocolReplicator(candidate.id.replace(placeholder, label))
            clone.template_values = candidate.template_values

            # Inspect the value as stored on the clone, and swap any
            # reference to the applied replicator for the actual index.
            values = clone.template_values

            if isinstance(values, ProtocolPath):
                clone.template_values = ProtocolPath.from_string(
                    values.full_path.replace(placeholder, label)
                )
            elif isinstance(values, list):
                clone.template_values = [
                    ProtocolPath.from_string(
                        value.full_path.replace(placeholder, label)
                    )
                    if isinstance(value, ProtocolPath)
                    else value
                    for value in values
                ]

            expanded.append(clone)

    schema.protocol_replicators = expanded
def _apply_replicator_to_outputs(replicator, schema, template_values):
    """Replicate the outputs to store of a schema.

    Every output whose label contains the id of the replicator is removed
    from `schema.outputs_to_store` and replaced by one deep copy per
    template value, stored under the label with the replicator placeholder
    substituted by the index of that value.

    Parameters
    ----------
    replicator: ProtocolReplicator
        The replicator to apply.
    schema: WorkflowSchema
        The schema which defines the outputs to store.
    template_values: List of Any
        The values being applied by the replicator.
    """
    # Collect the labels up front, as the dictionary is modified below.
    labels = (
        [label for label in schema.outputs_to_store if replicator.id in label]
        if schema.outputs_to_store != UNDEFINED
        else []
    )

    for label in labels:
        template_output = schema.outputs_to_store.pop(label)

        for index, template_value in enumerate(template_values):
            index_text = str(index)
            replica = copy.deepcopy(template_output)

            for name in replica.get_attributes(StorageAttribute):
                current = getattr(replica, name)

                if isinstance(current, ProtocolPath):
                    updated = ProtocolPath.from_string(
                        current.full_path.replace(
                            replicator.placeholder_id, index_text
                        )
                    )
                elif isinstance(current, ReplicatorValue):
                    if current.replicator_id != replicator.id:
                        # The value belongs to a different (nested, dependent)
                        # replicator: only its id is updated in place, the
                        # attribute itself is not reassigned.
                        current.replicator_id = current.replicator_id.replace(
                            replicator.placeholder_id, index_text
                        )
                        continue

                    updated = template_value
                else:
                    updated = current

                setattr(replica, name, updated)

            replica_label = label.replace(replicator.placeholder_id, index_text)
            schema.outputs_to_store[replica_label] = replica
def _apply_replicator(self, schema, replicator):
    """Apply a replicator to a schema, replacing its protocol schemas,
    gradient sources, outputs to store and dependent replicators with
    their replicated versions.

    Parameters
    ----------
    schema: WorkflowSchema
        The schema which contains the protocol definitions
    replicator: `ProtocolReplicator`
        The replicator which describes which new protocols should
        be created.
    """
    # The values which will be passed to the newly created protocols.
    template_values = self._get_template_values(replicator)

    # Build the protocols described by the schema, keyed by their id.
    protocols = {
        protocol.id: protocol
        for protocol in (
            protocol_schema.to_protocol()
            for protocol_schema in schema.protocol_schemas
        )
    }

    replicated_protocols, replication_map = replicator.apply(
        protocols, template_values
    )
    replicator.update_references(
        replicated_protocols, replication_map, template_values
    )

    # Store the replicated protocols back on the schema.
    schema.protocol_schemas = [
        protocol.schema for protocol in replicated_protocols.values()
    ]

    # Expand each gradient source which references the placeholder into one
    # source per template value. Note that an UNDEFINED list of sources
    # becomes an empty list.
    gradient_sources = []

    if schema.gradients_sources != UNDEFINED:
        for source in schema.gradients_sources:
            if replicator.placeholder_id in source.full_path:
                gradient_sources.extend(
                    ProtocolPath.from_string(
                        source.full_path.replace(
                            replicator.placeholder_id, str(index)
                        )
                    )
                    for index, _ in enumerate(template_values)
                )
            else:
                gradient_sources.append(source)

    schema.gradients_sources = gradient_sources

    # Replicate any outputs.
    self._apply_replicator_to_outputs(replicator, schema, template_values)
    # Replicate any replicators.
    self._apply_replicator_to_replicators(replicator, schema, template_values)
def _gather_results(
    directory,
    workflow_id,
    value_reference,
    gradient_sources,
    outputs_to_store,
    *protocol_result_paths,
    **_,
):
    """Gather the outputs of the protocols of a workflow into a single
    result object.

    Parameters
    ----------
    directory: str
        The directory to store any working files in.
    workflow_id: str
        The id of the workflow associated with this result.
    value_reference: ProtocolPath, optional
        A reference to which property in the output dictionary is the
        actual value.
    gradient_sources: list of ProtocolPath
        A list of references to those entries in the output dictionaries
        which correspond to parameter gradients.
    outputs_to_store: dict of str and WorkflowOutputToStore
        References to data which should be stored on the storage backend.
    protocol_result_paths: tuple of str and str
        Pairs of a protocol id and the path to the JSON file which contains
        the outputs of that protocol.

    Returns
    -------
    WorkflowResult
        The gathered result. If one of the result files contains an
        exception, or an exception is raised while gathering, it is
        appended to the `exceptions` of the returned object rather than
        being raised.
    """
    result = WorkflowResult()
    result.workflow_id = workflow_id

    try:
        outputs_by_path = {}

        for protocol_id, results_file_path in protocol_result_paths:
            with open(results_file_path, "r") as results_file:
                loaded = json.load(results_file, cls=TypedJSONDecoder)

            # A protocol which failed stores its exception in place of its
            # outputs - report it and stop gathering.
            if isinstance(loaded, EvaluatorException):
                result.exceptions.append(loaded)
                return result

            # Key each output by its full path, making sure that the path
            # starts with the id of the protocol which produced it.
            for raw_path, output_value in loaded.items():
                output_path = ProtocolPath.from_string(raw_path)
                start_protocol = output_path.start_protocol

                if start_protocol is None or start_protocol != protocol_id:
                    output_path.prepend_protocol_id(protocol_id)

                outputs_by_path[output_path] = output_value

        if value_reference is not None:
            result.value = outputs_by_path[value_reference]

        for gradient_source in gradient_sources:
            result.gradients.append(outputs_by_path[gradient_source])

        result.data_to_store = []

        for output_to_store in outputs_to_store.values():
            # Each stored output gets a uniquely named data object file and
            # a matching data directory.
            unique_id = str(uuid.uuid4()).replace("-", "")

            data_object_path = path.join(directory, f"data_{unique_id}.json")
            data_directory = path.join(directory, f"data_{unique_id}")

            WorkflowGraph._store_output_data(
                data_object_path,
                data_directory,
                output_to_store,
                outputs_by_path,
            )

            result.data_to_store.append((data_object_path, data_directory))

    except Exception as e:
        result.exceptions.append(EvaluatorException.from_exception(e))

    return result