def find_persistent_entities(self, pipeline):
    """Match the modules and connections of `pipeline` against the
    persistent pipeline by signature.

    The signatures of `pipeline` are refreshed first.  The persistent
    pipeline is only queried here, through its ``has_*_signature`` and
    ``*_id_from_signature`` methods.

    Returns a tuple ``(object_map, module_id_map, connection_id_map)``
    of plain dicts keyed by the ids used in `pipeline`:

      object_map        -- module id -> ``self._objects`` entry of the
                           matching persistent module, or None
      module_id_map     -- module id -> persistent module id, or None
      connection_id_map -- connection id -> persistent connection id,
                           or None

    A value of None means no entity with that signature was found.
    """
    persistent = self._persistent_pipeline
    objects_by_module = {}
    persistent_module_ids = {}
    persistent_connection_ids = {}

    pipeline.refresh_signatures()

    # Modules have to be visited in topological sort order.
    for mid in pipeline.graph.vertices_topological_sort():
        signature = pipeline.subpipeline_signature(mid)
        if not persistent.has_subpipeline_signature(signature):
            persistent_module_ids[mid] = None
            objects_by_module[mid] = None
            continue
        found_id = persistent.subpipeline_id_from_signature(signature)
        persistent_module_ids[mid] = found_id
        objects_by_module[mid] = self._objects[found_id]

    for conn in pipeline.connections.itervalues():
        cid = conn.id
        signature = pipeline.connection_signature(cid)
        if not persistent.has_connection_signature(signature):
            persistent_connection_ids[cid] = None
            continue
        persistent_connection_ids[cid] = \
            persistent.connection_id_from_signature(signature)

    return (objects_by_module, persistent_module_ids,
            persistent_connection_ids)
def add_to_persistent_pipeline(self, pipeline):
    """add_to_persistent_pipeline(pipeline):
    (module_id_map, connection_id_map, modules_added, connections_added)

    Merges `pipeline` into the persistent pipeline, matching entities by
    signature.  A module or connection whose signature the persistent
    pipeline does not have yet is shallow-copied, given a fresh
    persistent id and added; one that is already present is only looked
    up.  The persistent pipeline's signatures are recomputed at the end.

    Returns four things:

      module_id_map     -- Bidict from module ids of `pipeline` to
                           module ids of the persistent pipeline
      connection_id_map -- Bidict from connection ids of `pipeline` to
                           connection ids of the persistent pipeline
      modules_added     -- set of module ids (as used in `pipeline`)
                           that had to be added
      connections_added -- set of connection ids (as used in `pipeline`)
                           that had to be added
    """
    module_id_map = Bidict()
    connection_id_map = Bidict()
    modules_added = set()
    connections_added = set()

    pipeline.refresh_signatures()
    persistent = self._persistent_pipeline

    # Modules have to be visited in topological sort order.
    for local_id in pipeline.graph.vertices_topological_sort():
        signature = pipeline.subpipeline_signature(local_id)
        if persistent.has_subpipeline_signature(signature):
            # Already known: just record the id mapping.
            module_id_map[local_id] = \
                persistent.subpipeline_id_from_signature(signature)
            continue

        # Unknown signature: add a shallow copy under a fresh id.
        module_copy = copy.copy(pipeline.modules[local_id])
        fresh_id = persistent.fresh_module_id()
        module_copy.id = fresh_id
        persistent.add_module(module_copy)
        # The signature is stored as lowercase hex on the module as it
        # is held by the persistent pipeline.
        hex_signature = base64.b16encode(signature).lower()
        persistent.modules[fresh_id]._signature = hex_signature
        module_id_map[local_id] = fresh_id
        modules_added.add(local_id)

    for conn in pipeline.connections.itervalues():
        signature = pipeline.connection_signature(conn.id)
        if persistent.has_connection_signature(signature):
            # Already known: just record the id mapping.
            connection_id_map[conn.id] = \
                persistent.connection_id_from_signature(signature)
            continue

        # Unknown signature: add a shallow copy under a fresh id, with
        # both endpoints translated to persistent module ids (this
        # relies on module_id_map having been filled in above).
        conn_copy = copy.copy(conn)
        fresh_id = persistent.fresh_connection_id()
        conn_copy.id = fresh_id
        conn_copy.sourceId = module_id_map[conn.sourceId]
        conn_copy.destinationId = module_id_map[conn.destinationId]
        persistent.add_connection(conn_copy)
        connection_id_map[conn.id] = fresh_id
        connections_added.add(conn.id)

    # update persistent signatures
    persistent.compute_signatures()
    return (module_id_map, connection_id_map,
            modules_added, connections_added)