class Flow(Tool): ''' A Flow-based framework. It executes steps in a sequential or multiprocess environment. User defined steps thanks to Python classes, and configuration in a json file The multiprocess mode is based on ZeroMQ library (http://zeromq.org) to pass messages between process. ZMQ library allows to stay away from class concurrency mechanisms like mutexes, critical sections semaphores, while being process safe. Passing data between steps is managed by the router. If a step is executed by several process, the router uses LRU pattern (least recently used ) to choose the step that will receive next data. The router also manage Queue for each step. ''' description = 'run stages in multiprocess Flow based framework' gui = Bool(False, help='send status to GUI').tag(config=True) gui_address = Unicode('localhost:5565', help='GUI adress and port').tag(config=True) mode = Enum(['sequential', 'multiprocess'], default_vallue='sequential', help='Flow mode', allow_none=True).tag(config=True) producer_conf = Dict(help='producer description: name , module, class', allow_none=False).tag(config=True) stagers_conf = List(help='stagers list description in a set order:', allow_none=False).tag(config=True) consumer_conf = Dict(default_value={ 'name': 'CONSUMER', 'class': 'Producer', 'module': 'producer', 'prev': 'STAGE1' }, help='producer description: name , module, class', allow_none=False).tag(config=True) ports_list = list(range(5555, 5600, 1)) zmq_ports = List(ports_list, help='ZMQ ports').tag(config=True) aliases = Dict({ 'gui_address': 'Flow.gui_address', 'mode': 'Flow.mode', 'gui': 'Flow.gui' }) examples = ('prompt%> ctapipe-flow \ --config=examples/flow/switch.json') PRODUCER = 'PRODUCER' STAGER = 'STAGER' CONSUMER = 'CONSUMER' ROUTER = 'ROUTER' producer = None consumer = None stagers = list() router = None producer_step = None stager_steps = None consumer_step = None step_process = list() router_process = None ports = dict() def setup(self): if self.init() is False: self.log.error('Could not initialise Flow based framework') exit() def init(self): ''' Create producers, stagers and consumers instance according to configuration Returns ------- bool : True if Flow based framework is correctly setup and all producer,stager and consumer initialised Otherwise False ''' # Verify configuration instance if not path.isfile(self.config_file): self.log.error( 'Could not open Flow based framework config_file {}'.format( self.config_file)) return False if not self.generate_steps(): self.log.error("Error during steps generation") return False if self.gui: self.context = zmq.Context() self.socket_pub = self.context.socket(zmq.PUB) if not self.connect_gui(): return False if self.mode == 'sequential': return self.init_sequential() elif self.mode == 'multiprocess': return self.init_multiprocess() else: self.log.error("{} is not a valid mode for" "Flow based framework".format(self.mode)) def init_multiprocess(self): """ Initialise Flow for multiprocess mode Returns ------- True if every initialisation are correct Otherwise False """ if not self.configure_ports(): return False if not self.configure_producer(): return False router_names = self.add_consumer_to_router() if not self.configure_consumer(): return False if not self.configure_stagers(router_names): return False gui_address = None if self.gui: gui_address = self.gui_address self.router = RouterQueue(connections=router_names, gui_address=gui_address) for step in self.stager_steps: for t in step.process: self.step_process.append(t) self.display_conf() return True def init_sequential(self): """ Initialise Flow for sequential mode Returns ------- True if every initialisation are correct Otherwise False """ self.configure_ports() self.sequential_instances = dict() # set coroutines # producer conf = self.get_step_conf(self.producer_step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False self.producer = ProducerSequential( coroutine, name=self.producer_step.name, connections=self.producer_step.connections, main_connection_name=self.producer_step.main_connection_name) self.producer.init() self.producer_step.process.append(self.producer) self.sequential_instances[self.producer_step.name] = self.producer # stages for step in (self.stager_steps): conf = self.get_step_conf(step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False stage = StagerSequential( coroutine, name=step.name, connections=step.connections, main_connection_name=step.main_connection_name) step.process.append(stage) self.sequential_instances[step.name] = stage self.stagers.append(stage) stage.init() # consumer conf = self.get_step_conf(self.consumer_step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False self.consumer = ConsumerSequential(coroutine, name=conf['name']) self.consumer_step.process.append(self.consumer) self.consumer.init() self.sequential_instances[self.consumer_step.name] = self.consumer self.display_conf() return True def configure_stagers(self, router_names): """ Creates Processes with users's coroutines for all stages Parameters ---------- router_names: List List to fill with routers name Returns ------- True if every instantiation is correct Otherwise False """ # STAGERS for stager_step in self.stager_steps: # each stage need a router to connect it to prev stages name = stager_step.name + '_' + 'router' router_names[name] = [ self.ports[stager_step.name + '_in'], self.ports[stager_step.name + '_out'], stager_step.queue_limit ] for i in range(stager_step.nb_process): conf = self.get_step_conf(stager_step.name) try: stager_zmq = self.instantiation( stager_step.name, self.STAGER, process_name=stager_step.name + '$$process_number$$' + str(i), port_in=stager_step.port_in, connections=stager_step.connections, main_connection_name=stager_step.main_connection_name, config=conf) except FlowError as e: self.log.error(e) return False self.stagers.append(stager_zmq) stager_step.process.append(stager_zmq) return True def configure_consumer(self): """ Creates consumer Processes with users's coroutines Returns ------- True if every instantiation is correct Otherwise False """ try: consumer_zmq = self.instantiation( self.consumer_step.name, self.CONSUMER, port_in=self.consumer_step.port_in, config=self.consumer_conf) except FlowError as e: self.log.error(e) return False self.consumer = consumer_zmq return True def add_consumer_to_router(self): """ Create router_names dictionary and Add consumer router ports Returns ------- The new router_names dictionary """ # ROUTER router_names = dict() # each stage need a router to connect it to prev stages name = self.consumer_step.name + '_' + 'router' router_names[name] = [ self.ports[self.consumer_step.name + '_in'], self.ports[self.consumer_step.name + '_out'], self.consumer_step.queue_limit ] return router_names def configure_producer(self): """ Creates producer Process with users's coroutines Returns ------- True if every instatiation is correct Otherwise False """ # PRODUCER try: producer_zmq = self.instantiation( self.producer_step.name, self.PRODUCER, connections=self.producer_step.connections, main_connection_name=self.producer_step.main_connection_name, config=self.producer_conf) except FlowError as e: self.log.error(e) return False self.producer = producer_zmq return True def connect_gui(self): """ Connect ZMQ socket to send information to GUI Returns ------- True if everything correct Otherwise False """ # Get port for GUI if self.gui_address is not None: try: self.socket_pub.connect('tcp://' + self.gui_address) except zmq.error.ZMQError as e: self.log.info(str(e) + 'tcp://' + self.gui_address) return False return True def generate_steps(self): """ Generate Flow based framework steps from configuration Returns ------- True if everything correct Otherwise False """ self.producer_step = self.get_pipe_steps(self.PRODUCER) self.stager_steps = self.get_pipe_steps(self.STAGER) self.consumer_step = self.get_pipe_steps(self.CONSUMER) if not self.producer_step: self.log.error("No producer in configuration") return False if not self.consumer_step: self.log.error("No consumer in configuration") return False return True def configure_ports(self): """ Configures producer, stagers and consumer ZMQ ports Returns ------- True if everything correct Otherwise False """ # configure connections (zmq port) for producer (one per next step) try: for next_step_name in self.producer_step.next_steps_name: if not next_step_name + '_in' in self.ports: self.ports[next_step_name + '_in'] = str( self.zmq_ports.pop()) self.producer_step.connections[next_step_name] = self.ports[ next_step_name + '_in'] self.producer_step.main_connection_name = ( self.producer_step.next_steps_name[0]) # configure port_in and connections (zmq port) # for all stages (one per next step) for stage in self.stager_steps: if stage.name + '_out' not in self.ports: self.ports[stage.name + '_out'] = str(self.zmq_ports.pop()) stage.port_in = self.ports[stage.name + '_out'] for next_step_name in stage.next_steps_name: if next_step_name + '_in' not in self.ports: self.ports[next_step_name + '_in'] = str( self.zmq_ports.pop()) stage.connections[next_step_name] = self.ports[ next_step_name + '_in'] stage.main_connection_name = stage.next_steps_name[0] # configure port-in (zmq port) for consumer if self.consumer_step.name + '_out' not in self.ports: self.ports[self.consumer_step.name + '_out'] = str( self.zmq_ports.pop()) self.consumer_step.port_in = self.ports[self.consumer_step.name + '_out'] return True except IndexError as e: self.log.error("Not enough ZMQ ports. Consider adding some port " "to configuration.") except Exception as e: self.log.error("Could not configure ZMQ ports. {}".format(e)) return False def get_step_by_name(self, name): ''' Find a PipeStep in self.producer_step or self.stager_steps or self.consumer_step Parameters ---------- name : str step name Returns ------- PipeStep if found, otherwise None ''' for step in (self.stager_steps + [self.producer_step, self.consumer_step]): if step.name == name: return step return None def instantiation(self, name, stage_type, process_name=None, port_in=None, connections=None, main_connection_name=None, config=None): ''' Instantiate on Python object from name found in configuration Parameters ---------- name : str stage name stage_type : str process_name : str port_in : str step ZMQ port in connections : dict key: StepName, value" connection ZMQ ports main_connection_name : str main ZMQ connection name. Connexion to use when user not precise ''' stage = self.get_step_conf(name) module = stage['module'] class_name = stage['class'] obj = dynamic_class_from_module(class_name, module, self) if obj is None: raise FlowError('Cannot create instance of ' + name) obj.name = name if stage_type == self.STAGER: process = StagerZmq(obj, port_in, process_name, connections=connections, main_connection_name=main_connection_name) elif stage_type == self.PRODUCER: process = ProducerZmq(obj, name, connections=connections, main_connection_name=main_connection_name) elif stage_type == self.CONSUMER: process = ConsumerZMQ(obj, port_in, name) else: raise FlowError('Cannot create instance of', name, '. Type', stage_type, 'does not exist.') # set coroutine socket to it's stager or producer socket . return process def get_pipe_steps(self, role): ''' Create a list of Flow based framework steps from configuration and filter by role Parameters ---------- role: str filter with role for step to be add in result list Accepted values: self.PRODUCER - self.STAGER - self.CONSUMER Returns ------- PRODUCER,CONSUMER: a step name filter by specific role (PRODUCER,CONSUMER) STAGER: List of steps name filter by specific role ''' # Create producer step try: if role == self.PRODUCER: prod_step = PipeStep(self.producer_conf['name']) prod_step.type = self.PRODUCER prod_step.next_steps_name = self.producer_conf[ 'next_steps'].split(',') return prod_step elif role == self.STAGER: # Create stagers steps result = list() for stage_conf in self.stagers_conf: try: nb_process = int(stage_conf['nb_process']) except Exception: nb_process = 1 next_steps_name = stage_conf['next_steps'].split(',') try: queue_limit = stage_conf['queue_limit'] except Exception: queue_limit = -1 stage_step = PipeStep(stage_conf['name'], next_steps_name=next_steps_name, nb_processes=nb_process, queue_limit=queue_limit) stage_step.type = self.STAGER result.append(stage_step) return result elif role == self.CONSUMER: # Create consumer step try: queue_limit = self.consumer_conf['queue_limit'] except: queue_limit = -1 cons_step = PipeStep(self.consumer_conf['name'], queue_limit=queue_limit) cons_step.type = self.CONSUMER return cons_step return result except KeyError: return None def def_step_for_gui(self): ''' Create a list (levels_for_gui) containing all steps Returns ------- the created list and actual time ''' levels_for_gui = list() levels_for_gui.append( StagerRep(self.producer_step.name, self.producer_step.next_steps_name, nb_job_done=self.producer.nb_job_done, running=self.producer.running, step_type=StagerRep.PRODUCER)) for step in self.stager_steps: nb_job_done = 0 running = 0 if self.mode == 'sequential': running = step.process[0].running nb_job_done = step.process[0].nb_job_done levels_for_gui.append( StagerRep(step.name, step.next_steps_name, nb_job_done=nb_job_done, running=running, nb_process=len(step.process))) elif self.mode == 'multiprocess': for process in step.process: nb_job_done += process.nb_job_done running += process.running levels_for_gui.append( StagerRep(process.name, step.next_steps_name, nb_job_done=nb_job_done, running=running, nb_process=len(step.process))) levels_for_gui.append( StagerRep(self.consumer_step.name, nb_job_done=self.consumer.nb_job_done, running=self.consumer.running, step_type=StagerRep.CONSUMER)) return (levels_for_gui, time()) def display_conf(self): ''' Print steps and their next_steps ''' self.log.info('') self.log.info( '------------------ Flow configuration ------------------') for step in ([self.producer_step] + self.stager_steps + [self.consumer_step]): if self.mode == 'multiprocess': self.log.info('step {} (nb process {}) '.format( step.name, str(step.nb_process))) else: self.log.info('step {}'.format(step.name)) for next_step_name in step.next_steps_name: self.log.info('--> next {} '.format(next_step_name)) self.log.info( '------------------ End Flow configuration ------------------') self.log.info('') def display_statistics(self): """ Log each StagerRep statistic """ steps, _ = self.def_step_for_gui() for step in steps: self.log.info(step.get_statistics()) def start(self): """ run the Flow based framework steps """ if self.mode == 'multiprocess': self.start_multiprocess() elif self.mode == 'sequential': self.start_sequential() def start_sequential(self): """ run the Flow based framework in sequential mode """ if self.gui: self.socket_pub.send_multipart([b'MODE', dumps('sequential')]) start_time = time() # self.producer.running = 0 # Get producer instance's generator self.producer = self.sequential_instances[self.producer_step.name] # execute producer run coroutine prod_gen = self.producer.run() # only for gui if self.gui: self.producer.running = 1 self.send_status_to_gui() # for each producer output for prod_result in prod_gen: if self.gui: self.producer.running = 0 self.send_status_to_gui() # get next stage destination and input from producer output msg, destination = prod_result # run each steps until consumer return if msg is not None: destination, msg = self.run_generator(destination, msg) if self.gui: self.producer.running = 1 self.send_status_to_gui() if self.gui: self.consumer.running = 0 self.send_status_to_gui() # execute finish method for all steps for step in self.sequential_instances.values(): step.finish() end_time = time() self.log.info('=== SEQUENTIAL MODE END ===') self.log.info('Compute time {} sec'.format(end_time - start_time)) self.display_statistics() # send finish to GUI and close connections if self.gui: self.socket_pub.send_multipart([b'FINISH', dumps('finish')]) self.socket_pub.close() self.context.destroy() self.context.term() def run_generator(self, destination, msg): """ Get step for destination. Create a genetor from its run method. re-enter in run_generator until Generator send values Parameters ---------- destination: str Next step name msg: a Pickle dumped msg Returns ------- Next destination and msg """ stage = self.sequential_instances[destination] stage.running = 1 if self.gui: self.send_status_to_gui() stage_gen = stage.run(msg) stage.running = 0 if stage_gen: for result in stage_gen: if result: msg, destination = result destination, msg = self.run_generator(destination, msg) else: msg = destination = None else: msg = destination = None return (msg, destination) def send_status_to_gui(self): """ Update all StagerRep status and send them to GUI """ self.socket_pub.send_multipart([b'MODE', dumps(self.mode)]) levels_gui, conf_time = self.def_step_for_gui() self.socket_pub.send_multipart( [b'GUI_GRAPH', dumps([conf_time, levels_gui])]) def start_multiprocess(self): ''' Start all Flow based framework processes. Regularly inform GUI of Flow based framework configuration in case of a new GUI instance was lunch Stop all processes without loosing data ''' # send Flow based framework cofiguration to an optinal GUI instance if self.gui: self.send_status_to_gui() start_time = time() # Start all process self.consumer.start() self.router.start() for stage in self.stagers: stage.start() self.producer.start() # Wait producer end of run method self.wait_and_send_levels(self.producer) # Ensure that all queues are empty and all process are waiting for # new data since more that a specific tine while not self.wait_all_stagers(1000): # 1000 ms if self.gui: self.send_status_to_gui() sleep(1) # Now send stop to stage process and wait they join for worker in self.step_process: self.wait_and_send_levels(worker) # Stop consumer and router process self.wait_and_send_levels(self.consumer) self.wait_and_send_levels(self.router) if self.gui: self.send_status_to_gui() # Wait 1 s to be sure this message will be display end_time = time() self.log.info('=== MULTUPROCESSUS MODE END ===') self.log.info('Compute time {} sec'.format(end_time - start_time)) self.display_statistics() sleep(1) if self.gui: self.socket_pub.send_multipart([b'FINISH', dumps('finish')]) self.socket_pub.close() self.context.destroy() self.context.term() def wait_all_stagers(self, mintime): """ Verify id all steps (stage + consumers) are finished their jobs and waiting Returns ------- True if all stages queue are empty and all Processes wait since mintime Otherwise False """ if self.router.total_queue_size == 0: for worker in self.step_process: if worker.wait_since < mintime: # 5000ms return False return True return False def finish(self): self.log.info('===== Flow END ======') def wait_and_send_levels(self, processes_to_wait): ''' Wait for a process to join and regularly send Flow based framework state to GUI in case of a GUI will connect later Parameters ---------- processes_to_wait : process process to join conf_time : str represents time at which configuration has been built ''' processes_to_wait.stop = 1 while True: processes_to_wait.join(timeout=.1) if self.gui: self.send_status_to_gui() if not processes_to_wait.is_alive(): return def get_step_conf(self, name): ''' Search step by its name in self.stage_conf list, self.producer_conf and self.consumer_conf Parameters ---------- name : str stage name Returns ------- Step name matching instance, or None is not found ''' if self.producer_conf['name'] == name: return self.producer_conf if self.consumer_conf['name'] == name: return self.consumer_conf for step in self.stagers_conf: if step['name'] == name: return step return None def get_stager_indice(self, name): ''' Search step by its name in self.stage_conf list Parameters ---------- name : str stage name Returns ------- indice in list, -1 if not found ''' for index, step in enumerate(self.stagers_conf): if step['name'] == name: return index return -1
class Flow(Tool): ''' A Flow-based framework. It executes steps in a sequential or multiprocess environment. User defined steps thanks to Python classes, and configuration in a json file The multiprocess mode is based on ZeroMQ library (http://zeromq.org) to pass messages between process. ZMQ library allows to stay away from class concurrency mechanisms like mutexes, critical sections semaphores, while being process safe. Passing data between steps is managed by the router. If a step is executed by several process, the router uses LRU pattern (least recently used ) to choose the step that will receive next data. The router also manage Queue for each step. ''' description = 'run stages in multiprocess Flow based framework' gui = Bool(False, help='send status to GUI').tag( config=True) gui_address = Unicode('localhost:5565', help='GUI adress and port')\ .tag(config=True) mode = Enum(['sequential','multiprocess'], default_vallue='sequential', help='Flow mode', allow_none=True).tag(config=True) producer_conf = Dict(help='producer description: name , module, class', allow_none=False).tag(config=True) stagers_conf = List( help='stagers list description in a set order:', allow_none=False).tag(config=True) consumer_conf = Dict( default_value={'name': 'CONSUMER', 'class': 'Producer', 'module': 'producer', 'prev': 'STAGE1'}, help='producer description: name , module, class', allow_none=False).tag(config=True) ports_list = list(range(5555,5600,1)) zmq_ports = List(ports_list, help='ZMQ ports').tag(config=True) aliases = Dict({'gui_address': 'Flow.gui_address', 'mode':'Flow.mode','gui': 'Flow.gui'}) examples = ('prompt%> ctapipe-flow \ --config=examples/flow/switch.json') PRODUCER = 'PRODUCER' STAGER = 'STAGER' CONSUMER = 'CONSUMER' ROUTER = 'ROUTER' producer = None consumer = None stagers = list() router = None producer_step = None stager_steps = None consumer_step = None step_process = list() router_process = None ports = dict() def setup(self): if self.init() == False: self.log.error('Could not initialise Flow based framework') exit() def init(self): ''' Create producers, stagers and consumers instance according to configuration Returns ------- bool : True if Flow based framework is correctly setup and all producer,stager and consumer initialised Otherwise False ''' # Verify configuration instance if not path.isfile(self.config_file): self.log.error('Could not open Flow based framework config_file {}' .format(self.config_file)) return False if not self.generate_steps(): self.log.error("Error during steps generation") return False if self.gui : self.context = zmq.Context() self.socket_pub = self.context.socket(zmq.PUB) if not self.connect_gui(): return False if self.mode == 'sequential': return self.init_sequential() elif self.mode == 'multiprocess': return self.init_multiprocess() else: self.log.error("{} is not a valid mode for Flow based framework".format(self.mode)) def init_multiprocess(self): """ Initialise Flow for multiprocess mode Returns ------- True if every initialisation are correct Otherwise False """ if not self.configure_ports() : return False if not self.configure_producer() : return False router_names = self.add_consumer_to_router() if not self.configure_consumer(): return False if not self.configure_stagers(router_names) : return False gui_address = None if self.gui: gui_address = self.gui_address self.router = RouterQueue(connections=router_names, gui_address=gui_address) for step in self.stager_steps: for t in step.process: self.step_process.append(t) self.display_conf() return True def init_sequential(self): """ Initialise Flow for sequential mode Returns ------- True if every initialisation are correct Otherwise False """ self.configure_ports() self.sequential_instances = dict() # set coroutines #producer conf = self.get_step_conf(self.producer_step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False self.producer = ProducerSequential(coroutine, name=self.producer_step.name, connections=self.producer_step.connections, main_connection_name = self.producer_step.main_connection_name) self.producer.init() self.producer_step.process.append(self.producer) self.sequential_instances[self.producer_step.name] = self.producer #stages for step in (self.stager_steps ): conf = self.get_step_conf(step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False stage = StagerSequential(coroutine,name = step.name, connections=step.connections, main_connection_name=step.main_connection_name) step.process.append(stage) self.sequential_instances[step.name] = stage self.stagers.append(stage) stage.init() #consumer conf = self.get_step_conf(self.consumer_step.name) module = conf['module'] class_name = conf['class'] try: coroutine = dynamic_class_from_module(class_name, module, self) except DynamicClassError as e: self.log.error('{}'.format(e)) return False self.consumer = ConsumerSequential(coroutine, name = conf['name']) self.consumer_step.process.append(self.consumer) self.consumer.init() self.sequential_instances[self.consumer_step.name] = self.consumer self.display_conf() return True def configure_stagers(self,router_names): """ Creates Processes with users's coroutines for all stages Parameters ---------- router_names: List List to fill with routers name Returns ------- True if every instantiation is correct Otherwise False """ #STAGERS for stager_step in self.stager_steps: # each stage need a router to connect it to prev stages name = stager_step.name + '_' + 'router' router_names[name] = [self.ports[stager_step.name+'_in'], self.ports[stager_step.name+'_out'], stager_step.queue_limit] for i in range(stager_step.nb_process): conf = self.get_step_conf(stager_step.name) try: stager_zmq = self.instantiation(stager_step.name, self.STAGER, process_name=stager_step.name + '$$process_number$$' + str(i), port_in=stager_step.port_in, connections=stager_step.connections, main_connection_name=stager_step.main_connection_name, config=conf) except FlowError as e: self.log.error(e) return False self.stagers.append(stager_zmq) stager_step.process.append(stager_zmq) return True def configure_consumer(self): """ Creates consumer Processes with users's coroutines Returns ------- True if every instantiation is correct Otherwise False """ try: consumer_zmq = self.instantiation(self.consumer_step.name, self.CONSUMER, port_in=self.consumer_step.port_in, config=self.consumer_conf) except FlowError as e: self.log.error(e) return False self.consumer = consumer_zmq return True def add_consumer_to_router(self): """ Create router_names dictionary and Add consumer router ports Returns ------- The new router_names dictionary """ # ROUTER router_names = dict() # each stage need a router to connect it to prev stages name = self.consumer_step.name + '_' + 'router' router_names[name] = [self.ports[self.consumer_step.name+'_in'], self.ports[self.consumer_step.name+'_out'], self.consumer_step.queue_limit] return router_names def configure_producer(self): """ Creates producer Process with users's coroutines Returns ------- True if every instatiation is correct Otherwise False """ #PRODUCER try: producer_zmq = self.instantiation(self.producer_step.name, self.PRODUCER, connections=self.producer_step.connections, main_connection_name=self.producer_step.main_connection_name, config=self.producer_conf) except FlowError as e: self.log.error(e) return False self.producer = producer_zmq return True def connect_gui(self): """ Connect ZMQ socket to send information to GUI Returns ------- True if everything correct Otherwise False """ # Get port for GUI if self.gui_address is not None: try: self.socket_pub.connect('tcp://' + self.gui_address) except zmq.error.ZMQError as e: self.log.info(str(e) + 'tcp://' + self.gui_address) return False return True def generate_steps(self): """ Generate Flow based framework steps from configuration Returns ------- True if everything correct Otherwise False """ self.producer_step = self.get_pipe_steps(self.PRODUCER) self.stager_steps = self.get_pipe_steps(self.STAGER) self.consumer_step = self.get_pipe_steps(self.CONSUMER) if not self.producer_step: self.log.error("No producer in configuration") return False if not self.consumer_step: self.log.error("No consumer in configuration") return False return True def configure_ports(self): """ Configures producer, stagers and consumer ZMQ ports Returns ------- True if everything correct Otherwise False """ #configure connections (zmq port) for producer (one per next step) try: for next_step_name in self.producer_step.next_steps_name: if not next_step_name+'_in' in self.ports: self.ports[next_step_name+'_in'] = str(self.zmq_ports.pop()) self.producer_step.connections[next_step_name]=self.ports[next_step_name+'_in'] self.producer_step.main_connection_name = self.producer_step.next_steps_name[0] #configure port_in and connections (zmq port) for all stages (one per next step) for stage in self.stager_steps: if not stage.name+'_out' in self.ports: self.ports[stage.name+'_out'] = str(self.zmq_ports.pop()) stage.port_in = self.ports[stage.name+'_out'] for next_step_name in stage.next_steps_name: if not next_step_name+'_in' in self.ports: self.ports[next_step_name+'_in'] = str(self.zmq_ports.pop()) stage.connections[next_step_name]=self.ports[next_step_name+'_in'] stage.main_connection_name = stage.next_steps_name[0] #configure port-in (zmq port) for consumer if not self.consumer_step.name+'_out' in self.ports: self.ports[ self.consumer_step.name+'_out'] = str(self.zmq_ports.pop()) self.consumer_step.port_in = self.ports[ self.consumer_step.name+'_out'] return True except IndexError as e: self.log.error("Not enough ZMQ ports. Consider adding some port " "to configuration.") except Exception as e: self.log.error("Could not configure ZMQ ports. {}".format(e)) return False def get_step_by_name(self, name): ''' Find a PipeStep in self.producer_step or self.stager_steps or self.consumer_step Parameters ---------- name : str step name Returns ------- PipeStep if found, otherwise None ''' for step in (self.stager_steps+[self.producer_step,self.consumer_step]): if step.name == name: return step return None def instantiation(self, name, stage_type, process_name=None, port_in=None, connections=None, main_connection_name=None, config=None): ''' Instantiate on Python object from name found in configuration Parameters ---------- name : str stage name stage_type : str process_name : str port_in : str step ZMQ port in connections : dict key: StepName, value" connection ZMQ ports main_connection_name : str main ZMQ connection name. Connexion to use when user not precise ''' stage = self.get_step_conf(name) module = stage['module'] class_name = stage['class'] obj = dynamic_class_from_module(class_name, module, self) if obj is None: raise FlowError('Cannot create instance of ' + name) obj.name = name if stage_type == self.STAGER: process = StagerZmq( obj, port_in, process_name, connections=connections, main_connection_name = main_connection_name) elif stage_type == self.PRODUCER: process = ProducerZmq( obj, name, connections=connections, main_connection_name= main_connection_name) elif stage_type == self.CONSUMER: process = ConsumerZMQ( obj,port_in, name) else: raise FlowError( 'Cannot create instance of', name, '. Type', stage_type, 'does not exist.') # set coroutine socket to it's stager or producer socket . return process def get_pipe_steps(self, role): ''' Create a list of Flow based framework steps from configuration and filter by role Parameters ---------- role: str filter with role for step to be add in result list Accepted values: self.PRODUCER - self.STAGER - self.CONSUMER Returns ------- PRODUCER,CONSUMER: a step name filter by specific role (PRODUCER,CONSUMER) STAGER: List of steps name filter by specific role ''' # Create producer step try: if role == self.PRODUCER: prod_step = PipeStep(self.producer_conf['name']) prod_step.type = self.PRODUCER prod_step.next_steps_name = self.producer_conf['next_steps'].split(',') return prod_step elif role == self.STAGER: # Create stagers steps result = list() for stage_conf in self.stagers_conf: try: nb_process = int(stage_conf['nb_process']) except Exception as e: nb_process = 1 next_steps_name = stage_conf['next_steps'].split(',') try: queue_limit = stage_conf['queue_limit'] except Exception: queue_limit = -1 stage_step = PipeStep(stage_conf['name'], next_steps_name=next_steps_name, nb_processes=nb_process, queue_limit = queue_limit) stage_step.type = self.STAGER result.append(stage_step) return result elif role == self.CONSUMER: # Create consumer step try: queue_limit = self.consumer_conf['queue_limit'] except: queue_limit = -1 cons_step = PipeStep(self.consumer_conf['name'],queue_limit = queue_limit) cons_step.type = self.CONSUMER return cons_step return result except KeyError as e: return None def def_step_for_gui(self): ''' Create a list (levels_for_gui) containing all steps Returns ------- the created list and actual time ''' levels_for_gui = list() levels_for_gui.append(StagerRep(self.producer_step.name, self.producer_step.next_steps_name, nb_job_done=self.producer.nb_job_done, running=self.producer.running, step_type=StagerRep.PRODUCER)) for step in self.stager_steps: nb_job_done = 0 running = 0 if self.mode == 'sequential': running = step.process[0].running nb_job_done = step.process[0].nb_job_done levels_for_gui.append(StagerRep(step.name,step.next_steps_name, nb_job_done=nb_job_done, running=running, nb_process = len(step.process))) elif self.mode == 'multiprocess': for process in step.process: nb_job_done+=process.nb_job_done running += process.running levels_for_gui.append(StagerRep(process.name,step.next_steps_name, nb_job_done=nb_job_done, running=running, nb_process = len(step.process))) levels_for_gui.append(StagerRep(self.consumer_step.name, nb_job_done=self.consumer.nb_job_done, running=self.consumer.running, step_type=StagerRep.CONSUMER)) return (levels_for_gui,time()) def display_conf(self): ''' Print steps and their next_steps ''' self.log.info('') self.log.info('------------------ Flow configuration ------------------') for step in ([self.producer_step ] + self.stager_steps + [self.consumer_step]): if self.mode == 'multiprocess': self.log.info('step {} (nb process {}) '.format(step.name,str(step.nb_process))) else: self.log.info('step {}'.format(step.name)) for next_step_name in step.next_steps_name: self.log.info('--> next {} '.format(next_step_name)) self.log.info('------------------ End Flow configuration ------------------') self.log.info('') def display_statistics(self): """ Log each StagerRep statistic """ steps,_ = self.def_step_for_gui() for step in steps: self.log.info(step.get_statistics()) def start(self): """ run the Flow based framework steps """ if self.mode == 'multiprocess': self.start_multiprocess() elif self.mode == 'sequential': self.start_sequential() def start_sequential(self): """ run the Flow based framework in sequential mode """ if self.gui : self.socket_pub.send_multipart( [b'MODE', dumps('sequential')]) start_time = time() #self.producer.running = 0 # Get producer instance's generator self.producer = self.sequential_instances[self.producer_step.name] #execute producer run coroutine prod_gen = self.producer.run() # only for gui if self.gui : self.producer.running = 1 self.send_status_to_gui() #for each producer output for prod_result in prod_gen: if self.gui : self.producer.running = 0 self.send_status_to_gui() # get next stage destination and input from producer output msg,destination = prod_result # run each steps until consumer return while msg != None: destination, msg=self.run_generator(destination,msg) if self.gui : self.producer.running = 1 self.send_status_to_gui() if self.gui : self.consumer.running=0 self.send_status_to_gui() # execute finish method for all steps for step in self.sequential_instances.values(): step.finish() end_time = time() self.log.info('=== SEQUENTIAL MODE END ===') self.log.info('Compute time {} sec'.format(end_time - start_time)) self.display_statistics() # send finish to GUI and close connections if self.gui : self.socket_pub.send_multipart( [b'FINISH', dumps('finish')]) self.socket_pub.close() self.context.destroy() self.context.term() def run_generator(self, destination ,msg): """ Get step for destination. Create a genetor from its run method. re-enter in run_generator until Generator send values Parameters ---------- destination: str Next step name msg: a Pickle dumped msg Returns ------- Next destination and msg """ stage = self.sequential_instances[destination] stage.running = 1 if self.gui : self.send_status_to_gui() stage_gen = stage.run(msg) stage.running = 0 if stage_gen: for result in stage_gen: if result: msg,destination = result destination, msg=self.run_generator(destination,msg) else: msg = destination = None else: msg = destination = None return (msg,destination) def send_status_to_gui(self): """ Update all StagerRep status and send them to GUI """ self.socket_pub.send_multipart([b'MODE', dumps(self.mode)]) levels_gui,conf_time = self.def_step_for_gui() self.socket_pub.send_multipart( [b'GUI_GRAPH', dumps([conf_time, levels_gui])]) def start_multiprocess(self): ''' Start all Flow based framework processes. Regularly inform GUI of Flow based framework configuration in case of a new GUI instance was lunch Stop all processes without loosing data ''' # send Flow based framework cofiguration to an optinal GUI instance if self.gui : self.send_status_to_gui() start_time = time() # Start all process self.consumer.start() self.router.start() for stage in self.stagers: stage.start() self.producer.start() # Wait producer end of run method self.wait_and_send_levels(self.producer) # Ensure that all queues are empty and all process are waiting for # new data since more that a specific tine while not self.wait_all_stagers(1000): # 1000 ms if self.gui : self.send_status_to_gui() sleep(1) # Now send stop to stage process and wait they join for worker in self.step_process: self.wait_and_send_levels(worker) # Stop consumer and router process self.wait_and_send_levels(self.consumer) self.wait_and_send_levels(self.router) if self.gui : self.send_status_to_gui() # Wait 1 s to be sure this message will be display end_time = time() self.log.info('=== MULTUPROCESSUS MODE END ===') self.log.info('Compute time {} sec'.format(end_time - start_time)) self.display_statistics() sleep(1) if self.gui : self.socket_pub.send_multipart( [b'FINISH', dumps('finish')]) self.socket_pub.close() self.context.destroy() self.context.term() def wait_all_stagers(self,mintime): """ Verify id all steps (stage + consumers) are finished their jobs and waiting Returns ------- True if all stages queue are empty and all Processes wait since mintime Otherwise False """ if self.router.total_queue_size == 0 : for worker in self.step_process: if worker.wait_since < mintime: # 5000ms return False return True return False def finish(self): self.log.info('===== Flow END ======') def wait_and_send_levels(self, processes_to_wait): ''' Wait for a process to join and regularly send Flow based framework state to GUI in case of a GUI will connect later Parameters ---------- processes_to_wait : process process to join conf_time : str represents time at which configuration has been built ''' processes_to_wait.stop = 1 while True: processes_to_wait.join(timeout=.1) if self.gui : self.send_status_to_gui() if not processes_to_wait.is_alive(): return def get_step_conf(self, name): ''' Search step by its name in self.stage_conf list, self.producer_conf and self.consumer_conf Parameters ---------- name : str stage name Returns ------- Step name matching instance, or None is not found ''' if self.producer_conf['name'] == name: return self.producer_conf if self.consumer_conf['name'] == name: return self.consumer_conf for step in self.stagers_conf: if step['name'] == name: return step return None def get_stager_indice(self, name): ''' Search step by its name in self.stage_conf list Parameters ---------- name : str stage name Returns ------- indice in list, -1 if not found ''' for index, step in enumerate(self.stagers_conf): if step['name'] == name: return index return -1