Esempio n. 1
0
 def init_multiprocessus(self):
     """
     Prepare the Flow for multiprocessus mode.

     Configures the ZMQ ports, the producer, the consumer and the stagers,
     builds the router and collects every stager processus.

     Returns:
     ========
     True if every initialisation step succeeded
     Otherwise False
     """
     # Each configuration step reports failure with a falsy value; the
     # chained ``and`` stops at the first step that fails.
     if not (self.configure_ports() and self.configure_producer()):
         return False
     connexions = self.add_consumer_to_router()
     if not (self.configure_consumer()
             and self.configure_stagers(connexions)):
         return False
     # The GUI address is only handed to the router when the GUI is enabled.
     self.router = RouterQueue(
         connexions=connexions,
         gui_address=self.gui_address if self.gui else None)
     self.step_processus.extend(
         worker for step in self.stager_steps for worker in step.processus)
     self.display_conf()
     return True
Esempio n. 2
0
class Flow(Tool):
    '''
    A Flow-based framework. It executes steps in a sequential or
    multiprocessus environment.

    Steps are user-defined Python classes; the pipeline layout (producer,
    stagers, consumer) is described in a JSON configuration file.
    The multiprocessus mode relies on the ZeroMQ library (http://zeromq.org)
    to pass messages between processus, which avoids classic concurrency
    mechanisms such as mutexes, critical sections and semaphores.
    Passing data between steps is managed by the router.
    If a step is executed by several processus, the router uses the LRU
    pattern (least recently used) to choose the processus that receives the
    next data. The router also manages a queue for each step.
    '''
    description = 'run stages in multiprocessus Flow based framework'
    # When true, status messages are published to a GUI over ZMQ.
    gui = Bool(False, help='send status to GUI').tag(
        config=True, allow_none=True)
    # "host:port" the GUI publisher socket connects to (see connect_gui).
    gui_address = Unicode('localhost:5565', help='GUI adress and port').tag(
        config=True, allow_none=True)
    # Execution mode; init() and start() dispatch on this value.
    mode = Unicode('sequential', help='Flow mode [sequential | multiprocessus]').tag(
        config=True, allow_none=True)
    # Producer description; the code reads the keys 'name', 'module',
    # 'class' and 'next_steps'.
    producer_conf = Dict(
        help='producer description: name , module, class',
                                            allow_none=False).tag(config=True)
    # Ordered list of stager descriptions (one dict per stage).
    stagers_conf = List(
        help='stagers list description in a set order:',
         allow_none=False).tag(config=True)
    # Consumer description.
    # NOTE(review): the help text below says 'producer description'; it looks
    # copy-pasted from producer_conf - confirm before changing the string.
    consumer_conf = Dict(
        default_value={'name': 'CONSUMER', 'class': 'Producer',
                       'module': 'producer',  'prev': 'STAGE1'},
        help='producer description: name , module, class',
                allow_none=False).tag(config=True)
    # Default pool of ZMQ ports: 5555 to 5599. configure_ports() pops
    # entries from zmq_ports as it assigns them to steps.
    ports_list = list(range(5555,5600,1))
    zmq_ports = List(ports_list, help='ZMQ ports').tag(
        config=True, allow_none=True)
    # Command line aliases for the configurable traits above.
    aliases = Dict({'gui_address': 'Flow.gui_address',
                    'mode':'Flow.mode','gui': 'Flow.gui'})
    examples = ('prompt%> ctapipe-flow \
    --config=examples/flow/switch.json')

    # Role names passed to get_pipe_steps() and instantiation().
    PRODUCER = 'PRODUCER'
    STAGER = 'STAGER'
    CONSUMER = 'CONSUMER'
    ROUTER = 'ROUTER'

    # Runtime state, filled in by init() and its helpers.
    # NOTE(review): `stagers`, `step_processus` and `ports` are mutable
    # objects created once at class level; the methods modify them in place
    # through `self`, so they are shared by every Flow instance unless an
    # instance rebinds them.
    producer = None
    consumer = None
    stagers = list()
    router = None
    producer_step = None
    stager_steps = None
    consumer_step = None
    step_processus = list()
    router_processus = None
    ports = dict()

    def setup(self):
        """
        Initialise the Flow and abort the program if initialisation fails.

        Any falsy result of ``init`` (``False`` or ``None``) is treated as
        a failure.

        Raises:
        -------
        SystemExit : with a non-zero status when ``init`` does not succeed
        """
        import sys  # local import keeps this method self-contained

        if not self.init():
            self.log.error('Could not initialise Flow based framework')
            # Exit with an error status so that callers (shell, scheduler)
            # can detect the failed initialisation.
            sys.exit(1)

    def init(self):
        '''
        Create producer, stagers and consumer instances according to the
        configuration.

        Returns:
        --------
        bool : True if the Flow based framework is correctly set up and the
         producer, stagers and consumer are initialised, otherwise False
         (missing configuration file, step generation error, GUI connection
         error or unknown mode)
        '''
        # Verify configuration instance
        if not path.isfile(self.config_file):
            self.log.error('Could not open Flow based framework config_file {}'
                           .format(self.config_file))
            return False
        if not self.generate_steps():
            self.log.error("Error during steps generation")
            return False
        if self.gui:
            # The publisher socket is used to send status messages to the GUI
            self.context = zmq.Context()
            self.socket_pub = self.context.socket(zmq.PUB)
            if not self.connect_gui():
                return False
        if self.mode == 'sequential':
            return self.init_sequential()
        if self.mode == 'multiprocessus':
            return self.init_multiprocessus()
        self.log.error("{} is not a valid mode for Flow based framework".format(self.mode))
        # Report the failure explicitly instead of falling through with None
        return False

    def init_multiprocessus(self):
        """
        Prepare the Flow for multiprocessus mode.

        Configures the ZMQ ports, the producer, the consumer and the
        stagers, builds the router and collects every stager processus.

        Returns:
        ========
        True if every initialisation step succeeded
        Otherwise False
        """
        # Each configuration step reports failure with a falsy value; the
        # chained ``and`` stops at the first step that fails.
        if not (self.configure_ports() and self.configure_producer()):
            return False
        connexions = self.add_consumer_to_router()
        if not (self.configure_consumer()
                and self.configure_stagers(connexions)):
            return False
        # The GUI address is only handed to the router when the GUI is
        # enabled.
        self.router = RouterQueue(
            connexions=connexions,
            gui_address=self.gui_address if self.gui else None)
        self.step_processus.extend(
            worker for step in self.stager_steps for worker in step.processus)
        self.display_conf()
        return True

    def init_sequential(self):
        """
        Initialise Flow for sequential mode.

        Builds one sequential wrapper per step (producer, stagers, consumer)
        around the user coroutine and registers it in
        ``self.sequential_instances`` under the step name.

        Returns:
        ========
        True if every initialisation are correct
        Otherwise False (port configuration failed or a user class could
        not be loaded)
        """
        # configure_ports also fills each step's connexions and
        # main_connexion_name, which the wrappers below receive; do not go
        # on with a half-configured pipeline.
        if not self.configure_ports():
            return False
        self.sequential_instances = dict()

        # producer
        loaded = self._load_step_coroutine(self.producer_step.name)
        if loaded is None:
            return False
        _, coroutine = loaded
        self.producer = ProducerSequential(
            coroutine, name=self.producer_step.name,
            connexions=self.producer_step.connexions,
            main_connexion_name=self.producer_step.main_connexion_name)
        self.producer.init()
        self.producer_step.processus.append(self.producer)
        self.sequential_instances[self.producer_step.name] = self.producer

        # stages
        for step in self.stager_steps:
            loaded = self._load_step_coroutine(step.name)
            if loaded is None:
                return False
            _, coroutine = loaded
            stage = StagerSequential(
                coroutine, name=step.name, connexions=step.connexions,
                main_connexion_name=step.main_connexion_name)
            step.processus.append(stage)
            self.sequential_instances[step.name] = stage
            self.stagers.append(stage)
            stage.init()

        # consumer
        loaded = self._load_step_coroutine(self.consumer_step.name)
        if loaded is None:
            return False
        conf, coroutine = loaded
        self.consumer = ConsumerSequential(coroutine, name=conf['name'])
        self.consumer_step.processus.append(self.consumer)
        self.consumer.init()
        self.sequential_instances[self.consumer_step.name] = self.consumer
        self.display_conf()
        return True

    def _load_step_coroutine(self, step_name):
        """
        Load the user coroutine configured for the step ``step_name``.

        Returns:
        ========
        (conf, coroutine) tuple, or None (after logging the error) when the
        user class could not be loaded
        """
        conf = self.get_step_conf(step_name)
        module = conf['module']
        class_name = conf['class']
        try:
            coroutine = dynamic_class_from_module(class_name, module, self)
        except DynamicClassError as e:
            self.log.error('{}'.format(e))
            return None
        return conf, coroutine

    def configure_stagers(self,router_names):
        """ Create the Processus wrapping the user's coroutine for every stage

        Parameters:
        ===========
        router_names: dict
            Filled in place: one '<step name>_router' entry per stage holding
            [input port, output port, queue limit]
        Returns:
        ========
        True if every instantiation succeeded
        Otherwise False
        """
        for step in self.stager_steps:
            # each stage needs a router to connect it to the previous stages
            router_key = step.name + '_' + 'router'
            router_names[router_key] = [
                self.ports[step.name + '_in'],
                self.ports[step.name + '_out'],
                step.queue_limit,
            ]

            for index in range(step.nb_processus):
                step_conf = self.get_step_conf(step.name)
                # Every processus of a step gets a unique, numbered name
                worker_name = step.name + '$$processus_number$$' + str(index)
                try:
                    worker = self.instantiation(
                        step.name,
                        self.STAGER,
                        processus_name=worker_name,
                        port_in=step.port_in,
                        connexions=step.connexions,
                        main_connexion_name=step.main_connexion_name,
                        config=step_conf)
                except FlowError as e:
                    self.log.error(e)
                    return False
                self.stagers.append(worker)
                step.processus.append(worker)
        return True


    def configure_consumer(self):
        """ Create the consumer Processus around the user's coroutine

        On success the new Processus is stored in ``self.consumer``.

        Returns:
        ========
        True if the instantiation succeeded
        Otherwise False (the FlowError is logged)
        """
        step = self.consumer_step
        try:
            instance = self.instantiation(
                step.name, self.CONSUMER,
                port_in=step.port_in,
                config=self.consumer_conf)
        except FlowError as e:
            self.log.error(e)
            return False
        else:
            self.consumer = instance
            return True

    def add_consumer_to_router(self):
        """ Build the router_names dictionary, seeded with the consumer entry

        Returns:
        ========
        A new dictionary with a single '<consumer name>_router' key whose
        value is [input port, output port, queue limit]
        """
        step = self.consumer_step
        # the consumer, like each stage, needs a router in front of it
        consumer_entry = [
            self.ports[step.name + '_in'],
            self.ports[step.name + '_out'],
            step.queue_limit,
        ]
        return {step.name + '_' + 'router': consumer_entry}

    def configure_producer(self):
        """ Create the producer Processus around the user's coroutine

        On success the new Processus is stored in ``self.producer``.

        Returns:
        ========
        True if the instantiation succeeded
        Otherwise False (the FlowError is logged)
        """
        step = self.producer_step
        try:
            instance = self.instantiation(
                step.name,
                self.PRODUCER,
                connexions=step.connexions,
                main_connexion_name=step.main_connexion_name,
                config=self.producer_conf)
        except FlowError as e:
            self.log.error(e)
            return False
        else:
            self.producer = instance
            return True

    def connect_gui(self):
        """ Connect the ZMQ publisher socket used to send information to GUI

        Nothing is done when ``self.gui_address`` is None.

        Returns:
        ========
        True if everything correct
        Otherwise False (the ZMQ error is logged)
        """
        if self.gui_address is None:
            return True
        endpoint = 'tcp://' + self.gui_address
        try:
            self.socket_pub.connect(endpoint)
        except zmq.error.ZMQError as e:
            # A failed connection aborts the initialisation, so report it as
            # an error and keep the address readable in the message.
            self.log.error(
                'Could not connect to GUI at {}: {}'.format(endpoint, e))
            return False
        return True


    def generate_steps(self):
        """ Generate Flow based framework steps from configuration

        Fills ``self.producer_step``, ``self.stager_steps`` and
        ``self.consumer_step``.

        Returns:
        ========
        True if everything correct
        Otherwise False (missing producer or consumer, or invalid stagers
        configuration)
        """
        self.producer_step = self.get_pipe_steps(self.PRODUCER)
        self.stager_steps = self.get_pipe_steps(self.STAGER)
        self.consumer_step = self.get_pipe_steps(self.CONSUMER)
        if not self.producer_step:
            self.log.error("No producer in configuration")
            return False
        # get_pipe_steps returns None when a stager description lacks a
        # mandatory key. An empty list is not rejected here, hence the
        # identity test rather than a truthiness test.
        if self.stager_steps is None:
            self.log.error("Invalid stagers configuration")
            return False
        if not self.consumer_step:
            self.log.error("No consumer in configuration")
            return False
        return True

    def configure_ports(self):
        """
        Configures producer, stagers and consumer ZMQ ports

        Ports are taken from ``self.zmq_ports`` (which is consumed) and
        recorded in ``self.ports`` under '<step name>_in' / '<step name>_out'
        keys; a key that already has a port keeps it.

        Returns:
        ========
        True if everything correct
        Otherwise False (the error is logged)
        """
        def port_for(key):
            # Reuse the port already bound to ``key`` or take a free one.
            if key not in self.ports:
                self.ports[key] = str(self.zmq_ports.pop())
            return self.ports[key]

        try:
            # configure connexions (zmq port) for producer (one per next step)
            producer = self.producer_step
            for next_step_name in producer.next_steps_name:
                producer.connexions[next_step_name] = port_for(
                    next_step_name + '_in')
            producer.main_connexion_name = producer.next_steps_name[0]

            # configure port_in and connexions (zmq port) for all stages
            # (one per next step)
            for stage in self.stager_steps:
                stage.port_in = port_for(stage.name + '_out')
                for next_step_name in stage.next_steps_name:
                    stage.connexions[next_step_name] = port_for(
                        next_step_name + '_in')
                stage.main_connexion_name = stage.next_steps_name[0]

            # configure port_in (zmq port) for consumer
            consumer = self.consumer_step
            consumer.port_in = port_for(consumer.name + '_out')
        except IndexError:
            # NOTE(review): an empty next_steps_name list would also end up
            # here through the [0] lookups above.
            self.log.error("Not enough ZMQ ports. Consider adding some port to configuration.")
            return False
        except Exception as e:
            self.log.error("Could not configure ZMQ ports. {}".format(e))
            return False
        return True

    def get_step_by_name(self, name):
        ''' Look a PipeStep up by name among the stager steps, the producer
        step and the consumer step (searched in that order)

        Parameters:
        ===========
        name : str
            step name
        Return: the first matching PipeStep, otherwise None
        '''
        candidates = self.stager_steps + [self.producer_step,
                                          self.consumer_step]
        return next((step for step in candidates if step.name == name), None)

    def instantiation(
            self, name, stage_type, processus_name=None,
            port_in=None, connexions=None, main_connexion_name=None, config=None):
        '''
        Instantiate the user class configured for step ``name`` and wrap it
        in the ZMQ processus matching ``stage_type``.

        Parameters
        ----------
        name : str
                stage name
        stage_type : str
                one of self.STAGER, self.PRODUCER, self.CONSUMER
        processus_name : str
                name given to the processus (used for stagers only)
        port_in : str
                step ZMQ port in (used for stagers and consumer)
        connexions : dict
                key: StepName, value: connexion ZMQ ports
        main_connexion_name : str
            main ZMQ connexion name. Connexion to use when the user does not
            specify one
        config : dict
            accepted for backward compatibility; not used by this method

        Returns
        -------
        The StagerZmq, ProducerZmq or ConsumerZMQ wrapping the user object

        Raises
        ------
        FlowError : if no configuration exists for ``name``, if the user
            object cannot be created or if ``stage_type`` is unknown
        '''
        stage = self.get_step_conf(name)
        if stage is None:
            # Raise the error type callers handle instead of failing with a
            # TypeError on the lookups below.
            raise FlowError(
                'Cannot create instance of {}. No configuration found.'
                .format(name))
        module = stage['module']
        class_name = stage['class']
        obj = dynamic_class_from_module(class_name, module, self)
        if obj is None:
            raise FlowError('Cannot create instance of {}'.format(name))
        obj.name = name
        if stage_type == self.STAGER:
            processus = StagerZmq(
                obj, port_in, processus_name,
                connexions=connexions,
                main_connexion_name=main_connexion_name)
        elif stage_type == self.PRODUCER:
            processus = ProducerZmq(
                obj, name, connexions=connexions,
                main_connexion_name=main_connexion_name)
        elif stage_type == self.CONSUMER:
            processus = ConsumerZMQ(
                obj, port_in,
                name)
        else:
            # Build ONE message string: passing the fragments as separate
            # positional arguments does not produce a readable message.
            raise FlowError(
                'Cannot create instance of {}. Type {} does not exist.'
                .format(name, stage_type))
        return processus

    def get_pipe_steps(self, role):
        '''
        Create Flow based framework step(s) from configuration for one role

        Parameters
        ----------
        role: str
                role of the step(s) to build
                Accepted values: self.PRODUCER - self.STAGER  - self.CONSUMER
        Returns:
        --------
        PRODUCER, CONSUMER: the PipeStep built for that role
        STAGER: list of PipeStep, in configuration order
        None if a mandatory configuration key ('name', 'next_steps') is
        missing or if ``role`` is not one of the accepted values
        '''
        try:
            if role == self.PRODUCER:
                # Create producer step
                prod_step = PipeStep(self.producer_conf['name'])
                prod_step.type = self.PRODUCER
                prod_step.next_steps_name = self.producer_conf['next_steps'].split(',')
                return prod_step
            if role == self.STAGER:
                # Create stagers steps
                result = list()
                for stage_conf in self.stagers_conf:
                    # A missing or unusable 'nb_process' means one processus
                    try:
                        nb_processus = int(stage_conf['nb_process'])
                    except (KeyError, TypeError, ValueError):
                        nb_processus = 1
                    next_steps_name = stage_conf['next_steps'].split(',')
                    queue_limit = stage_conf.get('queue_limit', -1)
                    stage_step = PipeStep(
                        stage_conf['name'],
                        next_steps_name=next_steps_name,
                        nb_processus=nb_processus,
                        queue_limit=queue_limit)
                    stage_step.type = self.STAGER
                    result.append(stage_step)
                return result
            if role == self.CONSUMER:
                # Create consumer step
                queue_limit = self.consumer_conf.get('queue_limit', -1)
                cons_step = PipeStep(self.consumer_conf['name'],
                                     queue_limit=queue_limit)
                cons_step.type = self.CONSUMER
                return cons_step
        except KeyError:
            return None
        # Unknown role: there is nothing to build
        self.log.error('{} is not a valid step role'.format(role))
        return None

    def def_step_for_gui(self):
        ''' Create a list (levels_for_gui) of StagerRep describing all steps:
        the producer first, then one entry per stager step, then the consumer

        Returns: tuple (levels_for_gui, current time)
        '''
        levels_for_gui = list()

        levels_for_gui.append(StagerRep(self.producer_step.name,
                            self.producer_step.next_steps_name,
                            nb_job_done=self.producer.nb_job_done,
                            running=self.producer.running,
                            step_type=StagerRep.PRODUCER))
        for step in self.stager_steps:
            if self.mode == 'sequential':
                # A sequential step is run by a single instance
                instance = step.processus[0]
                levels_for_gui.append(StagerRep(
                    step.name, step.next_steps_name,
                    nb_job_done=instance.nb_job_done,
                    running=instance.running,
                    nb_processus=len(step.processus)))

            elif self.mode == 'multiprocessus':
                # Aggregate the counters of every processus of the step
                nb_job_done = 0
                running = 0
                for processus in step.processus:
                    nb_job_done += processus.nb_job_done
                    running += processus.running
                # NOTE(review): the representation is labelled with the name
                # of the LAST processus of the step, not with step.name -
                # confirm this is intended. A step without processus falls
                # back to step.name instead of reading an unbound or stale
                # loop variable.
                if step.processus:
                    rep_name = step.processus[-1].name
                else:
                    rep_name = step.name
                levels_for_gui.append(StagerRep(
                    rep_name, step.next_steps_name,
                    nb_job_done=nb_job_done,
                    running=running,
                    nb_processus=len(step.processus)))

        levels_for_gui.append(StagerRep(self.consumer_step.name,
                                nb_job_done=self.consumer.nb_job_done,
                                running=self.consumer.running,
                                step_type=StagerRep.CONSUMER))

        return (levels_for_gui, time())


    def display_conf(self):
        ''' Log every step (producer, stagers, consumer) followed by the
        names of its next steps
        '''
        emit = self.log.info
        with_processus_count = self.mode == 'multiprocessus'
        ordered_steps = ([self.producer_step] + self.stager_steps
                         + [self.consumer_step])

        emit('')
        emit('------------------ Flow configuration ------------------')
        for step in ordered_steps:
            if with_processus_count:
                header = 'step {} (nb processus {}) '.format(
                    step.name, str(step.nb_processus))
            else:
                header = 'step {}'.format(step.name)
            emit(header)
            for successor in step.next_steps_name:
                emit('--> next {} '.format(successor))
        emit('------------------ End Flow configuration ------------------')
        emit('')

    def display_statistics(self):
        """
        Log the statistics of every step representation (StagerRep)
        """
        representations, _built_at = self.def_step_for_gui()
        for representation in representations:
            self.log.info(representation.get_statistics())

    def start(self):
        """ Run the Flow based framework steps in the configured mode

        Nothing is done when ``self.mode`` is neither 'multiprocessus' nor
        'sequential'.
        """
        runners = {
            'multiprocessus': self.start_multiprocessus,
            'sequential': self.start_sequential,
        }
        runner = runners.get(self.mode)
        if runner is not None:
            runner()

    def start_sequential(self):
        """ Run the Flow based framework in sequential mode

        Every output of the producer is pushed through the following steps
        (see run_generator) before the next output is requested. When the
        producer is exhausted, ``finish`` is called on every step instance,
        the statistics are logged and, if the GUI is enabled, the GUI
        connexion is closed.
        """
        if self.gui:
            self.socket_pub.send_multipart(
                [b'MODE', dumps('sequential')])
        start_time = time()
        # Get producer instance and the generator of its run coroutine
        self.producer = self.sequential_instances[self.producer_step.name]
        prod_gen = self.producer.run()
        # only for gui
        if self.gui:
            self.producer.running = 1
            self.send_status_to_gui()
        # for each producer output
        for prod_result in prod_gen:
            if self.gui:
                self.producer.running = 0
                self.send_status_to_gui()
            # get next stage destination and input from producer output
            msg, destination = prod_result
            # Run the following steps until nothing is left to forward.
            # Identity test: ``!= None`` would call the message's own
            # comparison operator, which need not return a plain bool.
            while msg is not None:
                destination, msg = self.run_generator(destination, msg)
            if self.gui:
                self.producer.running = 1
                self.send_status_to_gui()
        if self.gui:
            self.consumer.running = 0
            self.send_status_to_gui()
        # execute finish method for all steps
        for step in self.sequential_instances.values():
            step.finish()
        end_time = time()
        self.log.info('=== SEQUENTIAL MODE END ===')
        self.log.info('Compute time {} sec'.format(end_time - start_time))
        self.display_statistics()
        # send finish to GUI and close connexions
        if self.gui:
            self.socket_pub.send_multipart(
                [b'FINISH', dumps('finish')])
            self.socket_pub.close()
            self.context.destroy()
            self.context.term()

    def run_generator(self, destination ,msg):
        """ Run the step named ``destination`` on ``msg`` and push each of
        its results through the following steps (recursively)

        Parameters:
        ===========
        destination: str
            Name of the step to run
        msg: a Pickle dumped msg
            Input handed to the step's run method
        Returns:
        ========
        (destination, msg) tuple, always (None, None): every result has
        already been forwarded downstream when this method returns, so
        nothing is left for the caller to process
        """
        stage = self.sequential_instances[destination]
        stage.running = 1
        if self.gui:
            self.send_status_to_gui()
        stage_gen = stage.run(msg)
        stage.running = 0
        if stage_gen:
            for result in stage_gen:
                # A falsy result ends this branch of the pipeline
                if result:
                    next_msg, next_destination = result
                    self.run_generator(next_destination, next_msg)
        # Never hand the inputs back: a stage that yields nothing must not
        # make the caller process the same message again.
        return (None, None)


    def send_status_to_gui(self):
        """
        Publish the current mode, then the up-to-date representation of all
        steps (StagerRep list with its build time), to the GUI
        """
        publish = self.socket_pub.send_multipart
        publish([b'MODE', dumps(self.mode)])
        levels, built_at = self.def_step_for_gui()
        graph_payload = dumps([built_at, levels])
        publish([b'GUI_GRAPH', graph_payload])

    def start_multiprocessus(self):
        ''' Start all Flow based framework processus, wait for the work to
        complete, then stop them.

        The Flow state is regularly sent to the GUI (when enabled) in case a
        new GUI instance was launched. Processus are only stopped once the
        router queues are empty and every stage processus has been idle long
        enough, so that no data is lost.
        '''
        # send Flow based framework configuration to an optional GUI instance
        if self.gui :
            self.send_status_to_gui()
        start_time = time()
        # Start all processus: consumer and router first, producer last
        self.consumer.start()
        self.router.start()
        for stage in self.stagers:
            stage.start()
        self.producer.start()
        # Wait for the producer to end (wait_and_send_levels sets its `stop`
        # attribute to 1, then joins it)
        self.wait_and_send_levels(self.producer)

        # Ensure that all queues are empty and all processus have been
        # waiting for new data for more than a specific time
        while not self.wait_all_stagers(1000): # 1000 ms
            if self.gui :
                self.send_status_to_gui()
            sleep(1)

        # Now send stop to stage processus and wait until they join
        for worker in self.step_processus:
            self.wait_and_send_levels(worker)
        # Stop consumer and router processus
        self.wait_and_send_levels(self.consumer)
        self.wait_and_send_levels(self.router)
        if self.gui :
            self.send_status_to_gui()
        end_time = time()
        self.log.info('=== MULTUPROCESSUS MODE END ===')
        self.log.info('Compute time {} sec'.format(end_time - start_time))
        self.display_statistics()

        # Wait 1 s to be sure the last status message will be displayed
        sleep(1)
        # Send finish to GUI and close the connexion
        if self.gui :
            self.socket_pub.send_multipart(
            [b'FINISH', dumps('finish')])
            self.socket_pub.close()
            self.context.destroy()
            self.context.term()


    def wait_all_stagers(self,mintime):
        """ Tell whether all steps have finished their jobs and are idle

        Parameters:
        ===========
        mintime:
            minimum value every processus' ``wait_since`` must have reached
        Returns:
        ========
        True if the router's total queue size is 0 and no processus of
        self.step_processus has a ``wait_since`` below mintime
        Otherwise False
        """
        queues_empty = self.router.total_queue_size == 0
        if not queues_empty:
            return False
        still_busy = any(worker.wait_since < mintime
                         for worker in self.step_processus)
        return not still_busy


    def finish(self):
        """ Log the end-of-run banner of the Flow
        """
        self.log.info('===== Flow END ======')

    def wait_and_send_levels(self, processus_to_wait):
        '''
        Ask a processus to stop, then wait for it to join while regularly
        sending the Flow based framework state to the GUI (in case a GUI
        connects later)

        Parameters:
        -----------
        processus_to_wait : processus
                processus to stop and join
        '''
        processus_to_wait.stop = 1

        still_alive = True
        while still_alive:
            # Short join timeout so that the GUI keeps being refreshed
            processus_to_wait.join(timeout=.1)
            if self.gui:
                self.send_status_to_gui()
            still_alive = processus_to_wait.is_alive()

    def get_step_conf(self, name):
        '''
        Find the configuration of the step called ``name``

        The producer configuration is checked first, then the consumer
        configuration, then every stager configuration in order.

        Parameters:
        -----------
        name : str
                stage name
        Returns:
        --------
        The first configuration whose 'name' matches, or None if not found
        '''
        def configurations():
            # Lazily walk the configurations in lookup order
            yield self.producer_conf
            yield self.consumer_conf
            for stager_conf in self.stagers_conf:
                yield stager_conf

        for conf in configurations():
            if conf['name'] == name:
                return conf
        return None

    def get_stager_indice(self, name):
        '''
        Find the position of the stager called ``name`` in self.stagers_conf

        Parameters:
        -----------
        name : str
                stage name
        Returns:
        --------
        index of the first matching entry, -1 if not found
        '''
        matches = (position
                   for position, conf in enumerate(self.stagers_conf)
                   if conf['name'] == name)
        return next(matches, -1)