Example #1
0
    def __init__(self, template, debug=False, dry_run=False):
        """Load configuration, open the MongoDB-backed work queue, and
        build a Jinja2 environment rooted at the template's directory.

        Raises a generic Exception when *template* does not exist or
        names a directory instead of a file.
        """
        self.config = Configuration()
        self.debug = debug
        self.dry_run = dry_run

        # WorkQueue resolves a usable MongoDB endpoint from whichever of
        # host/port, URI, or SRV-based replica-set discovery is configured.
        cfg = self.config
        self.wq = WorkQueue(
            host=cfg.mongodb_host,
            port=cfg.mongodb_port,
            uri=cfg.mongodb_uri,
            srv_name=cfg.mongodb_rs_srv,
            database=cfg.mongodb_queue_db,
            replicaset=cfg.mongodb_rs,
            collection=cfg.mongodb_queue_col)

        if not os.path.exists(template):
            raise Exception("Template file does not exist")
        self.template_dir = os.path.dirname(template)
        self.template_file = os.path.basename(template)
        if not self.template_file:
            raise Exception("Template must be a file, not a directory")

        # Templates are looked up relative to the template's own directory.
        self.jinja = jinja2.Environment(
            loader=jinja2.FileSystemLoader(self.template_dir),
            autoescape=False)
 def init_generator(self,
                    module,
                    source,
                    node_type,
                    instance_id,
                    collectors,
                    client_context,
                    http_port,
                    sandesh_req_uve_pkg_list=None,
                    discovery_client=None,
                    connect_to_collector=True,
                    logger_class=None,
                    logger_config_file=None,
                    host_ip='127.0.0.1',
                    alarm_ack_callback=None):
     """Initialise this sandesh instance in the GENERATOR role.

     Records the generator identity (module/source/node_type/instance_id),
     sets up logging, stats, tracing and the UVE type maps, builds the
     request/UVE lists for each package in *sandesh_req_uve_pkg_list*,
     starts the introspect HTTP server unless http_port == -1, and --
     when *connect_to_collector* is true -- initiates a SandeshClient
     towards the first collector (second one, if any, as backup).
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._node_type = node_type
     self._instance_id = instance_id
     self._host_ip = host_ip
     self._client_context = client_context
     self._collectors = collectors
     self._connect_to_collector = connect_to_collector
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._init_logger(module,
                       logger_class=logger_class,
                       logger_config_file=logger_config_file)
     self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                       connect_to_collector)
     self._stats = SandeshStats()
     self._trace = trace.Trace()
     self._sandesh_request_dict = {}
     self._alarm_ack_callback = alarm_ack_callback
     self._uve_type_maps = SandeshUVETypeMaps(self._logger)
     # Avoid the mutable-default-argument trap for the package list.
     if sandesh_req_uve_pkg_list is None:
         sandesh_req_uve_pkg_list = []
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
     sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     self._gev_httpd = None
     # http_port == -1 means "do not run the introspect HTTP server".
     if http_port != -1:
         self._http_server = SandeshHttp(self, module, http_port,
                                         sandesh_req_uve_pkg_list)
         self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
     # First collector is primary, second (if present) is the backup.
     primary_collector = None
     secondary_collector = None
     if self._collectors is not None:
         if len(self._collectors) > 0:
             primary_collector = self._collectors[0]
         if len(self._collectors) > 1:
             secondary_collector = self._collectors[1]
     if self._connect_to_collector:
         self._client = SandeshClient(self, primary_collector,
                                      secondary_collector, discovery_client)
         self._client.initiate()
Example #3
0
    def run(self, args):
        """Publish download progress over IceStorm and serve scheduler
        factories until shutdown.

        Returns 0 on normal termination, 2 when a required proxy cannot
        be obtained.
        """
        topic_mgr = self.get_topic_manager()
        if not topic_mgr:
            print('invalid proxy')
            return 2

        # Reuse the topic when it already exists; create it on first run.
        topic_name = "ProgressTopic"
        try:
            topic = topic_mgr.retrieve(topic_name)
        except IceStorm.NoSuchTopic:
            topic = topic_mgr.create(topic_name)

        publisher = topic.getPublisher()
        progress = Downloader.ProgressEventPrx.uncheckedCast(publisher)

        if progress is None:
            # Bug fix: execution previously fell through with a None
            # publisher, which would only fail later inside the WorkQueue.
            print("Progress topic  not found")
            return 2

        work_queue = WorkQueue(progress)

        ic = self.communicator()
        adapter = ic.createObjectAdapter("FactoryAdapter")
        servant = SchedulerFactoryI(work_queue, ic)
        proxy = adapter.add(servant, ic.stringToIdentity("Factory1"))

        work_queue.start()

        adapter.activate()
        print(proxy, flush=True)
        self.shutdownOnInterrupt()
        ic.waitForShutdown()
        # Stop the worker thread before returning (matches the other
        # servers in this file, which call destroy() after waitForShutdown).
        work_queue.destroy()

        return 0
 def init_generator(self,
                    module,
                    source,
                    node_type,
                    instance_id,
                    collectors,
                    client_context,
                    http_port,
                    sandesh_req_uve_pkg_list=None,
                    connect_to_collector=True,
                    logger_class=None,
                    logger_config_file=None,
                    host_ip='127.0.0.1',
                    alarm_ack_callback=None,
                    config=None):
     """Initialise this sandesh instance in the GENERATOR role.

     Newer variant: the package list is kept on the instance, runtime
     behaviour is driven by *config* (falling back to
     SandeshConfig.from_parser_arguments()), the introspect server is
     started via run_introspect_server(), and the collector list is
     handed to SandeshClient.initiate() instead of being split into
     primary/backup here.
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._node_type = node_type
     self._instance_id = instance_id
     # `or []` also replaces an explicitly-passed empty list; safe here
     # since a fresh list is equivalent.
     self._sandesh_req_uve_pkg_list = sandesh_req_uve_pkg_list or []
     self._host_ip = host_ip
     self._client_context = client_context
     self._connect_to_collector = connect_to_collector
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._send_level = SandeshLevel.INVALID
     self._init_logger(self._module,
                       logger_class=logger_class,
                       logger_config_file=logger_config_file)
     self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                       connect_to_collector)
     from sandesh_stats import SandeshMessageStatistics
     self._msg_stats = SandeshMessageStatistics()
     self._trace = trace.Trace()
     self._sandesh_request_map = {}
     self._alarm_ack_callback = alarm_ack_callback
     self._config = config or SandeshConfig.from_parser_arguments()
     self._uve_type_maps = SandeshUVETypeMaps(self._logger)
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
     self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in self._sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     # Apply config-driven knobs before any traffic is generated.
     if self._config.disable_object_logs is not None:
         self.disable_sending_object_logs(self._config.disable_object_logs)
     if self._config.system_logs_rate_limit is not None:
         SandeshSystem.set_sandesh_send_rate_limit(
             self._config.system_logs_rate_limit)
     self._gev_httpd = None
     # http_port == -1 means "do not run the introspect HTTP server".
     if http_port != -1:
         self.run_introspect_server(http_port)
     if self._connect_to_collector:
         self._client = SandeshClient(self)
         self._client.initiate(collectors)
Example #5
0
 def __init__(self, sandesh_instance, server, event_handler,
              sandesh_msg_handler):
     """Create a sandesh session: wire up the reader/writer pair and the
     gated outbound send queue, then initialise the TcpSession base.
     """
     self._sandesh_instance = sandesh_instance
     self._logger = sandesh_instance._logger
     self._event_handler = event_handler
     self._reader = SandeshReader(self, sandesh_msg_handler)
     self._writer = SandeshWriter(self)
     # Outbound messages drain via a WorkQueue; sends only proceed when
     # the readiness callback says so.
     drain_cb = self._send_sandesh
     ready_cb = self._is_ready_to_send_sandesh
     self._send_queue = WorkQueue(drain_cb, ready_cb)
     # Base-class setup last, once the session plumbing is in place.
     TcpSession.__init__(self, server)
Example #6
0
def main():
    """Drive a Work Queue master: generate tasks, submit them, and loop
    until every task has either succeeded or exhausted its retries.
    """
    args = parse_args()

    # Create a temporary file store for task results.
    tmpdir = tempfile.mkdtemp()

    # Load the task input data here
    task_inputs = load_inputs()

    print('Creating tasks')
    tasks = generate_tasks(args.command, task_inputs,
                           args.infiles, args.outfile,
                           tmpdir, args.max_retries)

    # Create the Work Queue master that manages task distribution.
    work_queue.cctools_debug_flags_set("all")
    work_queue.cctools_debug_config_file(f'{args.name}.debug')
    work_queue.cctools_debug_config_file_size(0)
    wq = WorkQueue(port=args.port, name=args.name, shutdown=True)
    wq.specify_log(f'{args.name}.log')

    # Submit all tasks to the queue.
    print('Submitting tasks')
    for t in tasks.values():
        wq.submit(t)

    # The main loop waits for a task to get done then handles success or
    # failure accordingly.
    print('Entering main loop')
    try:
        # Generator instead of a throwaway list inside all().
        while not all(done_check(t) for t in tasks.values()):
            t = wq.wait(10)  # This blocks for 10s or until a task is done.

            if t is None:
                continue
            tasks[t.tag] = t  # Update the task map with the correct status

            # On success, post-process the task. If the maximum number of
            # submissions for a task has been reached, make a note.
            # Otherwise, report the failure and resubmit.
            # (Bug fix: removed the unused local `input_idx`.)
            if t.return_status == 0 and t.result == WORK_QUEUE_RESULT_SUCCESS:
                print(f'Task {t.tag} completed successfully.')
                handle_success(t, tmpdir, args.outfile)
            elif t.result == WORK_QUEUE_RESULT_MAX_RETRIES:
                print(f'Task {t.tag} resubmitted too many times.')
            else:
                wq.submit(t)
                print(f'Task {t.tag} failed with result {t.result}')
                print(t.output)

        print('All tasks completed or hit max retries.')
    finally:
        # Remove the temporary result store even if the loop errors out.
        print('Cleaning up...')
        shutil.rmtree(tmpdir)

    print('Done')
 def init_generator(self, module, source, collectors, client_context, 
                    http_port, sandesh_req_uve_pkg_list=None,
                    discovery_client=None):
     """Initialise this sandesh instance in the GENERATOR role
     (earliest variant: no node_type/instance_id, no http_port guard,
     unconditional collector connection).
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._client_context = client_context
     self._collectors = collectors
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._init_logger(module)
     self._stats = SandeshStats()
     self._trace = trace.Trace()
     self._sandesh_request_dict = {}
     self._uve_type_maps = SandeshUVETypeMaps()
     # Avoid the mutable-default-argument trap for the package list.
     if sandesh_req_uve_pkg_list is None:
         sandesh_req_uve_pkg_list = []
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)     
     sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     # NOTE(review): unlike the later variants in this file, there is no
     # `http_port != -1` guard here -- the HTTP server is always created.
     self._http_server = SandeshHttp(self, module, http_port, sandesh_req_uve_pkg_list)
     # First collector is primary, second (if present) is the backup.
     primary_collector = None
     secondary_collector = None
     if self._collectors is not None:
         if len(self._collectors) > 0:
             primary_collector = self._collectors[0]
         if len(self._collectors) > 1:
             secondary_collector = self._collectors[1]
     gevent.spawn(self._http_server.start_http_server)
     self._client = SandeshClient(self, primary_collector, secondary_collector, 
                                  discovery_client)
     self._client.initiate()
 def init_generator(self,
                    module,
                    source,
                    node_type,
                    instance_id,
                    collectors,
                    client_context,
                    http_port,
                    sandesh_req_uve_pkg_list=None,
                    discovery_client=None):
     """Initialise this sandesh instance in the GENERATOR role.

     Variant that derives the logger name from the full generator
     identity (source:module:node_type:instance_id), guards the HTTP
     server on http_port != -1, and always connects to the collector.
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._node_type = node_type
     self._instance_id = instance_id
     self._client_context = client_context
     self._collectors = collectors
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._init_logger(source + ':' + module + ':' + node_type + ':' \
         + instance_id)
     self._stats = SandeshStats()
     self._trace = trace.Trace()
     self._sandesh_request_dict = {}
     self._uve_type_maps = SandeshUVETypeMaps()
     # Avoid the mutable-default-argument trap for the package list.
     if sandesh_req_uve_pkg_list is None:
         sandesh_req_uve_pkg_list = []
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
     sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     # http_port == -1 means "do not run the introspect HTTP server".
     if http_port != -1:
         self._http_server = SandeshHttp(self, module, http_port,
                                         sandesh_req_uve_pkg_list)
         gevent.spawn(self._http_server.start_http_server)
     # First collector is primary, second (if present) is the backup.
     primary_collector = None
     secondary_collector = None
     if self._collectors is not None:
         if len(self._collectors) > 0:
             primary_collector = self._collectors[0]
         if len(self._collectors) > 1:
             secondary_collector = self._collectors[1]
     self._client = SandeshClient(self, primary_collector,
                                  secondary_collector, discovery_client)
     self._client.initiate()
Example #9
0
    def run(self, argv):
        """Serve the Math servant backed by a worker queue until
        interrupted; returns 0 once the queue has been torn down.
        """
        work_queue = WorkQueue()
        servant = MathI(work_queue)

        broker = self.communicator()

        adapter = broker.createObjectAdapter("MathAdapter")
        # Bug fix: the Python 2 `print` statement is a syntax error under
        # Python 3, which the rest of this file targets.
        print(adapter.add(servant, broker.stringToIdentity("math1")))
        adapter.activate()

        work_queue.start()

        self.shutdownOnInterrupt()
        broker.waitForShutdown()

        work_queue.destroy()
        return 0
Example #10
0
 def __init__(self, sandesh_instance, server, event_handler, sandesh_msg_handler):
     """Session setup: reader/writer pair, readiness-gated send queue,
     then the TcpSession base initialiser.
     """
     self._sandesh_instance = sandesh_instance
     self._logger = sandesh_instance._logger
     self._event_handler = event_handler
     self._reader = SandeshReader(self, sandesh_msg_handler)
     self._writer = SandeshWriter(self)
     # Outbound messages drain via a WorkQueue, gated on readiness.
     send_cb, ready_cb = self._send_sandesh, self._is_ready_to_send_sandesh
     self._send_queue = WorkQueue(send_cb, ready_cb)
     # Initialise the TCP transport last.
     TcpSession.__init__(self, server)
 def init_generator(self, module, source, node_type, instance_id,
                    collectors, client_context,
                    http_port, sandesh_req_uve_pkg_list=None,
                    discovery_client=None, connect_to_collector=True,
                    logger_class=None, logger_config_file=None,
                    host_ip='127.0.0.1', alarm_ack_callback=None):
     """Initialise this sandesh instance in the GENERATOR role.

     Mid-generation variant: message statistics instead of SandeshStats,
     a send-level threshold, optional collector connection, and the
     introspect HTTP server guarded on http_port != -1.
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._node_type = node_type
     self._instance_id = instance_id
     self._host_ip = host_ip
     self._client_context = client_context
     self._collectors = collectors
     self._connect_to_collector = connect_to_collector
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._send_level = SandeshLevel.INVALID
     self._init_logger(module, logger_class=logger_class,
                       logger_config_file=logger_config_file)
     self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                       connect_to_collector)
     from sandesh_stats import SandeshMessageStatistics
     self._msg_stats = SandeshMessageStatistics()
     self._trace = trace.Trace()
     self._sandesh_request_map = {}
     self._alarm_ack_callback = alarm_ack_callback
     self._uve_type_maps = SandeshUVETypeMaps(self._logger)
     # Avoid the mutable-default-argument trap for the package list.
     if sandesh_req_uve_pkg_list is None:
         sandesh_req_uve_pkg_list = []
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
     sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     self._gev_httpd = None
     # http_port == -1 means "do not run the introspect HTTP server".
     if http_port != -1:
         self._http_server = SandeshHttp(
             self, module, http_port, sandesh_req_uve_pkg_list)
         self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
     # First collector is primary, second (if present) is the backup.
     primary_collector = None
     secondary_collector = None
     if self._collectors is not None:
         if len(self._collectors) > 0:
             primary_collector = self._collectors[0]
         if len(self._collectors) > 1:
             secondary_collector = self._collectors[1]
     if self._connect_to_collector:
         self._client = SandeshClient(
             self, primary_collector, secondary_collector,
             discovery_client)
         self._client.initiate()
Example #12
0
    def run(self, args):
        """Create the progress/sync IceStorm channels, publish the
        scheduler-factory proxy on stdout, and serve until interrupted.

        Returns 0 after the work queue has been destroyed.
        """
        broker = self.communicator()
        adapter = broker.createObjectAdapter('FactoryAdapter')

        publisher = self.get_topic(STATUS_TOPIC).getPublisher()
        progress_publisher = Downloader.ProgressEventPrx.uncheckedCast(
            publisher)
        print("ProgressTopic created")

        queue = WorkQueue(progress_publisher)
        servant = SchedulerFactoryI(queue)

        # Subscribe the factory to the sync topic and hand it the
        # publisher so it can emit sync events itself.
        proxy = adapter.addWithUUID(servant)
        sync_topic = self.get_topic(SYNC_TOPIC)
        pub = sync_topic.subscribeAndGetPublisher({}, proxy)
        servant.sync_publisher = Downloader.SyncEventPrx.uncheckedCast(pub)
        # Bug fix: message previously read "SynTopic created".
        print("SyncTopic created")

        print(proxy, flush=True)

        adapter.activate()
        queue.start()

        self.shutdownOnInterrupt()
        broker.waitForShutdown()
        queue.destroy()

        return 0
Example #13
0
    def run(self, args):
        '''Create the event channels and the proxy printed to the
        terminal so that the client can connect to it.'''
        adapter = self.communicator().createObjectAdapter("FactoryAdapter")
        # NOTE(review): the SyncEvent publisher cast result is discarded;
        # confirm whether it was meant to be kept.
        Downloader.SyncEventPrx.uncheckedCast(
            self.get_topic(SYNC_TOPIC).getPublisher())
        progress = Downloader.ProgressEventPrx.uncheckedCast(
            self.get_topic(PROGRESS_TOPIC).getPublisher())
        queue = WorkQueue(progress)
        proxy = adapter.addWithUUID(SchedulerFactoryI(queue))

        print(proxy, flush=True)
        topic_mgr = self.get_topic(SYNC_TOPIC)
        servant = SyncEventI()
        subscriber = adapter.addWithUUID(servant)
        qos = {}
        topic_mgr.subscribeAndGetPublisher(qos, subscriber)

        # NOTE(review): isShutdown() is False at this point, so this loop
        # never executes as written; the condition was probably meant to
        # be negated. It also sits BEFORE adapter.activate(), so if it
        # ever did run, requestSync() would happen pre-activation --
        # confirm the intended behaviour before changing it.
        while self.communicator().isShutdown():
            subscriber.requestSync()
            time.sleep(10)

        adapter.activate()
        queue.start()
        self.shutdownOnInterrupt()
        self.communicator().waitForShutdown()
        queue.destroy()

        return 0
Example #14
0
    def run(self, argv):
        """Attach a Downloader servant to the ProgressTopic and serve
        until interrupted.

        Returns 0 on normal shutdown, 2 when the topic-manager proxy is
        unavailable.
        """
        topic_mgr = self.get_topic_manager()  # proxy to topic manager
        if not topic_mgr:
            print(': invalid proxy')
            return 2

        # Reuse the topic if present, create it on first run.
        topic_name = "ProgressTopic"
        try:
            topic = topic_mgr.retrieve(topic_name)
        except IceStorm.NoSuchTopic:
            topic = topic_mgr.create(topic_name)

        publisher = topic.getPublisher()
        # Bug fixes: the Ice cast method is uncheckedCast (not
        # unCheckedCast), and the result was then referenced through an
        # undefined name (`progress_top`).
        progress_topic = Example.ProgressSubscriberPrx.uncheckedCast(publisher)
        work_queue = WorkQueue(progress_topic)
        servant = DownloaderI(work_queue)

        # Bug fix: `broker` was used without ever being assigned.
        broker = self.communicator()
        adapter = broker.createObjectAdapter("DownloaderAdapter")
        # Bug fix: `printer` is undefined; use print().
        print(adapter.add(servant, broker.stringToIdentity("dowloader1")))

        work_queue.start()
        self.shutdownOnInterrupt()
        broker.waitForShutdown()
        work_queue.destroy()
        return 0
Example #15
0
    def __init__(self, project, port, log_freq=600): # 600 seconds
        """Initialize the QMaster and start its thread.

        Parameters
        ----------
        project :
            Project object; its ``method`` attribute must be 'explicit'
            or 'implicit' and decides whether solvated (wet) xtc files
            are brought back.
        port : int
            Port for the Work Queue master to listen on.
        log_freq : int, optional
            frequency to print info about the status of the work queue.
            In units of seconds. Default is to print every 10 minutes.
        """

        threading.Thread.__init__(self)
        self.project = project

        self.log_freq = log_freq  # print time in seconds
        self.wake_freq = 1 # seconds

        self.wq = WorkQueue(port, name='MSMAccelerator', catalog=True, exclusive=False)

        logger.info('WORK QUEUE MASTER LISTENING ON PORT: %d', self.wq.port)
        logger.info('(Start a local worker with >> work_queue_worker -d all localhost %d & )', self.wq.port)

        # method controls whether or not we need to bring back solvated_xtc as well
        if self.project.method == 'explicit':
            self.return_wet_xtc = True
        elif self.project.method == 'implicit':
            self.return_wet_xtc = False
        else:
            raise Exception("project.method must be 'explicit' or 'implicit'")
        logger.info('Return wet xtc set to %s', self.return_wet_xtc)

        # what does this specify algorithm do?
        self.wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)

        # fast abort kills jobs that appear to be stragling (taking more than 1.5x average)
        #self.wq.activate_fast_abort(1.5)

        # setting the stop event signals for the thread to die
        # NOTE(review): `_stop` collides with a private attribute of
        # threading.Thread on some Python versions -- confirm it is safe
        # here or rename it.
        self._stop = threading.Event()

        # the thread sets the  event every time a job returns or there are no waiting jobs
        # and it finished post processing. See the wait method
        self._mainloop_wake_event_cause = None
        self._mainloop_wake_event = threading.Event()

        # start the thread
        self.start()
Example #16
0
 def init_generator(self, module, source, node_type, instance_id,
                    collectors, client_context,
                    http_port, sandesh_req_uve_pkg_list=None,
                    connect_to_collector=True,
                    logger_class=None, logger_config_file=None,
                    host_ip='127.0.0.1', alarm_ack_callback=None,
                    config=None):
     """Initialise this sandesh instance in the GENERATOR role.

     Config-driven variant: runtime knobs come from *config* (falling
     back to SandeshConfig.from_parser_arguments()), the introspect
     server is started via run_introspect_server(), and the collector
     list is handed to SandeshClient.initiate().
     """
     self._role = self.SandeshRole.GENERATOR
     self._module = module
     self._source = source
     self._node_type = node_type
     self._instance_id = instance_id
     # `or []` also replaces an explicitly-passed empty list; safe here
     # since a fresh list is equivalent.
     self._sandesh_req_uve_pkg_list = sandesh_req_uve_pkg_list or []
     self._host_ip = host_ip
     self._client_context = client_context
     self._connect_to_collector = connect_to_collector
     # Received sandesh messages are drained through this work queue.
     self._rcv_queue = WorkQueue(self._process_rx_sandesh)
     self._init_logger(self._module, logger_class=logger_class,
                       logger_config_file=logger_config_file)
     self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                       connect_to_collector)
     from sandesh_stats import SandeshMessageStatistics
     self._msg_stats = SandeshMessageStatistics()
     self._trace = trace.Trace()
     self._sandesh_request_map = {}
     self._alarm_ack_callback = alarm_ack_callback
     self._config = config or SandeshConfig.from_parser_arguments()
     self._uve_type_maps = SandeshUVETypeMaps(self._logger)
     # Initialize the request handling
     # Import here to break the cyclic import dependency
     import sandesh_req_impl
     sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
     self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
     for pkg_name in self._sandesh_req_uve_pkg_list:
         self._create_sandesh_request_and_uve_lists(pkg_name)
     # Apply config-driven knobs before any traffic is generated.
     if self._config.disable_object_logs is not None:
         self.disable_sending_object_logs(self._config.disable_object_logs)
     if self._config.system_logs_rate_limit is not None:
         SandeshSystem.set_sandesh_send_rate_limit(
             self._config.system_logs_rate_limit)
     self._gev_httpd = None
     # http_port == -1 means "do not run the introspect HTTP server".
     if http_port != -1:
         self.run_introspect_server(http_port)
     if self._connect_to_collector:
         self._client = SandeshClient(self)
         self._client.initiate(collectors)
async def main():
    """Run a short demo: one producer plus a reliable and an unreliable
    worker sharing a Redis-backed work queue, with the queue's tidy task
    running in the background.  Everything is stopped in the same order
    it was started, even if the run errors out.
    """
    redis_client = redis.Redis()
    queue = WorkQueue(redis_client, tidy_interval=5, stale_time=3)
    producer = Producer(work_queue=queue, delay=0.75)
    good = Worker("Good Worker", work_queue=queue, delay=1, fail_rate=0.2)
    bad = Worker("Bad Worker", work_queue=queue, delay=2.5, fail_rate=0.7)
    try:
        print('Start')
        await queue.schedule_tidy_task()
        await producer.start()
        await good.start()
        await bad.start()
        # Let the demo run for just over a minute.
        await asyncio.sleep(61)
    finally:
        await queue.stop_tidy_task()
        await producer.stop()
        await good.stop()
        await bad.stop()
Example #18
0
    def run(self, argv):
        """Serve the Math servant backed by a worker queue until
        interrupted; returns 0 once the queue has been torn down.
        """
        work_queue = WorkQueue()
        servant = MathI(work_queue)

        broker = self.communicator()

        adapter = broker.createObjectAdapter("MathAdapter")
        # Bug fix: the Python 2 `print` statement is a syntax error under
        # Python 3, which the rest of this file targets.
        print(adapter.add(servant, broker.stringToIdentity("math1")))
        adapter.activate()

        work_queue.start()

        self.shutdownOnInterrupt()
        broker.waitForShutdown()

        work_queue.destroy()
        return 0
    def __init__(self, connection, logger, primary_collector,
                 secondary_collector):
        """Build the Fysom-driven state machine that manages this
        generator's connection to a collector.

        States: _IDLE, _DISCONNECT, _CONNECT, _CONNECT_TO_BACKUP,
        _CLIENT_INIT, _ESTABLISHED.  The machine fails over between
        *primary_collector* and *secondary_collector*; events are fed
        through the WorkQueue created at the end of this method.
        """
        # Push the current transition into the process-wide connection
        # state, keyed on the active collector address.
        def _update_connection_state(e, status):
            from connection_info import ConnectionState
            from gen_py.process_info.ttypes import ConnectionType
            collector_addr = e.sm._active_collector
            if collector_addr is None:
                collector_addr = ''
            ConnectionState.update(conn_type=ConnectionType.COLLECTOR,
                                   name='',
                                   status=status,
                                   server_addrs=[collector_addr],
                                   message='%s to %s on %s' %
                                   (e.src, e.dst, e.event))

        #end _update_connection_state

        def _connection_state_up(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.UP)

        #end _connection_state_up

        def _connection_state_down(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.DOWN)

        #end _connection_state_down

        def _connection_state_init(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.INIT)

        #end _connection_state_init

        # Entering _IDLE: drop timers/session, restore the configured
        # primary/backup pair, and (unless disabled) arm the hold timer
        # that will trigger a reconnect.
        def _on_idle(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._disable != True:
                e.sm._start_idle_hold_timer()
            # update connection state
            _connection_state_down(e)

        #end _on_idle

        # NOTE(review): defined but never registered in 'callbacks'
        # below -- confirm whether the _DISCONNECT state should use it.
        def _on_disconnect(e):
            # update connection state
            _connection_state_down(e)

        #end _on_disconnect

        # Entering _CONNECT: try the active collector, or report it
        # unknown so the FSM can fall back to _DISCONNECT.
        def _on_connect(e):
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event=Event._EV_COLLECTOR_UNKNOWN))

        #end _on_connect

        # Entering _CONNECT_TO_BACKUP: swap active/backup and retry, or
        # report the backup unknown so the FSM returns to _IDLE.
        def _on_connect_to_backup(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(
                    Event(event=Event._EV_BACKUP_COLLECTOR_UNKNOWN))

        #end _on_connect_to_backup

        # Entering _CLIENT_INIT: TCP is up; start reading and announce
        # this generator to the collector.
        def _on_client_init(e):
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_init(e)

        #end _on_client_init

        # Entering _ESTABLISHED: the collector acknowledged via the
        # sandesh control message carried in the triggering event.
        def _on_established(e):
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_up(e)

        #end _on_established

        # FSM - Fysom
        # 'defer': True keeps the initial _EV_START event queued until
        # explicitly fired.
        self._fsm = Fysom({
            'initial': {
                'state': State._IDLE,
                'event': Event._EV_START,
                'defer': True
            },
            'events': [
                # _IDLE
                {
                    'name': Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },
                {
                    'name': Event._EV_START,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },

                # _DISCONNECT
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._DISCONNECT,
                    'dst': State._CONNECT
                },

                # _CONNECT
                {
                    'name': Event._EV_COLLECTOR_UNKNOWN,
                    'src': State._CONNECT,
                    'dst': State._DISCONNECT
                },
                {
                    'name': Event._EV_TCP_CONNECT_FAIL,
                    'src': State._CONNECT,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CONNECT,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CONNECT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECTED,
                    'src': State._CONNECT,
                    'dst': State._CLIENT_INIT
                },

                # _CONNECT_TO_BACKUP
                {
                    'name': Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECT_FAIL,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECTED,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._CLIENT_INIT
                },

                # _CLIENT_INIT
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CLOSE,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                    'src': State._CLIENT_INIT,
                    'dst': State._ESTABLISHED
                },

                # _ESTABLISHED
                {
                    'name': Event._EV_TCP_CLOSE,
                    'src': State._ESTABLISHED,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_STOP,
                    'src': State._ESTABLISHED,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._ESTABLISHED,
                    'dst': State._CONNECT
                }
            ],
            'callbacks': {
                'on' + State._IDLE: _on_idle,
                'on' + State._CONNECT: _on_connect,
                'on' + State._CONNECT_TO_BACKUP: _on_connect_to_backup,
                'on' + State._CLIENT_INIT: _on_client_init,
                'on' + State._ESTABLISHED: _on_established,
            }
        })

        # Plain state shared with the FSM callbacks above (they access
        # it as e.sm.<attr>).
        self._connection = connection
        self._session = None
        self._connects = 0
        self._disable = False
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        # Events are serialised through a WorkQueue, gated on readiness.
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)
class SandeshStateMachine(object):
    """Client-side connection state machine for the Sandesh collector link.

    Drives the TCP session lifecycle through a Fysom FSM:
    _IDLE -> _CONNECT -> _CLIENT_INIT -> _ESTABLISHED, with
    _CONNECT_TO_BACKUP for collector failover and _DISCONNECT when no
    collector is known.  Events are queued on a WorkQueue and processed
    strictly one at a time by _dequeue_event(); timers are gevent
    greenlets.
    """

    # Back-off before re-connecting after returning to _IDLE, and the
    # per-attempt connect timeout, both in seconds.
    _IDLE_HOLD_TIME = 5  # in seconds
    _CONNECT_TIME = 30  # in seconds

    def __init__(self, connection, logger, primary_collector,
                 secondary_collector):
        """Build the Fysom FSM, timers and the serialized event queue.

        connection          -- owning SandeshConnection; provides session
                               callbacks and collector bookkeeping.
        logger              -- logger for event/transition messages.
        primary_collector   -- 'host:port' string or None.
        secondary_collector -- 'host:port' string or None.
        """
        # NOTE: inside the callbacks below, e.sm is this state machine:
        # _dequeue_event() fires every event with sm=self, sm_event=event,
        # and Fysom exposes those keyword arguments on the event object.
        def _update_connection_state(e, status):
            """Publish the transition to the process connection-state UVE."""
            from connection_info import ConnectionState
            from gen_py.process_info.ttypes import ConnectionType
            collector_addr = e.sm._active_collector
            if collector_addr is None:
                collector_addr = ''
            ConnectionState.update(conn_type=ConnectionType.COLLECTOR,
                                   name='',
                                   status=status,
                                   server_addrs=[collector_addr],
                                   message='%s to %s on %s' %
                                   (e.src, e.dst, e.event))

        #end _update_connection_state

        def _connection_state_up(e):
            """Report the collector connection as UP."""
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.UP)

        #end _connection_state_up

        def _connection_state_down(e):
            """Report the collector connection as DOWN."""
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.DOWN)

        #end _connection_state_down

        def _connection_state_init(e):
            """Report the collector connection as INIT (connecting)."""
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.INIT)

        #end _connection_state_init

        def _on_idle(e):
            """Entered _IDLE: tear everything down and, unless the machine
            is administratively disabled, arm the idle-hold timer so a
            re-connect attempt follows."""
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            # (self here is the same object as e.sm -- these closures are
            # only ever installed on this instance's FSM)
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._disable != True:
                e.sm._start_idle_hold_timer()
            # update connection state
            _connection_state_down(e)

        #end _on_idle

        def _on_disconnect(e):
            """Entered _DISCONNECT: only the UVE state is updated.
            NOTE(review): not registered in the 'callbacks' dict below, so
            this closure is currently unused."""
            # update connection state
            _connection_state_down(e)

        #end _on_disconnect

        def _on_connect(e):
            """Entered _CONNECT: start a session to the active collector,
            or signal _EV_COLLECTOR_UNKNOWN when none is configured."""
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event=Event._EV_COLLECTOR_UNKNOWN))

        #end _on_connect

        def _on_connect_to_backup(e):
            """Entered _CONNECT_TO_BACKUP: swap active/backup collectors and
            retry, or signal _EV_BACKUP_COLLECTOR_UNKNOWN when there is no
            backup to fail over to."""
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(
                    Event(event=Event._EV_BACKUP_COLLECTOR_UNKNOWN))

        #end _on_connect_to_backup

        def _on_client_init(e):
            """Entered _CLIENT_INIT (TCP up): start the session reader
            greenlet and announce the generator to the collector."""
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_init(e)

        #end _on_client_init

        def _on_established(e):
            """Entered _ESTABLISHED: collector accepted us -- cancel the
            connect timer and process the received ctrl message."""
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_up(e)

        #end _on_established

        # FSM - Fysom
        # 'defer': True means the initial _EV_START is not fired until
        # initialize() enqueues it explicitly.
        self._fsm = Fysom({
            'initial': {
                'state': State._IDLE,
                'event': Event._EV_START,
                'defer': True
            },
            'events': [
                # _IDLE
                {
                    'name': Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },
                {
                    'name': Event._EV_START,
                    'src': State._IDLE,
                    'dst': State._CONNECT
                },

                # _DISCONNECT
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._DISCONNECT,
                    'dst': State._CONNECT
                },

                # _CONNECT
                {
                    'name': Event._EV_COLLECTOR_UNKNOWN,
                    'src': State._CONNECT,
                    'dst': State._DISCONNECT
                },
                {
                    'name': Event._EV_TCP_CONNECT_FAIL,
                    'src': State._CONNECT,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CONNECT,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CONNECT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECTED,
                    'src': State._CONNECT,
                    'dst': State._CLIENT_INIT
                },

                # _CONNECT_TO_BACKUP
                {
                    'name': Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECT_FAIL,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CONNECTED,
                    'src': State._CONNECT_TO_BACKUP,
                    'dst': State._CLIENT_INIT
                },

                # _CLIENT_INIT
                {
                    'name': Event._EV_CONNECT_TIMER_EXPIRED,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_TCP_CLOSE,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._CLIENT_INIT,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                    'src': State._CLIENT_INIT,
                    'dst': State._ESTABLISHED
                },

                # _ESTABLISHED
                {
                    'name': Event._EV_TCP_CLOSE,
                    'src': State._ESTABLISHED,
                    'dst': State._CONNECT_TO_BACKUP
                },
                {
                    'name': Event._EV_STOP,
                    'src': State._ESTABLISHED,
                    'dst': State._IDLE
                },
                {
                    'name': Event._EV_COLLECTOR_CHANGE,
                    'src': State._ESTABLISHED,
                    'dst': State._CONNECT
                }
            ],
            'callbacks': {
                'on' + State._IDLE: _on_idle,
                'on' + State._CONNECT: _on_connect,
                'on' + State._CONNECT_TO_BACKUP: _on_connect_to_backup,
                'on' + State._CLIENT_INIT: _on_client_init,
                'on' + State._ESTABLISHED: _on_established,
            }
        })

        # Runtime state: session handle, successful-connect counter,
        # admin-disable flag, gevent timer greenlets, current collector
        # pair and the serialized event queue.
        self._connection = connection
        self._session = None
        self._connects = 0
        self._disable = False
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)

    #end __init__

    # Public functions

    def initialize(self):
        """Fire the deferred _EV_START event to begin connecting."""
        self.enqueue_event(Event(event=Event._EV_START))

    #end initialize

    def session(self):
        """Return the current SandeshSession (None when disconnected)."""
        return self._session

    #end session

    def state(self):
        """Return the FSM's current state name."""
        return self._fsm.current

    #end state

    def shutdown(self):
        """Disable the machine and post _EV_STOP to drop the connection."""
        self._disable = True
        self.enqueue_event(Event(event=Event._EV_STOP))

    #end shutdown

    def set_admin_state(self, down):
        """Administratively stop (down=True) or restart the connection."""
        if down == True:
            self._disable = True
            self.enqueue_event(Event(event=Event._EV_STOP))
        else:
            self._disable = False
            self.enqueue_event(Event(event=Event._EV_START))

    #end set_admin_state

    def connect_count(self):
        """Return the number of successful collector connections so far."""
        return self._connects

    #end connect_count

    def active_collector(self):
        """Return the 'host:port' of the collector currently in use."""
        return self._active_collector

    #end active_collector

    def backup_collector(self):
        """Return the 'host:port' of the standby collector (or None)."""
        return self._backup_collector

    #end backup_collector

    def enqueue_event(self, event):
        """Queue *event* for serialized processing by _dequeue_event()."""
        self._event_queue.enqueue(event)

    #end enqueue_event

    def on_session_event(self, session, event):
        """Translate TCP session callbacks into FSM events.

        Events for a session other than the current one are stale and
        are dropped here.
        """
        if session is not self._session:
            self._logger.error(
                "Ignore session event [%d] received for old session" % (event))
            return
        if SandeshSession.SESSION_ESTABLISHED == event:
            self._logger.info("Session Event: TCP Connected")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CONNECTED, session=session))
        elif SandeshSession.SESSION_ERROR == event:
            self._logger.error("Session Event: TCP Connect Fail")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CONNECT_FAIL, session=session))
        elif SandeshSession.SESSION_CLOSE == event:
            self._logger.error("Session Event: TCP Connection Closed")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CLOSE, session=session))
        else:
            self._logger.error("Received unknown session event [%d]" % (event))

    #end on_session_event

    def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector):
        """Handle the collector's control message.

        Success drives the FSM towards _ESTABLISHED; failure closes the
        session so the normal reconnect path retries later.
        """
        if sandesh_ctrl.success == True:
            self.enqueue_event(
                Event(event=Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                      session=session,
                      msg=sandesh_ctrl,
                      source=collector))
        else:
            # Negotiation with the Collector failed, reset the
            # connection and retry after sometime.
            self._logger.error("Negotiation with the Collector %s failed." %
                               (collector))
            self._session.close()

    #end on_sandesh_ctrl_msg_receive

    def on_sandesh_uve_msg_send(self, sandesh_uve):
        """Queue a UVE message for transmission via the event queue."""
        self.enqueue_event(
            Event(event=Event._EV_SANDESH_UVE_SEND, msg=sandesh_uve))

    #end on_sandesh_uve_msg_send

    # Private functions

    def _create_session(self):
        """Create a SandeshSession to the active collector ('host:port')."""
        assert self._session is None
        col_info = self._active_collector.split(':')
        collector = (col_info[0], int(col_info[1]))
        self._session = SandeshSession(self._connection.sandesh_instance(),
                                       collector, self.on_session_event,
                                       self._connection._receive_sandesh_msg)

    #end _create_session

    def _delete_session(self):
        """Close and forget the current session, if any."""
        if self._session:
            self._session.close()
            self._session = None
            self._connection.reset_collector()

    #end _delete_session

    def _start_idle_hold_timer(self):
        """Arm the idle-hold back-off; a zero hold time short-circuits to
        an immediate _EV_IDLE_HOLD_TIMER_EXPIRED event."""
        if self._idle_hold_timer is None:
            if self._IDLE_HOLD_TIME:
                self._idle_hold_timer = gevent.spawn_later(
                    self._IDLE_HOLD_TIME, self._idle_hold_timer_expiry_handler)
            else:
                self.enqueue_event(
                    Event(event=Event._EV_IDLE_HOLD_TIMER_EXPIRED))

    #end _start_idle_hold_timer

    def _cancel_idle_hold_timer(self):
        """Kill the idle-hold greenlet, if armed."""
        if self._idle_hold_timer is not None:
            gevent.kill(self._idle_hold_timer)
            self._idle_hold_timer = None

    #end _cancel_idle_hold_timer

    def _idle_hold_timer_expiry_handler(self):
        """Greenlet body: idle-hold expired, trigger a reconnect."""
        self._idle_hold_timer = None
        self.enqueue_event(Event(event=Event._EV_IDLE_HOLD_TIMER_EXPIRED))

    #end _idle_hold_timer_expiry_handler

    def _start_connect_timer(self):
        """Arm the connect timeout for the session being attempted."""
        if self._connect_timer is None:
            self._connect_timer = gevent.spawn_later(
                self._CONNECT_TIME, self._connect_timer_expiry_handler,
                self._session)

    #end _start_connect_timer

    def _cancel_connect_timer(self):
        """Kill the connect-timeout greenlet, if armed."""
        if self._connect_timer is not None:
            gevent.kill(self._connect_timer)
            self._connect_timer = None

    #end _cancel_connect_timer

    def _connect_timer_expiry_handler(self, session):
        """Greenlet body: connect attempt timed out for *session*."""
        self._connect_timer = None
        self.enqueue_event(
            Event(event=Event._EV_CONNECT_TIMER_EXPIRED, session=session))

    #end _connect_timer_expiry_handler

    def _is_ready_to_dequeue_event(self):
        """WorkQueue readiness hook -- this machine is always ready."""
        return True

    #end _is_ready_to_dequeue_event

    def _log_event(self, event):
        """Return False for UVE sends while _ESTABLISHED (too chatty to
        log per message); True for everything else."""
        if self._fsm.current == State._ESTABLISHED and \
           event.event == Event._EV_SANDESH_UVE_SEND:
            return False
        return True

    #end _log_event

    def _dequeue_event(self, event):
        """WorkQueue worker: process one event.

        Stale-session events and no-op collector changes are dropped;
        UVE sends and ctrl messages are handled inline; everything else
        is fed to the Fysom FSM (with sm/sm_event attached so the state
        callbacks can reach back to this machine).
        """
        if self._log_event(event):
            self._logger.info("Processing event[%s] in state[%s]" \
                              % (event.event, self._fsm.current))
        if event.session is not None and event.session is not self._session:
            self._logger.info("Ignore event [%s] received for old session" \
                              % (event.event))
            return
        if event.event == Event._EV_COLLECTOR_CHANGE:
            old_active_collector = self._active_collector
            self._active_collector = event.primary_collector
            self._backup_collector = event.secondary_collector
            if old_active_collector == self._active_collector:
                self._logger.info("No change in active collector. Ignore event [%s]" \
                                  % (event.event))
                return
        if event.event == Event._EV_SANDESH_UVE_SEND:
            if self._fsm.current == State._ESTABLISHED or self._fsm.current == State._CLIENT_INIT:
                self._connection.handle_sandesh_uve_msg(event.msg)
            else:
                self._connection.sandesh_instance().msg_stats(
                ).update_tx_stats(event.msg.__class__.__name__, 0,
                                  SandeshTxDropReason.WrongClientSMState)
                self._logger.info("Discarding event[%s] in state[%s]" \
                                  % (event.event, self._fsm.current))
        elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \
                self._fsm.current == State._ESTABLISHED:
            self._connection.handle_sandesh_ctrl_msg(event.msg)
        elif self._fsm.cannot(event.event) is True:
            self._logger.info("Unconsumed event[%s] in state[%s]" \
                              % (event.event, self._fsm.current))
        else:
            prev_state = self.state()
            getattr(self._fsm, event.event)(sm=self, sm_event=event)
            # Log state transition
            self._logger.info("Sandesh Client: Event[%s] => State[%s] -> State[%s]" \
                              % (event.event, prev_state, self.state()))
class Sandesh(object):
    _DEFAULT_LOG_FILE = SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = SandeshLogger._DEFAULT_SYSLOG_FACILITY

    class SandeshRole:
        """Role of this Sandesh instance: message generator or collector."""
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2
    # end class SandeshRole

    def __init__(self):
        """Initialize a Sandesh instance with inert defaults.

        Real configuration happens later in init_generator(); until then
        the role is INVALID and there is no client, logger or HTTP server.
        """
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
        self._connect_to_collector = True
    # end __init__

    # Public functions

    def init_generator(self, module, source, node_type, instance_id,
                       collectors, client_context, 
                       http_port, sandesh_req_uve_pkg_list=None,
                       discovery_client=None, connect_to_collector=True):
        """Configure this instance as a sandesh GENERATOR.

        module/source/node_type/instance_id -- identity of this generator.
        collectors -- list of 'host:port' strings; element 0 is the primary
            and element 1 (if present) the secondary collector.
        client_context -- opaque context string stored on the instance.
        http_port -- port for the introspect HTTP server; -1 disables it.
        sandesh_req_uve_pkg_list -- extra packages scanned for sandesh
            request/UVE/alarm definitions ('pysandesh.gen_py' is always
            appended).
        discovery_client -- handed to SandeshClient; unused when
            connect_to_collector is False.
        connect_to_collector -- when False no SandeshClient is created and
            messages are only logged locally (see send_sandesh()).
        """
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        self._client_context = client_context
        self._collectors = collectors
        self._connect_to_collector = connect_to_collector
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        # Logger must exist before anything below logs.
        self._init_logger(source + ':' + module + ':' + node_type + ':' \
            + instance_id)
        self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
            connect_to_collector)
        self._stats = SandeshStats()
        self._trace = trace.Trace()
        self._sandesh_request_dict = {}
        self._uve_type_maps = SandeshUVETypeMaps(self._logger)
        if sandesh_req_uve_pkg_list is None:
            sandesh_req_uve_pkg_list = []
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        self._gev_httpd = None
        if http_port != -1:
            self._http_server = SandeshHttp(
                self, module, http_port, sandesh_req_uve_pkg_list)
            self._gev_httpd = gevent.spawn(
                    self._http_server.start_http_server)
        primary_collector = None
        secondary_collector = None
        if self._collectors is not None:
            if len(self._collectors) > 0:
                primary_collector = self._collectors[0]
            if len(self._collectors) > 1:
                secondary_collector = self._collectors[1]
        if self._connect_to_collector:
            self._client = SandeshClient(
                self, primary_collector, secondary_collector,
                discovery_client)
            self._client.initiate()
    # end init_generator

    def uninit(self):
        """Tear down the instance; currently only stops the HTTP server."""
        self.kill_httpd()

    def kill_httpd(self):
        """Stop the introspect HTTP server greenlet, if one was spawned.

        Errors during shutdown are only logged at debug level -- this is
        best-effort cleanup.
        """
        if not self._gev_httpd:
            return
        try:
            self._gev_httpd.kill()
        except Exception as e:
            self._logger.debug(str(e))

    def record_port(self, name, port):
        """Publish *port* to the well-known named pipe for this process.

        Writes '<port>\\n' to /tmp/<module>.<ppid>.<name>_port so the
        parent process can discover the dynamically chosen port.  A
        missing/unopenable pipe is logged and ignored.
        """
        pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name)
        try:
            pipeout = os.open(pipe_name, os.O_WRONLY)
        # Narrowed from a bare 'except:' -- os.open() raises OSError; a
        # bare clause would also swallow KeyboardInterrupt/SystemExit.
        except OSError:
            self._logger.error('Cannot write %s_port %d to %s' % (name, port, pipe_name))
        else:
            self._logger.error('Writing %s_port %d to %s' % (name, port, pipe_name))
            try:
                os.write(pipeout, '%d\n' % port)
            finally:
                # Always release the fd, even if the write fails.
                os.close(pipeout)
        
    def logger(self):
        """Return the underlying Python logger (None before init)."""
        return self._logger
    # end logger

    def sandesh_logger(self):
        """Return the SandeshLogger wrapper object."""
        return self._sandesh_logger
    # end sandesh_logger

    def set_logging_params(self, enable_local_log=False, category='',
                           level=SandeshLevel.SYS_INFO,
                           file=SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY):
        """Configure all logging knobs in one call (delegates to the
        SandeshLogger)."""
        self._sandesh_logger.set_logging_params(
            enable_local_log, category, level, file,
            enable_syslog, syslog_facility)
    # end set_logging_params

    def set_local_logging(self, enable_local_log):
        """Enable/disable logging of sandesh messages to the local log."""
        self._sandesh_logger.set_local_logging(enable_local_log)
    # end set_local_logging

    def set_logging_level(self, level):
        """Set the minimum SandeshLevel that gets logged locally."""
        self._sandesh_logger.set_logging_level(level)
    # end set_logging_level

    def set_logging_category(self, category):
        """Restrict local logging to the given category ('' = all)."""
        self._sandesh_logger.set_logging_category(category)
    # end set_logging_category

    def set_logging_file(self, file):
        """Redirect local logging to the given file path."""
        self._sandesh_logger.set_logging_file(file)
    # end set_logging_file

    def is_send_queue_enabled(self):
        """Return True when the transmit queue is allowed to drain."""
        return self._send_queue_enabled
    # end is_send_queue_enabled

    def is_connect_to_collector_enabled(self):
        """Return True when this instance connects to a Collector."""
        return self._connect_to_collector
    # end is_connect_to_collector_enabled

    def set_send_queue(self, enable):
        """Enable or disable draining of the client's send queue.

        On a False -> True transition, kick the session's queue runner so
        buffered messages start flowing again.  A no-op when the flag
        already has the requested value.
        """
        if self._send_queue_enabled == enable:
            return
        self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                          self._send_queue_enabled, enable)
        self._send_queue_enabled = enable
        if not enable:
            return
        connection = self._client.connection()
        if connection and connection.session():
            connection.session().send_queue().may_be_start_runner()
    # end set_send_queue

    def init_collector(self):
        """Collector-role initialization -- intentionally a no-op here."""
        pass
    # end init_collector

    def stats(self):
        """Return the SandeshStats accumulator for this instance."""
        return self._stats
    # end stats

    @classmethod
    def next_seqnum(cls):
        """Advance and return the class-wide local sequence number.

        The counter lives on the class itself and starts at 1 on the
        first call.
        """
        cls._lseqnum = getattr(cls, '_lseqnum', 0) + 1
        return cls._lseqnum
    # end next_seqnum

    @classmethod
    def lseqnum(cls):
        """Return the current class-wide sequence number (0 if none was
        ever issued), without advancing it."""
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 0
        return cls._lseqnum
    # end lseqnum

    def module(self):
        """Return the generator module name."""
        return self._module
    # end module

    def source_id(self):
        """Return the source (host) identifier."""
        return self._source
    # end source_id

    def node_type(self):
        """Return the node type string."""
        return self._node_type
    #end node_type

    def instance_id(self):
        """Return the instance identifier string."""
        return self._instance_id
    #end instance_id

    def scope(self):
        """Return the message scope string."""
        return self._scope
    # end scope

    def context(self):
        """Return the message context string."""
        return self._context
    # end context

    def seqnum(self):
        """Return the per-message sequence number.

        NOTE(review): self._seqnum is not set in the visible __init__ --
        presumably assigned on generated message subclasses; confirm
        before calling this on a bare Sandesh instance.
        """
        return self._seqnum
    # end seqnum

    def timestamp(self):
        """Return the message timestamp."""
        return self._timestamp
    # end timestamp

    def versionsig(self):
        """Return the version signature."""
        return self._versionsig
    # end versionsig

    def type(self):
        """Return the message type code (note: shadows the builtin name
        only as a method, by project convention)."""
        return self._type
    # end type

    def hints(self):
        """Return the message hint bitmask."""
        return self._hints
    # end hints

    def client(self):
        """Return the SandeshClient (None when not connecting)."""
        return self._client
    # end client

    def level(self):
        """Return this sandesh's SandeshLevel."""
        return self._level
    # end level

    def category(self):
        """Return this sandesh's category string."""
        return self._category
    # end category

    def validate(self):
        """Validation hook for message subclasses -- no-op in the base."""
        return
    # end validate

    def is_local_logging_enabled(self):
        """Return True when local logging is on (per SandeshLogger)."""
        return self._sandesh_logger.is_local_logging_enabled()
    # end is_local_logging_enabled

    def logging_level(self):
        """Return the configured local logging level."""
        return self._sandesh_logger.logging_level()
    # end logging_level

    def logging_category(self):
        """Return the configured local logging category filter."""
        return self._sandesh_logger.logging_category()
    # end logging_category

    def is_syslog_logging_enabled(self):
        """Return True when syslog forwarding is enabled."""
        return self._sandesh_logger.is_syslog_logging_enabled()
    #end is_syslog_logging_enabled

    def logging_syslog_facility(self):
        """Return the configured syslog facility."""
        return self._sandesh_logger.logging_syslog_facility()
    #end logging_syslog_facility

    def is_unit_test(self):
        """True when init_generator()/init_collector() never ran -- the
        role is still INVALID, which the code treats as unit-test mode."""
        return self._role == self.SandeshRole.INVALID
    # end is_unit_test

    def handle_test(self, sandesh_init):
        """Consume this sandesh locally when running under unit test.

        Returns True when the message was logged on the UT path, False
        when normal processing should continue.

        NOTE(review): this calls self._is_logging_allowed() and self.log(),
        neither of which is defined in the visible portion of this class
        (only is_logging_allowed(), no underscore, is).  Presumably both
        exist on generated message subclasses -- confirm, otherwise the
        UT path raises AttributeError.
        """
        if sandesh_init.is_unit_test() or self._is_level_ut():
            if self._is_logging_allowed(sandesh_init):
                sandesh_init._logger.debug(self.log())
                return True
        return False

    def is_logging_allowed(self, sandesh_init):
        """Decide whether this sandesh may be written to the local log.

        Requires local logging to be enabled on *sandesh_init*, the
        configured level to be at least this message's level, and the
        category filter (when non-empty) to match this message's
        category.
        """
        if not sandesh_init.is_local_logging_enabled():
            return False
        if sandesh_init.logging_level() < self._level:
            return False
        category_filter = sandesh_init.logging_category()
        if category_filter:
            return category_filter == self._category
        return True
    # end is_logging_allowed

    def enqueue_sandesh_request(self, sandesh):
        """Queue an incoming sandesh request for async processing by
        _process_rx_sandesh()."""
        self._rcv_queue.enqueue(sandesh)
    # end enqueue_sandesh_request

    def send_sandesh(self, tx_sandesh):
        """Transmit *tx_sandesh* via the client, or fall back to logging.

        With a client present the message is handed to it.  Without one:
        if we were supposed to be connected this is an error; otherwise
        (connect_to_collector=False) the message is simply logged locally
        at its own level.
        """
        if self._client:
            # Dropped the unused 'ret =' binding -- the return value was
            # never consumed.
            self._client.send_sandesh(tx_sandesh)
        elif self._connect_to_collector:
            self._logger.error('SANDESH: No Client: %s', tx_sandesh.log())
        else:
            self._logger.log(
                SandeshLogger.get_py_logger_level(tx_sandesh.level()),
                tx_sandesh.log())
    # end send_sandesh

    def send_generator_info(self):
        """Send a SandeshModuleClientTrace UVE describing this client's
        identity and current collector connection state.

        Called on every connection state change (see the state machine),
        so the collector always has fresh client info.
        """
        from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \
            ModuleClientState, SandeshModuleClientTrace
        client_info = SandeshClientInfo()
        # Record the start time exactly once, on the first call.
        # (Replaces a bare 'except:' + unused local + try/finally that was
        # abused as "set attribute if missing".)
        if not hasattr(self, '_start_time'):
            self._start_time = UTCTimestampUsec()
        client_info.start_time = self._start_time
        client_info.pid = os.getpid()
        if self._http_server is not None:
            client_info.http_port = self._http_server.get_port()
        client_info.collector_name = self._client.connection().collector()
        client_info.status = self._client.connection().state()
        client_info.successful_connections = \
            self._client.connection().statemachine().connect_count()
        client_info.primary = self._client.connection().primary_collector()
        if client_info.primary is None:
            client_info.primary = ''
        client_info.secondary = \
            self._client.connection().secondary_collector()
        if client_info.secondary is None:
            client_info.secondary = ''
        module_state = ModuleClientState(name=self._source + ':' +
                                         self._node_type + ':' +
                                         self._module + ':' +
                                         self._instance_id,
                                         client_info=client_info)
        generator_info = SandeshModuleClientTrace(
            data=module_state, sandesh=self)
        generator_info.send(sandesh=self)
    # end send_generator_info

    def get_sandesh_request_object(self, request):
        """Instantiate the sandesh request class named *request*.

        Looks up the implementing module in _sandesh_request_dict,
        imports it, and returns a new instance of the request class.
        Every failure path logs an error and yields None.
        """
        try:
            req_module = self._sandesh_request_dict[request]
        except KeyError:
            self._logger.error('Invalid Sandesh Request "%s"' % (request))
            return None
        if not req_module:
            self._logger.error(
                'Sandesh Request "%s" not implemented' % (request))
            return None
        try:
            imp_module = importlib.import_module(req_module)
        except ImportError:
            self._logger.error(
                'Failed to import Module "%s"' % (req_module))
            return None
        try:
            return getattr(imp_module, request)()
        except AttributeError:
            self._logger.error(
                'Failed to create Sandesh Request "%s"' %
                (request))
            return None
    # end get_sandesh_request_object

    def trace_enable(self):
        """Globally enable tracing."""
        self._trace.TraceOn()
    # end trace_enable

    def trace_disable(self):
        """Globally disable tracing."""
        self._trace.TraceOff()
    # end trace_disable

    def is_trace_enabled(self):
        """Return True when tracing is globally enabled."""
        return self._trace.IsTraceOn()
    # end is_trace_enabled

    def trace_buffer_create(self, name, size, enable=True):
        """Create a trace buffer *name* holding up to *size* entries."""
        self._trace.TraceBufAdd(name, size, enable)
    # end trace_buffer_create

    def trace_buffer_delete(self, name):
        """Delete the trace buffer *name*."""
        self._trace.TraceBufDelete(name)
    # end trace_buffer_delete

    def trace_buffer_enable(self, name):
        """Enable tracing into buffer *name*."""
        self._trace.TraceBufOn(name)
    # end trace_buffer_enable

    def trace_buffer_disable(self, name):
        """Disable tracing into buffer *name*."""
        self._trace.TraceBufOff(name)
    # end trace_buffer_disable

    def is_trace_buffer_enabled(self, name):
        """Return True when buffer *name* is accepting trace entries."""
        return self._trace.IsTraceBufOn(name)
    # end is_trace_buffer_enabled

    def trace_buffer_list_get(self):
        """Return the list of existing trace buffers."""
        return self._trace.TraceBufListGet()
    # end trace_buffer_list_get

    def trace_buffer_size_get(self, name):
        """Return the capacity of trace buffer *name*."""
        return self._trace.TraceBufSizeGet(name)
    # end trace_buffer_size_get

    def trace_buffer_read(self, name, read_context, count, read_cb):
        """Read up to *count* entries from buffer *name*, invoking
        *read_cb* for each, under the given read context."""
        self._trace.TraceRead(name, read_context, count, read_cb)
    # end trace_buffer_read

    def trace_buffer_read_done(self, name, context):
        """Release the read context for buffer *name*."""
        self._trace.TraceReadDone(name, context)
    # end trace_buffer_read_done

    # API to send the trace buffer to the Collector.
    # If trace count is not specified/or zero, then the entire trace buffer
    # is sent to the Collector.
    # [Note] No duplicate trace message sent to the Collector. i.e., If there
    # is no trace message added between two consequent calls to this API, then
    # no trace message is sent to the Collector.
    def send_sandesh_trace_buffer(self, trace_buf, count=0):
        """Ship buffer *trace_buf* (up to *count* entries; 0 = all new
        entries) to the Collector via a SandeshTraceRequestRunner."""
        trace_req_runner = SandeshTraceRequestRunner(sandesh=self,
                                                     request_buffer_name=
                                                     trace_buf,
                                                     request_context='',
                                                     read_context='Collector',
                                                     request_count=count)
        trace_req_runner.Run()
    # end send_sandesh_trace_buffer

    # Private functions

    def _is_level_ut(self):
        """Return True when this sandesh's level lies inside the
        unit-test band [UT_START, UT_END]."""
        return SandeshLevel.UT_START <= self._level <= SandeshLevel.UT_END
    # end _is_level_ut

    def _create_task(self):
        """Spawn a greenlet running self._runner.run_for_ever and return it.

        NOTE(review): self._runner is not assigned anywhere in the visible
        code -- confirm it is set elsewhere before this is called.
        """
        return gevent.spawn(self._runner.run_for_ever)
    # end _create_task

    def _process_rx_sandesh(self, rx_sandesh):
        """Dispatch a received sandesh request to its handle_request hook."""
        handler = getattr(rx_sandesh, "handle_request", None)
        if not callable(handler):
            self._logger.error('Sandesh Request "%s" not implemented' %
                               (rx_sandesh.__class__.__name__))
            return
        handler(rx_sandesh)
    # end _process_rx_sandesh

    def _create_sandesh_request_and_uve_lists(self, package):
        """Scan *package* for generated 'ttypes' modules and register their
        sandesh requests, UVEs and alarms.
        """
        try:
            # __import__('a.b') returns the top-level package object 'a'.
            imp_pkg = __import__(package)
        except ImportError:
            self._logger.error('Failed to import package "%s"' % (package))
        else:
            try:
                pkg_path = imp_pkg.__path__
            except AttributeError:
                # Not a package (no __path__) — nothing to walk.
                self._logger.error(
                    'Failed to get package [%s] path' % (package))
                return
            for importer, mod, ispkg in \
                pkgutil.walk_packages(path=pkg_path,
                                      prefix=imp_pkg.__name__ + '.'):
                if not ispkg:
                    module = mod.rsplit('.', 1)[-1]
                    # Only the generated 'ttypes' modules carry the
                    # request/UVE/alarm declarations.
                    if 'ttypes' == module:
                        self._logger.debug(
                            'Add Sandesh requests in module "%s"' % (mod))
                        self._add_sandesh_request(mod)
                        self._logger.debug(
                            'Add Sandesh UVEs in module "%s"' % (mod))
                        self._add_sandesh_uve(mod)
                        self._logger.debug(
                            'Add Sandesh Alarms in module "%s"' %(mod))
                        self._add_sandesh_alarm(mod)
    # end _create_sandesh_request_and_uve_lists

    def _add_sandesh_request(self, mod):
        """Record the sandesh request types declared by module *mod*.

        Each entry of the module's _SANDESH_REQUEST_LIST is mapped to the
        module name in self._sandesh_request_dict.
        """
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
            return
        try:
            request_list = getattr(imp_module, '_SANDESH_REQUEST_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh request list' % (mod))
            return
        for request in request_list:
            self._sandesh_request_dict[request] = mod
    # end _add_sandesh_request

    def _get_sandesh_uve_list(self, imp_module):
        """Return the module's _SANDESH_UVE_LIST, or None when it is missing."""
        try:
            return getattr(imp_module, '_SANDESH_UVE_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh UVE list' %
                (imp_module.__name__))
            return None
    # end _get_sandesh_uve_list

    def _get_sandesh_uve_data_list(self, imp_module):
        """Return the module's _SANDESH_UVE_DATA_LIST, or None when missing."""
        try:
            return getattr(imp_module, '_SANDESH_UVE_DATA_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh UVE data list' %
                (imp_module.__name__))
            return None
    # end _get_sandesh_uve_data_list

    def _add_sandesh_uve(self, mod):
        """Register the UVE types declared in module *mod*.

        The module's UVE list and UVE data list are parallel arrays: entry
        i of one pairs with entry i of the other.
        """
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            sandesh_uve_list = self._get_sandesh_uve_list(imp_module)
            sandesh_uve_data_list = self._get_sandesh_uve_data_list(imp_module)
            if sandesh_uve_list is None or sandesh_uve_data_list is None:
                return
            if len(sandesh_uve_list) != len(sandesh_uve_data_list):
                self._logger.error(
                    '"%s" module sandesh UVE and UVE data list do not match' %
                     (mod))
                return
            sandesh_uve_info_list = zip(sandesh_uve_list, sandesh_uve_data_list)
            # Register sandesh UVEs
            for uve_type_name, uve_data_type_name in sandesh_uve_info_list:
                # Registration happens via the constructor's side effects.
                SandeshUVEPerTypeMap(self, SandeshType.UVE,
                                     uve_type_name, uve_data_type_name, mod)
    # end _add_sandesh_uve

    def _get_sandesh_alarm_list(self, imp_module):
        """Return the module's _SANDESH_ALARM_LIST, or None when it is missing."""
        try:
            return getattr(imp_module, '_SANDESH_ALARM_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh Alarm list' %
                (imp_module.__name__))
            return None
    # end _get_sandesh_alarm_list

    def _get_sandesh_alarm_data_list(self, imp_module):
        """Return the module's _SANDESH_ALARM_DATA_LIST, or None when missing."""
        try:
            return getattr(imp_module, '_SANDESH_ALARM_DATA_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh Alarm data list' %
                (imp_module.__name__))
            return None
    # end _get_sandesh_alarm_data_list

    def _add_sandesh_alarm(self, mod):
        """Register the Alarm types declared in module *mod*.

        The module's alarm list and alarm data list are parallel arrays.
        """
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            sandesh_alarm_list = self._get_sandesh_alarm_list(imp_module)
            sandesh_alarm_data_list = self._get_sandesh_alarm_data_list(imp_module)
            if sandesh_alarm_list is None or sandesh_alarm_data_list is None:
                return
            if len(sandesh_alarm_list) != len(sandesh_alarm_data_list):
                self._logger.error(
                    '"%s" module sandesh Alarm and Alarm data list do not match' %
                     (mod))
                return
            sandesh_alarm_info_list = zip(sandesh_alarm_list,
                                          sandesh_alarm_data_list)
            # Register sandesh Alarms
            for alarm_type_name, alarm_data_type_name in sandesh_alarm_info_list:
                # Registration happens via the constructor's side effects.
                SandeshUVEPerTypeMap(self, SandeshType.ALARM, alarm_type_name,
                                     alarm_data_type_name, mod)
    # end _add_sandesh_alarm

    def _init_logger(self, generator):
        """Create the SandeshLogger and cache its Python logger."""
        # Fall back to a generic name when the caller did not supply one.
        if not generator:
            generator = 'sandesh'
        self._sandesh_logger = SandeshLogger(generator)
        self._logger = self._sandesh_logger.logger()
# Example #22
# 0
class SandeshSession(TcpSession):
    """TCP session between this sandesh generator and a Collector.

    Couples a SandeshReader/SandeshWriter pair with a WorkQueue that
    serializes outgoing sandesh messages over the connection.
    """

    _KEEPALIVE_IDLE_TIME = 15  # in secs
    _KEEPALIVE_INTERVAL = 3  # in secs
    _KEEPALIVE_PROBES = 5
    # TCP_USER_TIMEOUT (value 18 on Linux) is not exposed by the socket
    # module on all Python versions, hence the literal option number.
    _TCP_USER_TIMEOUT_OPT = 18
    _TCP_USER_TIMEOUT_VAL = 30000  # ms

    def __init__(self, sandesh_instance, server, event_handler,
                 sandesh_msg_handler):
        self._sandesh_instance = sandesh_instance
        self._logger = sandesh_instance._logger
        self._event_handler = event_handler
        self._reader = SandeshReader(self, sandesh_msg_handler)
        self._writer = SandeshWriter(self)
        self._send_queue = WorkQueue(self._send_sandesh,
                                     self._is_ready_to_send_sandesh)
        TcpSession.__init__(self, server)
    # end __init__

    # Public functions

    def sandesh_instance(self):
        """Return the owning sandesh instance."""
        return self._sandesh_instance
    # end sandesh_instance

    def is_send_queue_empty(self):
        """True when no sandesh is waiting to be sent."""
        return self._send_queue.is_queue_empty()
    # end is_send_queue_empty

    def is_connected(self):
        """True when the TCP connection is established."""
        return self._connected
    # end is_connected

    def enqueue_sandesh(self, sandesh):
        """Queue *sandesh* for transmission."""
        self._send_queue.enqueue(sandesh)
    # end enqueue_sandesh

    def send_queue(self):
        """Return the underlying send WorkQueue."""
        return self._send_queue
    # end send_queue

    # Overloaded functions from TcpSession

    def connect(self):
        TcpSession.connect(self, timeout=5)
    # end connect

    def _on_read(self, buf):
        # A negative return from the reader indicates a framing/parse
        # error; the session is torn down.
        if self._reader.read_msg(buf) < 0:
            self._logger.error('SandeshReader Error. Close Collector session')
            self.close()
    # end _on_read

    def _handle_event(self, event):
        self._event_handler(self, event)
    # end _handle_event

    def _set_socket_options(self):
        """Enable TCP keepalive (and user timeout where supported)."""
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        if hasattr(socket, 'TCP_KEEPIDLE'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, 'TCP_KEEPALIVE'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, 'TCP_KEEPINTVL'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL,
                                    self._KEEPALIVE_INTERVAL)
        if hasattr(socket, 'TCP_KEEPCNT'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT,
                                    self._KEEPALIVE_PROBES)
        try:
            self._socket.setsockopt(socket.IPPROTO_TCP,
                                    self._TCP_USER_TIMEOUT_OPT,
                                    self._TCP_USER_TIMEOUT_VAL)
        except:
            self._logger.error(
                'setsockopt failed: option %d, value %d' %
                (self._TCP_USER_TIMEOUT_OPT, self._TCP_USER_TIMEOUT_VAL))
    # end _set_socket_options

    # Private functions

    def _send_sandesh(self, sandesh):
        # Tell the writer whether more messages follow so it can batch.
        more = not self._send_queue.is_queue_empty()
        if not self._connected:
            if self._sandesh_instance.is_logging_dropped_allowed(sandesh):
                self._logger.error("SANDESH: %s: %s" %
                                   ("Not connected", sandesh.log()))
            # Account the drop in the tx statistics (keeps this class
            # consistent with the other SandeshSession definition in this
            # file, which records SessionNotConnected drops).
            self._sandesh_instance.msg_stats().update_tx_stats(
                sandesh.__class__.__name__, 0,
                SandeshTxDropReason.SessionNotConnected)
            return
        if sandesh.is_logging_allowed(self._sandesh_instance):
            self._logger.log(
                SandeshLogger.get_py_logger_level(sandesh.level()),
                sandesh.log())
        self._writer.send_msg(sandesh, more)
    # end _send_sandesh

    def _is_ready_to_send_sandesh(self):
        return self._sandesh_instance.is_send_queue_enabled()
# NOTE(review): this is a second definition of SandeshSession; at import
# time it shadows the one defined earlier in this file. Confirm which copy
# is the intended survivor.
class SandeshSession(TcpSession):
    """TCP session between a sandesh generator and a Collector (duplicate
    definition — see note above)."""

    _KEEPALIVE_IDLE_TIME = 15  # in secs
    _KEEPALIVE_INTERVAL = 3  # in secs
    _KEEPALIVE_PROBES = 5
    # TCP_USER_TIMEOUT socket option number (18 on Linux), hard-coded.
    _TCP_USER_TIMEOUT_OPT = 18
    _TCP_USER_TIMEOUT_VAL = 30000  # ms

    def __init__(self, sandesh_instance, server, event_handler, sandesh_msg_handler):
        self._sandesh_instance = sandesh_instance
        self._logger = sandesh_instance._logger
        self._event_handler = event_handler
        self._reader = SandeshReader(self, sandesh_msg_handler)
        self._writer = SandeshWriter(self)
        self._send_queue = WorkQueue(self._send_sandesh, self._is_ready_to_send_sandesh)
        TcpSession.__init__(self, server)

    # end __init__

    # Public functions

    def sandesh_instance(self):
        """Return the owning sandesh instance."""
        return self._sandesh_instance

    # end sandesh_instance

    def is_send_queue_empty(self):
        """True when no sandesh is waiting to be sent."""
        return self._send_queue.is_queue_empty()

    # end is_send_queue_empty

    def is_connected(self):
        """True when the TCP connection is established."""
        return self._connected

    # end is_connected

    def enqueue_sandesh(self, sandesh):
        """Queue *sandesh* for transmission."""
        self._send_queue.enqueue(sandesh)

    # end enqueue_sandesh

    def send_queue(self):
        """Return the underlying send WorkQueue."""
        return self._send_queue

    # end send_queue

    # Overloaded functions from TcpSession

    def connect(self):
        TcpSession.connect(self, timeout=5)

    # end connect

    def _on_read(self, buf):
        # A negative return from the reader indicates a parse error.
        if self._reader.read_msg(buf) < 0:
            self._logger.error("SandeshReader Error. Close Collector session")
            self.close()

    # end _on_read

    def _handle_event(self, event):
        self._event_handler(self, event)

    # end _handle_event

    def _set_socket_options(self):
        """Enable TCP keepalive (and user timeout where supported)."""
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        if hasattr(socket, "TCP_KEEPIDLE"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, "TCP_KEEPALIVE"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, "TCP_KEEPINTVL"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, self._KEEPALIVE_INTERVAL)
        if hasattr(socket, "TCP_KEEPCNT"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, self._KEEPALIVE_PROBES)
        try:
            self._socket.setsockopt(socket.IPPROTO_TCP, self._TCP_USER_TIMEOUT_OPT, self._TCP_USER_TIMEOUT_VAL)
        except:
            self._logger.error(
                "setsockopt failed: option %d, value %d" % (self._TCP_USER_TIMEOUT_OPT, self._TCP_USER_TIMEOUT_VAL)
            )

    # end _set_socket_options

    # Private functions

    def _send_sandesh(self, sandesh):
        # 'more' tells the writer whether further messages are queued.
        if self._send_queue.is_queue_empty():
            more = False
        else:
            more = True
        if not self._connected:
            if self._sandesh_instance.is_logging_dropped_allowed(sandesh):
                self._logger.error("SANDESH: %s: %s" % ("Not connected", sandesh.log()))
            self._sandesh_instance.msg_stats().update_tx_stats(
                sandesh.__class__.__name__, 0, SandeshTxDropReason.SessionNotConnected
            )
            return
        if sandesh.is_logging_allowed(self._sandesh_instance):
            self._logger.log(SandeshLogger.get_py_logger_level(sandesh.level()), sandesh.log())
        self._writer.send_msg(sandesh, more)

    # end _send_sandesh

    def _is_ready_to_send_sandesh(self):
        return self._sandesh_instance.is_send_queue_enabled()
class Sandesh(object):
    _DEFAULT_LOG_FILE = sand_logger.SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = (
        sand_logger.SandeshLogger._DEFAULT_SYSLOG_FACILITY)

    class SandeshRole:
        """Role played by a sandesh instance after initialization."""
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2

    # end class SandeshRole

    def __init__(self):
        """Create an uninitialized sandesh instance.

        Real setup happens in init_generator() / init_collector(); the
        constructor only establishes defaults.
        """
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
        self._connect_to_collector = True
        self._disable_sending_object_logs = False
        self._disable_sending_all_messages = False

    # end __init__

    # Public functions

    def init_generator(self,
                       module,
                       source,
                       node_type,
                       instance_id,
                       collectors,
                       client_context,
                       http_port,
                       sandesh_req_uve_pkg_list=None,
                       connect_to_collector=True,
                       logger_class=None,
                       logger_config_file=None,
                       host_ip='127.0.0.1',
                       alarm_ack_callback=None,
                       config=None):
        """Initialize this instance as a sandesh generator.

        Sets up identity fields, logging, statistics, tracing, the
        request/UVE/alarm registries, the introspect HTTP server (when
        http_port != -1) and, when connect_to_collector is True, the
        client connection to *collectors*.
        """
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        # Copy the caller's list: 'pysandesh.gen_py' is appended below and
        # must not leak into the caller's argument (previously the original
        # list object was aliased and mutated on every call).
        self._sandesh_req_uve_pkg_list = list(sandesh_req_uve_pkg_list or [])
        self._host_ip = host_ip
        self._client_context = client_context
        self._connect_to_collector = connect_to_collector
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        self._send_level = SandeshLevel.INVALID
        self._init_logger(self._module,
                          logger_class=logger_class,
                          logger_config_file=logger_config_file)
        self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                          connect_to_collector)
        # Imported here to avoid a cyclic import at module load time.
        from sandesh_stats import SandeshMessageStatistics
        self._msg_stats = SandeshMessageStatistics()
        self._trace = trace.Trace()
        self._sandesh_request_map = {}
        self._alarm_ack_callback = alarm_ack_callback
        self._config = config or SandeshConfig.from_parser_arguments()
        self._uve_type_maps = SandeshUVETypeMaps(self._logger)
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in self._sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        if self._config.disable_object_logs is not None:
            self.disable_sending_object_logs(self._config.disable_object_logs)
        if self._config.system_logs_rate_limit is not None:
            SandeshSystem.set_sandesh_send_rate_limit(
                self._config.system_logs_rate_limit)
        self._gev_httpd = None
        if http_port != -1:
            self.run_introspect_server(http_port)
        if self._connect_to_collector:
            self._client = SandeshClient(self)
            self._client.initiate(collectors)

    # end init_generator

    def run_introspect_server(self, http_port):
        """Start the HTTP introspect server on *http_port* in a greenlet."""
        self._http_server = SandeshHttp(self, self._module, http_port,
                                        self._sandesh_req_uve_pkg_list,
                                        self._config)
        self._gev_httpd = gevent.spawn(self._http_server.start_http_server)

    # end run_introspect_server

    def uninit(self):
        """Tear down the instance; currently only stops the HTTP server."""
        self.kill_httpd()

    def kill_httpd(self):
        """Stop the introspect HTTP server and kill its greenlet, if any.

        Failures during shutdown are logged at debug level and ignored.
        """
        if self._gev_httpd:
            try:
                self._http_server.stop_http_server()
                self._http_server = None
                # Yield once so the server greenlet can observe the stop.
                gevent.sleep(0)
                self._gev_httpd.kill()
            except Exception as e:
                self._logger.debug(str(e))

    def record_port(self, name, port):
        """Report *port* through a pre-existing named pipe under /tmp.

        The pipe name embeds os.getppid(); presumably the parent process
        created the pipe and reads the value — verify against the caller.
        """
        pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name)
        try:
            pipeout = os.open(pipe_name, os.O_WRONLY)
        except Exception:
            self._logger.error('Cannot write %s_port %d to %s' %
                               (name, port, pipe_name))
        else:
            # NOTE(review): the success path also logs at error level —
            # likely to guarantee visibility, but confirm it is intentional.
            self._logger.error('Writing %s_port %d to %s' %
                               (name, port, pipe_name))
            os.write(pipeout, '%d\n' % port)
            os.close(pipeout)

    def logger(self):
        """Return the underlying Python logger object."""
        return self._logger

    # end logger

    def sandesh_logger(self):
        """Return the SandeshLogger wrapper."""
        return self._sandesh_logger

    # end sandesh_logger

    def set_logging_params(self,
                           enable_local_log=False,
                           category='',
                           level=SandeshLevel.SYS_INFO,
                           file=sand_logger.SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY,
                           enable_trace_print=False,
                           enable_flow_log=False):
        """Forward all logging knobs to the SandeshLogger in a single call."""
        self._sandesh_logger.set_logging_params(
            enable_local_log=enable_local_log,
            category=category,
            level=level,
            file=file,
            enable_syslog=enable_syslog,
            syslog_facility=syslog_facility,
            enable_trace_print=enable_trace_print,
            enable_flow_log=enable_flow_log)

    # end set_logging_params

    # Thin setter delegates to the underlying SandeshLogger.

    def set_trace_print(self, enable_trace_print):
        self._sandesh_logger.set_trace_print(enable_trace_print)

    # end set_trace_print

    def set_flow_logging(self, enable_flow_log):
        self._sandesh_logger.set_flow_logging(enable_flow_log)

    # end set_flow_logging

    def set_local_logging(self, enable_local_log):
        self._sandesh_logger.set_local_logging(enable_local_log)

    # end set_local_logging

    def set_logging_level(self, level):
        self._sandesh_logger.set_logging_level(level)

    # end set_logging_level

    def set_logging_category(self, category):
        self._sandesh_logger.set_logging_category(category)

    # end set_logging_category

    def set_logging_file(self, file):
        self._sandesh_logger.set_logging_file(file)

    # end set_logging_file

    def is_logging_dropped_allowed(self, sandesh):
        """Return True when a drop of *sandesh* may be logged locally.

        Flow messages follow the flow-logging switch; other messages may
        opt out via a do_rate_limit_drop_log attribute (default True).
        """
        if sandesh.type() == SandeshType.FLOW:
            return self.is_flow_logging_enabled()
        return getattr(sandesh, 'do_rate_limit_drop_log', True)

    # end is_logging_dropped_allowed

    def is_send_queue_enabled(self):
        """Return True when outgoing messages may be drained."""
        return self._send_queue_enabled

    # end is_send_queue_enabled

    def is_connect_to_collector_enabled(self):
        """Return True when this instance should connect to a Collector."""
        return self._connect_to_collector

    # end is_connect_to_collector_enabled

    def is_sending_object_logs_disabled(self):
        """Return True when object logs are not being sent."""
        return self._disable_sending_object_logs

    # end is_sending_object_logs_disabled

    def disable_sending_object_logs(self, disable):
        if self._disable_sending_object_logs != disable:
            self._logger.info(
                "SANDESH: Disable Sending Object "
                "Logs: %s -> %s", self._disable_sending_object_logs, disable)
            self._disable_sending_object_logs = disable

    # end disable_sending_object_logs

    def is_sending_all_messages_disabled(self):
        """Return True when sending of all messages is disabled."""
        return self._disable_sending_all_messages

    # end is_sending_all_messages_disabled

    def disable_sending_all_messages(self, disable):
        if self._disable_sending_all_messages != disable:
            self._logger.info(
                "SANDESH: Disable Sending ALL Messages: "
                "%s -> %s", self._disable_sending_all_messages, disable)
            self._disable_sending_all_messagess = disable

    # end disable_sending_all_messages

    def set_send_queue(self, enable):
        """Enable/disable draining of the send queue.

        Re-enabling kicks the session's queue runner so buffered messages
        start flowing again.
        """
        if self._send_queue_enabled != enable:
            self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                              self._send_queue_enabled, enable)
            self._send_queue_enabled = enable
            if enable:
                connection = self._client.connection()
                if connection and connection.session():
                    connection.session().send_queue().may_be_start_runner()

    # end set_send_queue

    def set_send_level(self, count, sandesh_level):
        """Update the minimum level for sending messages.

        *count* is accepted for interface compatibility but is unused here.
        """
        if self._send_level != sandesh_level:
            self._logger.info('Sandesh Send Level [%s] -> [%s]' % \
                              (SandeshLevel._VALUES_TO_NAMES[self._send_level],
                               SandeshLevel._VALUES_TO_NAMES[sandesh_level]))
            self._send_level = sandesh_level

    # end set_send_level

    def send_level(self):
        """Return the current send-level threshold."""
        return self._send_level

    # end send_level

    def init_collector(self):
        # Collector-side initialization is not implemented in this class.
        pass

    # end init_collector

    def msg_stats(self):
        """Return the message statistics accumulator."""
        return self._msg_stats

    # end msg_stats

    def reconfig_collectors(self, collectors):
        """Point the client at a new set of collectors."""
        self._client.set_collectors(collectors)

    # end reconfig_collectors

    @classmethod
    def next_seqnum(cls):
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 1
        else:
            cls._lseqnum += 1
        return cls._lseqnum

    # end next_seqnum

    @classmethod
    def lseqnum(cls):
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 0
        return cls._lseqnum

    # end lseqnum

    # Read-only accessors for the instance's identity and per-message
    # bookkeeping fields set in __init__ / init_generator.

    def module(self):
        return self._module

    # end module

    def source_id(self):
        return self._source

    # end source_id

    def node_type(self):
        return self._node_type

    # end node_type

    def instance_id(self):
        return self._instance_id

    # end instance_id

    def host_ip(self):
        return self._host_ip

    # end host_ip

    def scope(self):
        return self._scope

    # end scope

    def context(self):
        return self._context

    # end context

    def seqnum(self):
        # NOTE(review): self._seqnum is not assigned in __init__ —
        # confirm it is set elsewhere before this accessor is used.
        return self._seqnum

    # end seqnum

    def timestamp(self):
        return self._timestamp

    # end timestamp

    def versionsig(self):
        return self._versionsig

    # end versionsig

    def type(self):
        return self._type

    # end type

    def hints(self):
        return self._hints

    # end hints

    def client(self):
        return self._client

    # end client

    def level(self):
        return self._level

    # end level

    def category(self):
        return self._category

    # end category

    def validate(self):
        # Base implementation: nothing to validate.
        return

    # end validate

    def alarm_ack_callback(self):
        """Return the callback registered for alarm acknowledgements."""
        return self._alarm_ack_callback

    # end alarm_ack_callback

    def config(self):
        """Return the sandesh configuration in effect for this instance."""
        return self._config

    # end config

    # Read-only delegates to the underlying SandeshLogger.

    def is_flow_logging_enabled(self):
        return self._sandesh_logger.is_flow_logging_enabled()

    # end is_flow_logging_enabled

    def is_trace_print_enabled(self):
        return self._sandesh_logger.is_trace_print_enabled()

    # end is_trace_print_enabled

    def is_local_logging_enabled(self):
        return self._sandesh_logger.is_local_logging_enabled()

    # end is_local_logging_enabled

    def logging_level(self):
        return self._sandesh_logger.logging_level()

    # end logging_level

    def logging_category(self):
        return self._sandesh_logger.logging_category()

    # end logging_category

    def is_syslog_logging_enabled(self):
        return self._sandesh_logger.is_syslog_logging_enabled()

    # end is_syslog_logging_enabled

    def logging_syslog_facility(self):
        return self._sandesh_logger.logging_syslog_facility()

    # end logging_syslog_facility

    def is_unit_test(self):
        """True when this instance was never initialized with a role."""
        return self._role == self.SandeshRole.INVALID

    # end is_unit_test

    def handle_test(self, sandesh_init):
        # NOTE(review): here `self` appears to act as a sandesh message
        # (it calls self.log()/self.is_logging_allowed()) while
        # *sandesh_init* is the library instance — confirm the intended
        # receiver before refactoring.
        if sandesh_init.is_unit_test() or self._is_level_ut():
            if self.is_logging_allowed(sandesh_init):
                sandesh_init._logger.debug(self.log())
            return True
        return False

    def is_logging_allowed(self, sandesh_init):
        """Decide whether this message may be written to the local log.

        Flow messages follow the flow-logging switch; everything else
        must pass the level filter and, when a logging category is
        configured, match it exactly.
        """
        if self._type == SandeshType.FLOW:
            return sandesh_init.is_flow_logging_enabled()

        if not sandesh_init.is_local_logging_enabled():
            return False

        if sandesh_init.logging_level() < self._level:
            return False

        logging_category = sandesh_init.logging_category()
        if not logging_category:
            return True
        return logging_category == self._category

    # end is_logging_allowed

    def enqueue_sandesh_request(self, sandesh):
        """Queue a received sandesh request for asynchronous handling."""
        self._rcv_queue.enqueue(sandesh)

    # end enqueue_sandesh_request

    def send_sandesh(self, tx_sandesh):
        """Send *tx_sandesh* via the client, or account it as dropped."""
        if self._client:
            self._client.send_sandesh(tx_sandesh)
        else:
            if self._connect_to_collector:
                self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient)
            else:
                # Not connecting to a Collector: pass the message's own
                # level so the drop is logged at that level rather than
                # as an error (see drop_tx_sandesh).
                self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient,
                                     tx_sandesh.level())

    # end send_sandesh

    def drop_tx_sandesh(self, tx_sandesh, drop_reason, level=None):
        """Account a dropped tx message in the stats and optionally log it.

        When *level* is given the drop is logged at that level; otherwise
        it is logged as an error with the drop reason name.
        """
        self._msg_stats.update_tx_stats(tx_sandesh.__class__.__name__,
                                        sys.getsizeof(tx_sandesh), drop_reason)
        if self.is_logging_dropped_allowed(tx_sandesh):
            if level is not None:
                self._logger.log(
                    sand_logger.SandeshLogger.get_py_logger_level(level),
                    tx_sandesh.log())
            else:
                self._logger.error('SANDESH: [DROP: %s] %s' % \
                    (SandeshTxDropReason._VALUES_TO_NAMES[drop_reason],
                     tx_sandesh.log()))

    # end drop_tx_sandesh

    def send_generator_info(self):
        """Publish a SandeshModuleClientTrace UVE describing this client."""
        from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \
            ModuleClientState, SandeshModuleClientTrace
        # Nothing to report without an active client connection.
        if not self._client or not self._client.connection():
            return
        client_info = SandeshClientInfo()
        try:
            # NOTE(review): client_start_time is unused; the try/except
            # only serves to lazily initialize self._start_time once.
            client_start_time = self._start_time
        except Exception:
            self._start_time = util.UTCTimestampUsec()
        finally:
            client_info.start_time = self._start_time
            client_info.pid = os.getpid()
            if self._http_server is not None:
                client_info.http_port = self._http_server.get_port()
            client_info.collector_name = \
                self._client.connection().collector_name() or ''
            client_info.collector_ip = \
                self._client.connection().collector() or ''
            client_info.collector_list = self._client.connection().collectors()
            client_info.status = self._client.connection().state()
            client_info.successful_connections = (
                self._client.connection().statemachine().connect_count())
            module_state = ModuleClientState(name=self._source + ':' +
                                             self._node_type + ':' +
                                             self._module + ':' +
                                             self._instance_id,
                                             client_info=client_info,
                                             sm_queue_count=self._client.\
                connection().statemachine()._event_queue.size(),
                                             max_sm_queue_count=self._client.\
                connection().statemachine()._event_queue.max_qlen())
            generator_info = SandeshModuleClientTrace(data=module_state,
                                                      sandesh=self)
            generator_info.send(sandesh=self)

    # end send_generator_info

    def get_sandesh_request_object(self, request):
        try:
            req_type = self._sandesh_request_map[request]
        except KeyError:
            self._logger.error('Invalid Sandesh Request "%s"' % (request))
            return None
        else:
            return req_type()

    # end get_sandesh_request_object

    # Thin delegates to the trace module held in self._trace.

    def trace_enable(self):
        self._trace.TraceOn()

    # end trace_enable

    def trace_disable(self):
        self._trace.TraceOff()

    # end trace_disable

    def is_trace_enabled(self):
        return self._trace.IsTraceOn()

    # end is_trace_enabled

    def trace_buffer_create(self, name, size, enable=True):
        self._trace.TraceBufAdd(name, size, enable)

    # end trace_buffer_create

    def trace_buffer_delete(self, name):
        self._trace.TraceBufDelete(name)

    # end trace_buffer_delete

    def trace_buffer_enable(self, name):
        self._trace.TraceBufOn(name)

    # end trace_buffer_enable

    def trace_buffer_disable(self, name):
        self._trace.TraceBufOff(name)

    # end trace_buffer_disable

    def is_trace_buffer_enabled(self, name):
        return self._trace.IsTraceBufOn(name)

    # end is_trace_buffer_enabled

    def trace_buffer_list_get(self):
        return self._trace.TraceBufListGet()

    # end trace_buffer_list_get

    def trace_buffer_size_get(self, name):
        return self._trace.TraceBufSizeGet(name)

    # end trace_buffer_size_get

    def trace_buffer_read(self, name, read_context, count, read_cb):
        self._trace.TraceRead(name, read_context, count, read_cb)

    # end trace_buffer_read

    def trace_buffer_read_done(self, name, context):
        self._trace.TraceReadDone(name, context)

    # end trace_buffer_read_done

    # API to send the trace buffer to the Collector.
    # If trace count is not specified/or zero, then the entire trace buffer
    # is sent to the Collector.
    # [Note] No duplicate trace message sent to the Collector. i.e., If there
    # is no trace message added between two consequent calls to this API, then
    # no trace message is sent to the Collector.
    def send_sandesh_trace_buffer(self, trace_buf, count=0):
        """Send (new) messages from trace buffer *trace_buf* to the
        Collector; count == 0 means the whole buffer."""
        trace_req_runner = SandeshTraceRequestRunner(
            sandesh=self,
            request_buffer_name=trace_buf,
            request_context='',
            read_context='Collector',
            request_count=count)
        trace_req_runner.Run()

    # end send_sandesh_trace_buffer

    # Private functions

    def _is_level_ut(self):
        """True when the configured level lies inside the unit-test range."""
        return SandeshLevel.UT_START <= self._level <= SandeshLevel.UT_END

    # end _is_level_ut

    def _create_task(self):
        # Spawn a greenlet that drains the send runner forever.
        return gevent.spawn(self._runner.run_for_ever)

    # end _create_task

    def _process_rx_sandesh(self, rx_sandesh):
        handle_request_fn = getattr(rx_sandesh, "handle_request", None)
        if callable(handle_request_fn):
            handle_request_fn(rx_sandesh)
        else:
            self._logger.error('Sandesh Request "%s" not implemented' %
                               (rx_sandesh.__class__.__name__))

    # end _process_rx_sandesh

    def _create_sandesh_request_and_uve_lists(self, package):
        """Walk *package* recursively and register the Sandesh requests,
        UVEs and Alarms declared in every non-package 'ttypes' module."""
        try:
            imp_pkg = __import__(package)
        except ImportError:
            self._logger.error('Failed to import package "%s"' % (package))
        else:
            try:
                pkg_path = imp_pkg.__path__
            except AttributeError:
                # Not a package (plain module) - nothing to walk.
                self._logger.error('Failed to get package [%s] path' %
                                   (package))
                return
            for importer, mod, ispkg in (pkgutil.walk_packages(
                    path=pkg_path, prefix=imp_pkg.__name__ + '.')):
                if not ispkg:
                    module = mod.rsplit('.', 1)[-1]
                    # Generated code always places the definitions in a
                    # module named 'ttypes'.
                    if 'ttypes' == module:
                        self._logger.debug(
                            'Add Sandesh requests in module "%s"' % (mod))
                        self._add_sandesh_request(mod)
                        self._logger.debug('Add Sandesh UVEs in module "%s"' %
                                           (mod))
                        self._add_sandesh_uve(mod)
                        self._logger.debug(
                            'Add Sandesh Alarms in module "%s"' % (mod))
                        self._add_sandesh_alarm(mod)

    # end _create_sandesh_request_and_uve_lists

    def _add_sandesh_request(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            try:
                sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST')
            except AttributeError:
                self._logger.error(
                    '"%s" module does not have sandesh request list' % (mod))
            else:
                # Add sandesh requests to the dictionary.
                for req in sandesh_req_list:
                    self._sandesh_request_map[req.__name__] = req

    # end _add_sandesh_request

    def _get_sandesh_uve_list(self, imp_module):
        try:
            sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST')
        except AttributeError:
            self._logger.error('"%s" module does not have sandesh UVE list' %
                               (imp_module.__name__))
            return None
        else:
            return sandesh_uve_list

    # end _get_sandesh_uve_list

    def _add_sandesh_uve(self, mod):
        """Import *mod* and register every (uve_type, uve_data_type) pair
        from its _SANDESH_UVE_LIST as a per-type UVE map."""
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
            return
        uve_list = self._get_sandesh_uve_list(imp_module)
        if not uve_list:
            return
        # Register sandesh UVEs
        for uve_type, uve_data_type in uve_list:
            SandeshUVEPerTypeMap(self, SandeshType.UVE, uve_type,
                                 uve_data_type)

    # end _add_sandesh_uve

    def _get_sandesh_alarm_list(self, imp_module):
        try:
            sandesh_alarm_list = getattr(imp_module, '_SANDESH_ALARM_LIST')
        except AttributeError:
            self._logger.error('"%s" module does not have sandesh Alarm list' %
                               (imp_module.__name__))
            return None
        else:
            return sandesh_alarm_list

    # end _get_sandesh_alarm_list

    def _add_sandesh_alarm(self, mod):
        """Import *mod* and register every (alarm_type, alarm_data_type)
        pair from its _SANDESH_ALARM_LIST as a per-type Alarm map."""
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
            return
        alarm_list = self._get_sandesh_alarm_list(imp_module)
        if not alarm_list:
            return
        # Register sandesh Alarms
        for alarm_type, alarm_data_type in alarm_list:
            SandeshUVEPerTypeMap(self, SandeshType.ALARM, alarm_type,
                                 alarm_data_type)

    # end _add_sandesh_alarm

    def _init_logger(self, module, logger_class=None, logger_config_file=None):
        """Create the sandesh logger named after *module* (falls back to
        'sandesh'), optionally via a user-supplied logger class."""
        name = module or 'sandesh'
        if logger_class:
            self._sandesh_logger = sand_logger.create_logger(
                name, logger_class, logger_config_file=logger_config_file)
        else:
            self._sandesh_logger = sand_logger.SandeshLogger(
                name, logger_config_file=logger_config_file)
        self._logger = self._sandesh_logger.logger()
class Sandesh(object):
    """Per-process Sandesh library entry point.

    Holds the generator identity (module/source/node-type/instance id),
    the logger, the trace buffers and the client connection to the
    Collector.  Call init_generator() once, then use the accessors and
    trace/send helpers below.
    """

    _DEFAULT_LOG_FILE = SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = SandeshLogger._DEFAULT_SYSLOG_FACILITY

    class SandeshRole:
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2
    # end class SandeshRole

    def __init__(self):
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
    # end __init__

    # Public functions

    def init_generator(self, module, source, node_type, instance_id,
                       collectors, client_context,
                       http_port, sandesh_req_uve_pkg_list=None,
                       discovery_client=None):
        """Initialize as a generator.

        Registers the Sandesh requests/UVEs found in the 'ttypes' modules
        of every package in sandesh_req_uve_pkg_list, starts the
        introspect HTTP server (unless http_port == -1) and initiates
        the client connection to the Collector(s).
        """
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        self._client_context = client_context
        self._collectors = collectors
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        self._init_logger(source + ':' + module + ':' + node_type + ':' \
            + instance_id)
        self._stats = SandeshStats()
        self._trace = trace.Trace()
        self._sandesh_request_dict = {}
        self._uve_type_maps = SandeshUVETypeMaps()
        if sandesh_req_uve_pkg_list is None:
            sandesh_req_uve_pkg_list = []
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        if http_port != -1:
            self._http_server = SandeshHttp(
                self, module, http_port, sandesh_req_uve_pkg_list)
            gevent.spawn(self._http_server.start_http_server)
        # Only the first two collectors are used (primary/secondary).
        primary_collector = None
        secondary_collector = None
        if self._collectors is not None:
            if len(self._collectors) > 0:
                primary_collector = self._collectors[0]
            if len(self._collectors) > 1:
                secondary_collector = self._collectors[1]
        self._client = SandeshClient(
            self, primary_collector, secondary_collector,
            discovery_client)
        self._client.initiate()
    # end init_generator

    def logger(self):
        return self._logger
    # end logger

    def sandesh_logger(self):
        return self._sandesh_logger
    # end sandesh_logger

    def set_logging_params(self, enable_local_log=False, category='',
                           level=SandeshLevel.SYS_INFO,
                           file=SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY):
        """Configure local/syslog logging in one call."""
        self._sandesh_logger.set_logging_params(
            enable_local_log, category, level, file,
            enable_syslog, syslog_facility)
    # end set_logging_params

    def set_local_logging(self, enable_local_log):
        self._sandesh_logger.set_local_logging(enable_local_log)
    # end set_local_logging

    def set_logging_level(self, level):
        self._sandesh_logger.set_logging_level(level)
    # end set_logging_level

    def set_logging_category(self, category):
        self._sandesh_logger.set_logging_category(category)
    # end set_logging_category

    def set_logging_file(self, file):
        self._sandesh_logger.set_logging_file(file)
    # end set_logging_file

    def is_send_queue_enabled(self):
        return self._send_queue_enabled
    # end is_send_queue_enabled

    def set_send_queue(self, enable):
        """Enable/disable the client send queue; enabling kicks the
        session's queue runner so buffered messages drain."""
        if self._send_queue_enabled != enable:
            self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                              self._send_queue_enabled, enable)
            self._send_queue_enabled = enable
            if enable:
                connection = self._client.connection()
                if connection and connection.session():
                    connection.session().send_queue().may_be_start_runner()
    # end set_send_queue

    def init_collector(self):
        pass
    # end init_collector

    def stats(self):
        return self._stats
    # end stats

    @classmethod
    def next_seqnum(cls):
        # Lazily-initialized class-wide sequence number, starting at 1.
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 1
        else:
            cls._lseqnum += 1
        return cls._lseqnum
    # end next_seqnum

    @classmethod
    def lseqnum(cls):
        # Last allocated sequence number (0 when none allocated yet).
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 0
        return cls._lseqnum
    # end lseqnum

    def module(self):
        return self._module
    # end module

    def source_id(self):
        return self._source
    # end source_id

    def node_type(self):
        return self._node_type
    #end node_type

    def instance_id(self):
        return self._instance_id
    #end instance_id

    def scope(self):
        return self._scope
    # end scope

    def context(self):
        return self._context
    # end context

    def seqnum(self):
        # NOTE(review): _seqnum is set by message subclasses, not by
        # __init__ - confirm before calling on a bare Sandesh instance.
        return self._seqnum
    # end seqnum

    def timestamp(self):
        return self._timestamp
    # end timestamp

    def versionsig(self):
        return self._versionsig
    # end versionsig

    def type(self):
        return self._type
    # end type

    def hints(self):
        return self._hints
    # end hints

    def client(self):
        return self._client
    # end client

    def level(self):
        return self._level
    # end level

    def category(self):
        return self._category
    # end category

    def validate(self):
        return
    # end validate

    def is_local_logging_enabled(self):
        return self._sandesh_logger.is_local_logging_enabled()
    # end is_local_logging_enabled

    def logging_level(self):
        return self._sandesh_logger.logging_level()
    # end logging_level

    def logging_category(self):
        return self._sandesh_logger.logging_category()
    # end logging_category

    def is_syslog_logging_enabled(self):
        return self._sandesh_logger.is_syslog_logging_enabled()
    #end is_syslog_logging_enabled

    def logging_syslog_facility(self):
        return self._sandesh_logger.logging_syslog_facility()
    #end logging_syslog_facility

    def is_unit_test(self):
        return self._role == self.SandeshRole.INVALID
    # end is_unit_test

    def handle_test(self, sandesh_init):
        """Under unit test (or UT level), log the message locally instead
        of sending it; returns True when the message was consumed."""
        if sandesh_init.is_unit_test() or self._is_level_ut():
            # BUGFIX: was self._is_logging_allowed(), which does not exist
            # on this class (AttributeError); the method is named
            # is_logging_allowed().
            if self.is_logging_allowed(sandesh_init):
                sandesh_init._logger.debug(self.log())
                return True
        return False

    def is_logging_allowed(self, sandesh_init):
        """True when this message passes the local-log level and category
        filters configured on *sandesh_init*."""
        if not sandesh_init.is_local_logging_enabled():
            return False

        logging_level = sandesh_init.logging_level()
        level_allowed = logging_level >= self._level

        logging_category = sandesh_init.logging_category()
        if logging_category is None or len(logging_category) == 0:
            category_allowed = True
        else:
            category_allowed = logging_category == self._category

        return level_allowed and category_allowed
    # end is_logging_allowed

    def enqueue_sandesh_request(self, sandesh):
        self._rcv_queue.enqueue(sandesh)
    # end enqueue_sandesh_request

    def send_sandesh(self, tx_sandesh):
        # Without a client (e.g. unit test) just log the message locally.
        if self._client:
            self._client.send_sandesh(tx_sandesh)
        else:
            self._logger.debug(tx_sandesh.log())
    # end send_sandesh

    def send_generator_info(self):
        """Publish this generator's connection state as a
        ModuleClientState UVE."""
        from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \
            ModuleClientState, SandeshModuleClientTrace
        client_info = SandeshClientInfo()
        # Record the start time exactly once, on the first call.
        try:
            self._start_time
        except AttributeError:
            self._start_time = UTCTimestampUsec()
        client_info.start_time = self._start_time
        client_info.pid = os.getpid()
        if self._http_server is not None:
            client_info.http_port = self._http_server.get_port()
        client_info.collector_name = self._client.connection().collector()
        client_info.status = self._client.connection().state()
        client_info.successful_connections = \
            self._client.connection().statemachine().connect_count()
        client_info.primary = self._client.connection().primary_collector()
        if client_info.primary is None:
            client_info.primary = ''
        client_info.secondary = \
            self._client.connection().secondary_collector()
        if client_info.secondary is None:
            client_info.secondary = ''
        module_state = ModuleClientState(name=self._source + ':' +
                                         self._node_type + ':' +
                                         self._module + ':' +
                                         self._instance_id,
                                         client_info=client_info)
        generator_info = SandeshModuleClientTrace(
            data=module_state, sandesh=self)
        generator_info.send(sandesh=self)
    # end send_generator_info

    def get_sandesh_request_object(self, request):
        """Look up the module registered for *request* and instantiate
        the request class from it; returns None (after logging) on any
        failure."""
        try:
            req_module = self._sandesh_request_dict[request]
        except KeyError:
            self._logger.error('Invalid Sandesh Request "%s"' % (request))
            return None
        else:
            if req_module:
                try:
                    imp_module = importlib.import_module(req_module)
                except ImportError:
                    self._logger.error(
                        'Failed to import Module "%s"' % (req_module))
                else:
                    try:
                        sandesh_request = getattr(imp_module, request)()
                        return sandesh_request
                    except AttributeError:
                        self._logger.error(
                            'Failed to create Sandesh Request "%s"' %
                            (request))
                        return None
            else:
                self._logger.error(
                    'Sandesh Request "%s" not implemented' % (request))
                return None
    # end get_sandesh_request_object

    def trace_enable(self):
        self._trace.TraceOn()
    # end trace_enable

    def trace_disable(self):
        self._trace.TraceOff()
    # end trace_disable

    def is_trace_enabled(self):
        return self._trace.IsTraceOn()
    # end is_trace_enabled

    def trace_buffer_create(self, name, size, enable=True):
        self._trace.TraceBufAdd(name, size, enable)
    # end trace_buffer_create

    def trace_buffer_delete(self, name):
        self._trace.TraceBufDelete(name)
    # end trace_buffer_delete

    def trace_buffer_enable(self, name):
        self._trace.TraceBufOn(name)
    # end trace_buffer_enable

    def trace_buffer_disable(self, name):
        self._trace.TraceBufOff(name)
    # end trace_buffer_disable

    def is_trace_buffer_enabled(self, name):
        return self._trace.IsTraceBufOn(name)
    # end is_trace_buffer_enabled

    def trace_buffer_list_get(self):
        return self._trace.TraceBufListGet()
    # end trace_buffer_list_get

    def trace_buffer_size_get(self, name):
        return self._trace.TraceBufSizeGet(name)
    # end trace_buffer_size_get

    def trace_buffer_read(self, name, read_context, count, read_cb):
        self._trace.TraceRead(name, read_context, count, read_cb)
    # end trace_buffer_read

    def trace_buffer_read_done(self, name, context):
        self._trace.TraceReadDone(name, context)
    # end trace_buffer_read_done

    # API to send the trace buffer to the Collector.
    # If trace count is not specified/or zero, then the entire trace buffer
    # is sent to the Collector.
    # [Note] No duplicate trace message sent to the Collector. i.e., If there
    # is no trace message added between two consequent calls to this API, then
    # no trace message is sent to the Collector.
    def send_sandesh_trace_buffer(self, trace_buf, count=0):
        trace_req_runner = SandeshTraceRequestRunner(sandesh=self,
                                                     request_buffer_name=
                                                     trace_buf,
                                                     request_context='',
                                                     read_context='Collector',
                                                     request_count=count)
        trace_req_runner.Run()
    # end send_sandesh_trace_buffer

    # Private functions

    def _is_level_ut(self):
        return self._level >= SandeshLevel.UT_START and \
            self._level <= SandeshLevel.UT_END
    # end _is_level_ut

    def _create_task(self):
        return gevent.spawn(self._runner.run_for_ever)
    # end _create_task

    def _process_rx_sandesh(self, rx_sandesh):
        handle_request_fn = getattr(rx_sandesh, "handle_request", None)
        if callable(handle_request_fn):
            handle_request_fn(rx_sandesh)
        else:
            self._logger.error('Sandesh Request "%s" not implemented' %
                               (rx_sandesh.__class__.__name__))
    # end _process_rx_sandesh

    def _create_sandesh_request_and_uve_lists(self, package):
        """Walk *package* and register requests/UVEs from every
        non-package 'ttypes' module found."""
        try:
            imp_pkg = __import__(package)
        except ImportError:
            self._logger.error('Failed to import package "%s"' % (package))
        else:
            try:
                pkg_path = imp_pkg.__path__
            except AttributeError:
                self._logger.error(
                    'Failed to get package [%s] path' % (package))
                return
            for importer, mod, ispkg in \
                pkgutil.walk_packages(path=pkg_path,
                                      prefix=imp_pkg.__name__ + '.'):
                if not ispkg:
                    module = mod.rsplit('.', 1)[-1]
                    if 'ttypes' == module:
                        self._logger.debug(
                            'Add Sandesh requests in module "%s"' % (mod))
                        self._add_sandesh_request(mod)
                        self._logger.debug(
                            'Add Sandesh UVEs in module "%s"' % (mod))
                        self._add_sandesh_uve(mod)
    # end _create_sandesh_request_and_uve_lists

    def _add_sandesh_request(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            try:
                sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST')
            except AttributeError:
                self._logger.error(
                    '"%s" module does not have sandesh request list' % (mod))
            else:
                # Add sandesh requests to the dictionary keyed by request
                # name, valued by the defining module.
                for req in sandesh_req_list:
                    self._sandesh_request_dict[req] = mod
    # end _add_sandesh_request

    def _add_sandesh_uve(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            try:
                sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST')
            except AttributeError:
                self._logger.error(
                    '"%s" module does not have sandesh UVE list' % (mod))
            else:
                # Register sandesh UVEs
                for uve_type_name in sandesh_uve_list:
                    SandeshUVEPerTypeMap(self, uve_type_name, mod)
    # end _add_sandesh_uve

    def _init_logger(self, generator):
        # Create the logger named after the generator identity string.
        if not generator:
            generator = 'sandesh'
        self._sandesh_logger = SandeshLogger(generator)
        self._logger = self._sandesh_logger.logger()
class SandeshStateMachine(object):

    _IDLE_HOLD_TIME = 4 # in seconds
    _CONNECT_TIME = 30 # in seconds

    def __init__(self, connection, logger, collectors):
        """Build the client-connection FSM.

        States: IDLE -> CONNECT -> CLIENT_INIT -> ESTABLISHED, with
        DISCONNECT as the no-collector-known holding state.  The nested
        helpers below are the per-state entry callbacks and are closed
        over nothing but their event argument.
        """

        def _update_connection_state(e, status):
            # Publish the collector connection status via ConnectionState.
            from connection_info import ConnectionState
            from gen_py.process_info.ttypes import ConnectionType
            collector_addr = e.sm.collector()
            if collector_addr is None:
                collector_addr = ''
            ConnectionState.update(conn_type = ConnectionType.COLLECTOR,
                name = '',
                status = status,
                server_addrs = [collector_addr],
                message = '%s to %s on %s' % (e.src, e.dst, e.event))
        #end _update_connection_state

        def _connection_state_up(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.UP)
        #end _connection_state_up

        def _connection_state_down(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.DOWN)
        #end _connection_state_down

        def _connection_state_init(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.INIT)
        #end _connection_state_init

        def _on_idle(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # BUGFIX: this line was tab-indented (TabError on Python 3)
            # and compared with '!= True'.
            if not e.sm._disable:
                e.sm._start_idle_hold_timer()
            # update connection state
            _connection_state_down(e)
            e.sm._collector_name = None
            e.sm._connection.sandesh_instance().send_generator_info()
        #end _on_idle

        def _on_disconnect(e):
            # update connection state
            _connection_state_down(e)
        #end _on_disconnect

        def _on_connect(e):
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._collector_name = None
            # clean up existing connection
            e.sm._delete_session()
            collector = e.sm._get_next_collector()
            if collector is not None:
                # update connection state
                _connection_state_init(e)
                e.sm._create_session(collector)
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN))
        #end _on_connect

        def _on_client_init(e):
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_init(e)
        #end _on_client_init

        def _on_established(e):
            e.sm._cancel_connect_timer()
            e.sm._collector_name = e.sm_event.source
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_up(e)
        #end _on_established

        # FSM - Fysom
        self._fsm = Fysom({
                           'initial': {'state' : State._IDLE,
                                       'event' : Event._EV_START,
                                       'defer' : True
                                      },
                           'events': [
                                      # _IDLE
                                      {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_START,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },

                                      # _DISCONNECT
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._DISCONNECT,
                                       'dst'  : State._CONNECT
                                      },

                                      # _CONNECT
                                      {'name' : Event._EV_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._DISCONNECT
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CLIENT_INIT
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._ESTABLISHED
                                      },

                                      # _ESTABLISHED
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_STOP,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT
                                      }
                                     ],
                           'callbacks': {
                                         'on' + State._IDLE : _on_idle,
                                         'on' + State._CONNECT : _on_connect,
                                         'on' + State._CLIENT_INIT : _on_client_init,
                                         'on' + State._ESTABLISHED : _on_established,
                                        }
                          })

        self._connection = connection
        self._session = None
        self._connects = 0
        self._disable = False
        self._idle_hold_timer = None
        self._connect_timer = None
        self._collectors = collectors
        self._collector_name = None
        self._collector_index = -1
        self._logger = logger
        # Events are processed serially off this queue by the FSM.
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)
    #end __init__

    # Public functions

    def initialize(self):
        # Kick the FSM with the deferred START event.
        self.enqueue_event(Event(event = Event._EV_START))
    #end initialize

    def session(self):
        # Current SandeshSession (None when no session exists).
        return self._session
    #end session

    def state(self):
        # Current FSM state name.
        return self._fsm.current
    #end state

    def shutdown(self):
        # Disable reconnects and drive the FSM to IDLE.
        self._disable = True
        self.enqueue_event(Event(event = Event._EV_STOP))
    #end shutdown

    def set_admin_state(self, down):
        """Administratively stop (down == True) or start the state
        machine; reconnects are disabled while down."""
        if down == True:
            self._disable = True
            sm_event = Event._EV_STOP
        else:
            self._disable = False
            sm_event = Event._EV_START
        self.enqueue_event(Event(event = sm_event))
    #end set_admin_state

    def connect_count(self):
        """Return how many successful connects have occurred."""
        return self._connects
    #end connect_count

    def collector(self):
        """Return the collector endpoint currently selected, or None.

        Bug fix: the original used ``self._collector_index is -1``.
        Identity comparison against an int literal is implementation
        dependent (relies on CPython small-int caching); use ``==``.
        """
        if self._collector_index == -1:
            return None
        return self._collectors[self._collector_index]
    # end collector

    def collector_name(self):
        """Return the name of the connected collector (None if unknown)."""
        return self._collector_name
    # end collector_name

    def collectors(self):
        """Return the configured collector endpoint list."""
        return self._collectors
    # end collectors

    def enqueue_event(self, event):
        """Push an Event onto the state machine's work queue."""
        self._event_queue.enqueue(event)
    #end enqueue_event

    def on_session_event(self, session, event):
        """Translate TCP session callbacks into state-machine events.

        Callbacks that arrive for a session other than the current one
        are logged and ignored.
        """
        if session is not self._session:
            self._logger.error("Ignore session event [%d] received for old session" % (event))
            return
        # Map the session event to a log line and the corresponding FSM event.
        if event == SandeshSession.SESSION_ESTABLISHED:
            self._logger.info("Session Event: TCP Connected")
            fsm_event = Event._EV_TCP_CONNECTED
        elif event == SandeshSession.SESSION_ERROR:
            self._logger.error("Session Event: TCP Connect Fail")
            fsm_event = Event._EV_TCP_CONNECT_FAIL
        elif event == SandeshSession.SESSION_CLOSE:
            self._logger.error("Session Event: TCP Connection Closed")
            fsm_event = Event._EV_TCP_CLOSE
        else:
            self._logger.error("Received unknown session event [%d]" % (event))
            return
        self.enqueue_event(Event(event = fsm_event, session = session))
    #end on_session_event

    def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector):
        """Queue a received control message, or reset the session on failure."""
        if sandesh_ctrl.success != True:
            # Negotiation with the Collector failed, reset the
            # connection and retry after sometime.
            self._logger.error("Negotiation with the Collector %s failed." % (collector))
            self._session.close()
            return
        self.enqueue_event(Event(event = Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                                 session = session,
                                 msg = sandesh_ctrl,
                                 source = collector))
    #end on_sandesh_ctrl_msg_receive

    def on_sandesh_uve_msg_send(self, sandesh_uve):
        """Queue an outgoing UVE message for the state machine to handle."""
        self.enqueue_event(Event(event = Event._EV_SANDESH_UVE_SEND,
                                 msg = sandesh_uve))
    #end on_sandesh_uve_msg_send

    # Private functions

    def _create_session(self, collector):
        """Create a SandeshSession towards ``collector`` ("<ip>:<port>")."""
        assert self._session is None
        endpoint = collector.split(':')
        server_addr = (endpoint[0], int(endpoint[1]))
        self._session = SandeshSession(self._connection.sandesh_instance(),
                                       server_addr,
                                       self.on_session_event,
                                       self._connection._receive_sandesh_msg)
    #end _create_session

    def _delete_session(self):
        if self._session:
            self._session.close()
            self._session = None
            self._collector_name = None
    #end _delete_session 

    def _get_next_collector(self):
        if self._collector_index is -1:
            if not self._collectors:
                return None
            self._collector_index = 0
        else:
            self._collector_index += 1
            if self._collector_index == len(self._collectors):
                self._collector_index = 0
        return self._collectors[self._collector_index]
    # end _get_next_collector

    def _start_idle_hold_timer(self):
        """Arm the idle-hold timer; when the hold time is zero/unset,
        queue the expiry event immediately instead."""
        if self._idle_hold_timer is None:
            if self._IDLE_HOLD_TIME:
                self._idle_hold_timer = gevent.spawn_later(self._IDLE_HOLD_TIME,
                                            self._idle_hold_timer_expiry_handler)
            else:
                self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    #end _start_idle_hold_timer

    def _cancel_idle_hold_timer(self):
        """Kill the pending idle-hold timer greenlet, if armed."""
        if self._idle_hold_timer is not None:
            gevent.kill(self._idle_hold_timer)
            self._idle_hold_timer = None
    #end _cancel_idle_hold_timer

    def _idle_hold_timer_expiry_handler(self):
        """Timer callback: clear the handle and queue the expiry event."""
        self._idle_hold_timer = None
        self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    #end _idle_hold_timer_expiry_handler
    
    def _start_connect_timer(self):
        """Arm the connect timer for the current session, if not armed.

        The current session is captured as the timer argument so a later
        expiry can be matched against it.
        """
        if self._connect_timer is None:
            self._connect_timer = gevent.spawn_later(self._CONNECT_TIME,
                                        self._connect_timer_expiry_handler,
                                        self._session)
    #end _start_connect_timer

    def _cancel_connect_timer(self):
        """Kill the pending connect timer greenlet, if armed."""
        if self._connect_timer is not None:
            gevent.kill(self._connect_timer)
            self._connect_timer = None
    #end _cancel_connect_timer

    def _connect_timer_expiry_handler(self, session):
        """Timer callback: queue CONNECT_TIMER_EXPIRED for ``session``."""
        self._connect_timer = None
        self.enqueue_event(Event(event = Event._EV_CONNECT_TIMER_EXPIRED,
                                 session = session))
    #end _connect_timer_expiry_handler

    def _is_ready_to_dequeue_event(self):
        """Always ready: events are drained from the queue unconditionally."""
        return True
    #end _is_ready_to_dequeue_event

    def _log_event(self, event):
        """Return False for high-frequency UVE sends while ESTABLISHED,
        True for every other event (i.e. whether the event is logged)."""
        suppress = (self._fsm.current == State._ESTABLISHED and
                    event.event == Event._EV_SANDESH_UVE_SEND)
        return not suppress
    #end _log_event

    def _dequeue_event(self, event):
        """Process one event popped from the work queue.

        Collector-change and data (UVE/ctrl) events are handled inline;
        anything else is dispatched to the FSM, possibly causing a state
        transition.
        """
        if self._log_event(event):
            self._logger.info("Processing event[%s] in state[%s]" \
                              % (event.event, self._fsm.current))
        # Drop events that carry a stale session reference.
        if event.session is not None and event.session is not self._session:
            self._logger.info("Ignore event [%s] received for old session" \
                              % (event.event))
            return
        if event.event == Event._EV_COLLECTOR_CHANGE:
            collector = self.collector()
            self._collector_index = -1
            collector_list_change = False
            if self._collectors != event.collectors:
                self._collectors = event.collectors
                collector_list_change = True
            # If the first collector of the new list is the active one,
            # no reconnect is needed; only refresh the UVE if the list
            # itself changed.
            if self._collectors and self._collectors[0] == collector:
                self._collector_index = 0
                self._logger.info("No change in active collector. "
                    "Ignore event [%s]" % (event.event))
                if collector_list_change:
                    # update the collector_list in the ModuleClientState UVE
                    self._connection.sandesh_instance().send_generator_info()
                return
            self._connection.sandesh_instance().send_generator_info()
        if event.event == Event._EV_SANDESH_UVE_SEND:
            # UVEs are forwarded while connected (or initializing),
            # dropped otherwise.
            if self._fsm.current == State._ESTABLISHED or self._fsm.current == State._CLIENT_INIT:
                self._connection.handle_sandesh_uve_msg(event.msg)
            else:
                self._connection.sandesh_instance().drop_tx_sandesh(event.msg,
                    SandeshTxDropReason.WrongClientSMState)
                self._logger.info("Discarding event[%s] in state[%s]" \
                                  % (event.event, self._fsm.current))
        elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \
                self._fsm.current == State._ESTABLISHED:
            self._connection.handle_sandesh_ctrl_msg(event.msg)
        elif self._fsm.cannot(event.event) is True:
            # The FSM defines no transition for this event in this state.
            self._logger.info("Unconsumed event[%s] in state[%s]" \
                              % (event.event, self._fsm.current))
        else:
            prev_state = self.state()
            getattr(self._fsm, event.event)(sm = self, sm_event = event)
            # Log state transition
            self._logger.info("Sandesh Client: Event[%s] => State[%s] -> State[%s]" \
                              % (event.event, prev_state, self.state()))
from work_queue import Task, WorkQueue, set_debug_flag
from work_queue import WORK_QUEUE_SCHEDULE_FCFS, WORK_QUEUE_SCHEDULE_FILES
from work_queue import WORK_QUEUE_RANDOM_PORT
from work_queue import WORK_QUEUE_OUTPUT
#from workqueue import WORK_QUEUE_MASTER_MODE_STANDALONE, WORK_QUEUE_WORKER_MODE_SHARED
from work_queue import WORK_QUEUE_TASK_ORDER_LIFO

import os
import sys
import time

set_debug_flag('debug')
set_debug_flag('wq')

# Create the queue on a random port and advertise it via the catalog server.
wq = WorkQueue(WORK_QUEUE_RANDOM_PORT,
               name='workqueue_example',
               catalog=True,
               exclusive=False)
os.environ['PATH'] = '../../../work_queue/src:' + os.environ['PATH']
# NOTE(review): shell string built with %-interpolation; wq.port is an int
# from the library so this is not injectable, but new code should prefer
# subprocess with an argument list.
os.system('work_queue_worker -d all localhost %d &' % wq.port)

# Bug fix: print statements were Python-2-only; the single-argument
# print() call below behaves identically on Python 2 and 3.
print(wq.name)

wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)
#wq.specify_name('workqueue_example')
#wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE)
#wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED)
wq.specify_task_order(WORK_QUEUE_TASK_ORDER_LIFO)

if wq.empty():
    print('work queue is empty')
# Example #28
# 0
class JobGenerator(object):
    """Generate tournament job documents from a Jinja2 template and store
    them in the MongoDB-backed work queue."""

    def __init__(self, template, debug=False, dry_run=False):
        """Initialize the generator.

        :param template: path to the Jinja2 template file
        :param debug: when True, print rendered YAML and update documents
        :param dry_run: when True, never write to the work queue
        :raises Exception: if the template is missing or is a directory
        """
        self.config = Configuration()
        self.debug = debug
        self.dry_run = dry_run

        # The work queue will figure out a valid combination of MongoDB access
        # parameters, e.g., host/port, URI, or replica set discovery via DNS
        self.wq = WorkQueue(host=self.config.mongodb_host,
                            port=self.config.mongodb_port,
                            uri=self.config.mongodb_uri,
                            srv_name=self.config.mongodb_rs_srv,
                            database=self.config.mongodb_queue_db,
                            replicaset=self.config.mongodb_rs,
                            collection=self.config.mongodb_queue_col)

        if not os.path.exists(template):
            raise Exception("Template file does not exist")
        self.template_dir = os.path.dirname(template)
        self.template_file = os.path.basename(template)
        if self.template_file == "":
            raise Exception("Template must be a file, not a directory")

        self.jinja = jinja2.Environment(loader=jinja2.FileSystemLoader(
            self.template_dir),
                                        autoescape=False)

    def _generate_random_id(self, team_cyan, team_magenta, suffix_length=8):
        """Return '<cyan>-vs-<magenta>:<suffix>' with a random uppercase
        alphanumeric suffix of ``suffix_length`` characters."""
        suffix = ''.join(
            random.choice(string.ascii_uppercase + string.digits)
            for _ in range(suffix_length))
        return team_cyan + "-vs-" + team_magenta + ":" + suffix

    def _generate_id(self,
                     tournament_name,
                     team_cyan,
                     team_magenta,
                     index,
                     suffix_length=6):
        """Return the canonical job ID
        '<tournament>:<index zero-padded to suffix_length>:<cyan>-vs-<magenta>'.
        """
        return \
         "{tournament_name}:{:0{width}}:{team_cyan}-vs-{team_magenta}"\
         .format(index, width=suffix_length, tournament_name=tournament_name,
                 team_cyan=team_cyan, team_magenta=team_magenta)

    @staticmethod
    def id_regex(tournament_name):
        """Return a regex matching every job ID of the given tournament.

        Fix: raw string literal, so '\\d' is no longer an invalid escape
        sequence (a DeprecationWarning/SyntaxWarning on modern Python).
        """
        return r"^%s:\d+:.*" % tournament_name

    def generate(self, tournament_name, team_cyan, team_magenta, job_num=None):
        """Render the template and build one job.

        :param job_num: explicit job index; defaults to the next queue ID
            (or 1 in dry-run mode)
        :returns: tuple (jobname, idnum, params)
        :raises FileNotFoundError: if the template cannot be loaded
        """
        param_vars = {
            "tournament_name": tournament_name,
            "team_name_cyan": team_cyan,
            "team_name_magenta": team_magenta or ""
        }
        template = self.jinja.get_template(self.template_file)
        if template is None:
            print("Failed to get template '%s' (in '%s')" %
                  (self.template_file, self.template_dir))
            # Bug fix: the original referenced an undefined name
            # 'template_name' here, raising NameError instead of the
            # intended FileNotFoundError.
            raise FileNotFoundError("Could not find template '%s' ( in '%s')" \
                                    % (self.template_file, self.template_dir))

        yamldoc = template.render(param_vars)
        if self.debug: print("YAML:\n%s" % yamldoc)

        # NOTE(review): yaml.load_all without an explicit Loader uses the
        # full (unsafe) loader on older PyYAML; consider safe_load_all if
        # the templates need no custom tags — confirm before changing.
        try:
            (tournament_doc, parameter_doc) = yaml.load_all(yamldoc)
        except:
            # Deliberate: dump the rendered YAML with line numbers for
            # debugging, then re-raise the original error.
            for idx, line in enumerate(yamldoc.splitlines()):
                print("%-4d: %s" % (idx, line))
            raise

        # Job index: explicit > dry-run placeholder > next queue ID.
        idnum = job_num if job_num is not None else 1 if self.dry_run else self.wq.get_next_id(
        )
        jobname = self._generate_id(tournament_name, team_cyan, team_magenta,
                                    idnum)

        params = {
            "parameter_vars": param_vars,
            "parameter_doc_yaml": yamldoc,
            "template_parameters": parameter_doc["parameters"]
        }

        return (jobname, idnum, params)

    def store(self, jobname, idnum, params):
        """Add the job to the work queue (no-op in dry-run mode)."""
        if not self.dry_run:
            self.wq.add_item(jobname, idnum, params)

    def generate_and_store(self, tournament_name, team_cyan, team_magenta):
        """Generate a job and store it; return (jobname, idnum, params)."""
        (jobname, idnum, params) = self.generate(tournament_name, team_cyan,
                                                 team_magenta)
        self.store(jobname, idnum, params)
        return (jobname, idnum, params)

    def update_params(self,
                      tournament_name,
                      print_diffs=False,
                      only_pending=True):
        """Re-render every matching job and push updated parameters.

        Jobs whose rendered YAML differs from the stored copy are reset
        to 'pending' with a recorded diff; their manifests and completion
        status are cleared.
        """
        for i in self.wq.get_items(JobGenerator.id_regex(tournament_name)):
            if only_pending and i['status']['state'] != 'pending': continue

            # Job names have the shape <tournament>:<index>:<cyan>-vs-<magenta>.
            job_parts = i['name'].split(':')
            teams = job_parts[2].split("-vs-")
            (jobname, idnum, params) = \
                self.generate(job_parts[0], teams[0], teams[1], job_num=int(job_parts[1]))
            if jobname != i['name']:
                raise Exception("Invalid jobname, expected '%s', got '%s'" %
                                (i['name'], jobname))
            diff = unified_diff(
                i["params"]["parameter_doc_yaml"].splitlines(True),
                params['parameter_doc_yaml'].splitlines(True),
                fromfile='%s OLD' % jobname,
                tofile='%s' % jobname)
            diffstr = ''.join(diff)
            if print_diffs:
                if len(diffstr) == 0:
                    print("%s: no update required" % jobname)
                else:
                    print(diffstr)

            if not self.dry_run and len(diffstr) > 0:
                update = {
                    "$push": {
                        "updates": {
                            "updated": datetime.datetime.utcnow(),
                            "diff": diffstr
                        }
                    },
                    "$set": {
                        "status.state": "pending",
                        "params": params
                    },
                    "$unset": {
                        "manifests": "",
                        "status.completed": "",
                        "status.running": ""
                    }
                }
                if self.debug: pprint(update)
                self.wq.update_item(jobname, update)

    def cancel_jobs(self, tournament_name, only_pending=True):
        """Mark matching jobs cancelled (all non-cancelled jobs, or only
        pending ones when only_pending is True)."""
        update = {
            "$set": {
                "status.state": "cancelled",
                "status.cancelled": datetime.datetime.utcnow()
            }
        }
        for i in self.wq.get_items(JobGenerator.id_regex(tournament_name)):
            if i['status']['state'] != 'cancelled' and \
               (not only_pending or i['status']['state'] == 'pending'):
                print("Cancelling %s" % i['name'])
                if not self.dry_run:
                    self.wq.update_item(i['name'], update)
class Sandesh(object):
    _DEFAULT_LOG_FILE = sand_logger.SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = (
        sand_logger.SandeshLogger._DEFAULT_SYSLOG_FACILITY)

    class SandeshRole:
        # Role of this Sandesh instance; INVALID until init_generator()
        # or init_collector() is called (also used to detect unit tests).
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2
    # end class SandeshRole

    def __init__(self):
        """Create an uninitialized Sandesh instance.

        Real setup happens in init_generator(); until then the role stays
        INVALID, which is how is_unit_test() detects test mode.
        """
        # Message context/identity fields.
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        # Connection/client state.
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
        self._connect_to_collector = True
    # end __init__

    # Public functions

    def init_generator(self, module, source, node_type, instance_id,
                       collectors, client_context,
                       http_port, sandesh_req_uve_pkg_list=None,
                       discovery_client=None, connect_to_collector=True,
                       logger_class=None, logger_config_file=None,
                       host_ip='127.0.0.1', alarm_ack_callback=None):
        """Initialize this instance as a Sandesh generator.

        Sets identity fields and the logger, builds message statistics,
        tracing and the request/UVE type maps, optionally starts the
        introspect HTTP server (http_port != -1) and, when
        connect_to_collector is True, initiates the client connection to
        the first (primary) / second (secondary) collector in
        ``collectors``.
        """
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        self._host_ip = host_ip
        self._client_context = client_context
        self._collectors = collectors
        self._connect_to_collector = connect_to_collector
        # Received sandesh requests are processed off this work queue.
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        self._send_level = SandeshLevel.INVALID
        self._init_logger(module, logger_class=logger_class,
                          logger_config_file=logger_config_file)
        self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                          connect_to_collector)
        from sandesh_stats import SandeshMessageStatistics
        self._msg_stats = SandeshMessageStatistics()
        self._trace = trace.Trace()
        self._sandesh_request_map = {}
        self._alarm_ack_callback = alarm_ack_callback
        self._uve_type_maps = SandeshUVETypeMaps(self._logger)
        if sandesh_req_uve_pkg_list is None:
            sandesh_req_uve_pkg_list = []
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        self._gev_httpd = None
        if http_port != -1:
            self._http_server = SandeshHttp(
                self, module, http_port, sandesh_req_uve_pkg_list)
            self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
        primary_collector = None
        secondary_collector = None
        if self._collectors is not None:
            if len(self._collectors) > 0:
                primary_collector = self._collectors[0]
            if len(self._collectors) > 1:
                secondary_collector = self._collectors[1]
        if self._connect_to_collector:
            self._client = SandeshClient(
                self, primary_collector, secondary_collector,
                discovery_client)
            self._client.initiate()
    # end init_generator

    def uninit(self):
        """Tear down: currently only stops the introspect HTTP server."""
        self.kill_httpd()

    def kill_httpd(self):
        """Stop the introspect HTTP server and kill its greenlet.

        Best-effort: any error during shutdown is logged at debug level
        and swallowed (deliberate, so teardown never raises).
        """
        if self._gev_httpd:
            try:
                self._http_server.stop_http_server()
                self._http_server = None
                # Yield so the server greenlet can observe the stop.
                gevent.sleep(0)
                self._gev_httpd.kill()
            except Exception as e:
                self._logger.debug(str(e))

    def record_port(self, name, port):
        """Report an allocated port to the parent process via a named pipe.

        Writes '<port>\\n' to /tmp/<module>.<ppid>.<name>_port. Failure to
        open the pipe is logged and ignored.
        """
        pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name)
        try:
            pipeout = os.open(pipe_name, os.O_WRONLY)
        except Exception:
            self._logger.error('Cannot write %s_port %d to %s'
                               % (name, port, pipe_name))
        else:
            # NOTE(review): logged at error level even on success in the
            # original; level preserved to keep observable behavior.
            self._logger.error('Writing %s_port %d to %s'
                               % (name, port, pipe_name))
            # Bug fix: os.write() requires bytes on Python 3; encoding the
            # ASCII digits is a no-op on Python 2 (str is bytes there).
            os.write(pipeout, ('%d\n' % port).encode())
            os.close(pipeout)

    def logger(self):
        """Return the underlying Python logger."""
        return self._logger
    # end logger

    def sandesh_logger(self):
        """Return the SandeshLogger wrapper."""
        return self._sandesh_logger
    # end sandesh_logger

    def set_logging_params(self, enable_local_log=False, category='',
                           level=SandeshLevel.SYS_INFO,
                           file=sand_logger.SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY,
                           enable_trace_print=False,
                           enable_flow_log=False):
        """Delegate the full logging configuration to the SandeshLogger."""
        self._sandesh_logger.set_logging_params(
            enable_local_log=enable_local_log, category=category,
            level=level, file=file, enable_syslog=enable_syslog,
            syslog_facility=syslog_facility,
            enable_trace_print=enable_trace_print,
            enable_flow_log=enable_flow_log)
    # end set_logging_params

    def set_trace_print(self, enable_trace_print):
        """Enable/disable trace printing (delegates to SandeshLogger)."""
        self._sandesh_logger.set_trace_print(enable_trace_print)
    # end set_trace_print

    def set_flow_logging(self, enable_flow_log):
        """Enable/disable flow logging (delegates to SandeshLogger)."""
        self._sandesh_logger.set_flow_logging(enable_flow_log)
    # end set_flow_logging

    def set_local_logging(self, enable_local_log):
        """Enable/disable local logging (delegates to SandeshLogger)."""
        self._sandesh_logger.set_local_logging(enable_local_log)
    # end set_local_logging

    def set_logging_level(self, level):
        """Set the logging level (delegates to SandeshLogger)."""
        self._sandesh_logger.set_logging_level(level)
    # end set_logging_level

    def set_logging_category(self, category):
        """Set the logging category filter (delegates to SandeshLogger)."""
        self._sandesh_logger.set_logging_category(category)
    # end set_logging_category

    def set_logging_file(self, file):
        """Set the log file (delegates to SandeshLogger)."""
        self._sandesh_logger.set_logging_file(file)
    # end set_logging_file

    def is_logging_dropped_allowed(self, sandesh):
        """Decide whether a dropped message may be logged.

        FLOW messages follow the flow-logging switch; other messages obey
        their optional per-message do_rate_limit_drop_log flag (default
        True when the attribute is absent).
        """
        if sandesh.type() == SandeshType.FLOW:
            return self.is_flow_logging_enabled()
        return getattr(sandesh, 'do_rate_limit_drop_log', True)
    # end is_logging_dropped_allowed

    def is_send_queue_enabled(self):
        """Return True when the send queue is enabled."""
        return self._send_queue_enabled
    # end is_send_queue_enabled

    def is_connect_to_collector_enabled(self):
        """Return True when this instance connects to a collector."""
        return self._connect_to_collector
    # end is_connect_to_collector_enabled

    def set_send_queue(self, enable):
        if self._send_queue_enabled != enable:
            self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                              self._send_queue_enabled, enable)
            self._send_queue_enabled = enable
            if enable:
                connection = self._client.connection()
                if connection and connection.session():
                    connection.session().send_queue().may_be_start_runner()
    # end set_send_queue

    def set_send_level(self, count, sandesh_level):
        """Update the minimum send level (the 'count' argument is unused
        here; it mirrors the watermark-callback signature)."""
        if self._send_level == sandesh_level:
            return
        self._logger.info('Sandesh Send Level [%s] -> [%s]' % \
                          (SandeshLevel._VALUES_TO_NAMES[self._send_level],
                           SandeshLevel._VALUES_TO_NAMES[sandesh_level]))
        self._send_level = sandesh_level
    # end set_send_level

    def send_level(self):
        """Return the current minimum send level."""
        return self._send_level
    # end send_level

    def init_collector(self):
        """Collector-role initialization: intentionally a no-op here."""
        pass
    # end init_collector

    def msg_stats(self):
        """Return the SandeshMessageStatistics accumulator."""
        return self._msg_stats
    # end msg_stats

    @classmethod
    def next_seqnum(cls):
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 1
        else:
            cls._lseqnum += 1
        return cls._lseqnum
    # end next_seqnum

    @classmethod
    def lseqnum(cls):
        """Return the current local sequence number without advancing it
        (0 when next_seqnum() has never been called)."""
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 0
        return cls._lseqnum
    # end lseqnum

    def module(self):
        """Return the module name."""
        return self._module
    # end module

    def source_id(self):
        """Return the source (host) identifier."""
        return self._source
    # end source_id

    def node_type(self):
        """Return the node type string."""
        return self._node_type
    # end node_type

    def instance_id(self):
        """Return the instance identifier."""
        return self._instance_id
    # end instance_id

    def host_ip(self):
        """Return the host IP (set by init_generator)."""
        return self._host_ip
    # end host_ip

    def scope(self):
        """Return the message scope."""
        return self._scope
    # end scope

    def context(self):
        """Return the message context."""
        return self._context
    # end context

    def seqnum(self):
        """Return the per-message sequence number.

        NOTE(review): _seqnum is not initialized in __init__; calling
        this before it is assigned elsewhere would raise AttributeError —
        confirm against the generated message classes.
        """
        return self._seqnum
    # end seqnum

    def timestamp(self):
        """Return the message timestamp."""
        return self._timestamp
    # end timestamp

    def versionsig(self):
        """Return the version signature."""
        return self._versionsig
    # end versionsig

    def type(self):
        """Return the sandesh message type."""
        return self._type
    # end type

    def hints(self):
        """Return the message hints bitmask."""
        return self._hints
    # end hints

    def client(self):
        """Return the SandeshClient (None when not connecting)."""
        return self._client
    # end client

    def level(self):
        """Return the message level."""
        return self._level
    # end level

    def category(self):
        """Return the message category."""
        return self._category
    # end category

    def validate(self):
        """Validation hook: intentionally a no-op in the base class."""
        return
    # end validate

    def alarm_ack_callback(self):
        """Return the alarm-acknowledgement callback (may be None)."""
        return self._alarm_ack_callback
    # end alarm_ack_callback

    def is_flow_logging_enabled(self):
        """Delegate to SandeshLogger: is flow logging enabled?"""
        return self._sandesh_logger.is_flow_logging_enabled()
    # end is_flow_logging_enabled

    def is_trace_print_enabled(self):
        """Delegate to SandeshLogger: is trace printing enabled?"""
        return self._sandesh_logger.is_trace_print_enabled()
    # end is_trace_print_enabled

    def is_local_logging_enabled(self):
        """Delegate to SandeshLogger: is local logging enabled?"""
        return self._sandesh_logger.is_local_logging_enabled()
    # end is_local_logging_enabled

    def logging_level(self):
        """Delegate to SandeshLogger: current logging level."""
        return self._sandesh_logger.logging_level()
    # end logging_level

    def logging_category(self):
        """Delegate to SandeshLogger: current logging category filter."""
        return self._sandesh_logger.logging_category()
    # end logging_category

    def is_syslog_logging_enabled(self):
        """Delegate to SandeshLogger: is syslog logging enabled?"""
        return self._sandesh_logger.is_syslog_logging_enabled()
    # end is_syslog_logging_enabled

    def logging_syslog_facility(self):
        """Delegate to SandeshLogger: configured syslog facility."""
        return self._sandesh_logger.logging_syslog_facility()
    # end logging_syslog_facility

    def is_unit_test(self):
        """True when init_generator()/init_collector() was never called
        (role still INVALID) — treated as unit-test mode."""
        return self._role == self.SandeshRole.INVALID
    # end is_unit_test

    def handle_test(self, sandesh_init):
        """Unit-test path: return True when the message was consumed.

        If the sandesh instance is in unit-test mode, or this message's
        level lies in the UT range, the message is only logged locally
        (when logging is allowed) and True is returned; otherwise False.
        """
        if sandesh_init.is_unit_test() or self._is_level_ut():
            if self.is_logging_allowed(sandesh_init):
                sandesh_init._logger.debug(self.log())
            return True
        return False

    def is_logging_allowed(self, sandesh_init):
        """Decide whether this message may be logged locally.

        FLOW messages follow the flow-logging switch. Other messages
        require local logging to be on, the configured level to be at
        least this message's level, and — when a category filter is set —
        a matching category.
        """
        if self._type == SandeshType.FLOW:
            return sandesh_init.is_flow_logging_enabled()

        if not sandesh_init.is_local_logging_enabled():
            return False

        level_ok = sandesh_init.logging_level() >= self._level

        configured_category = sandesh_init.logging_category()
        if configured_category is None or len(configured_category) == 0:
            category_ok = True
        else:
            category_ok = (configured_category == self._category)

        return level_ok and category_ok
    # end is_logging_allowed

    def enqueue_sandesh_request(self, sandesh):
        """Queue a received sandesh request for async processing."""
        self._rcv_queue.enqueue(sandesh)
    # end enqueue_sandesh_request

    def send_sandesh(self, tx_sandesh):
        """Hand a message to the client, or account it as dropped.

        When no client exists and connecting to a collector was never
        requested, the message's own level is passed through so the drop
        can still be logged at that level.
        """
        if self._client:
            self._client.send_sandesh(tx_sandesh)
            return
        drop_level = None if self._connect_to_collector else tx_sandesh.level()
        self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient,
                             drop_level)
    # end send_sandesh

    def drop_tx_sandesh(self, tx_sandesh, drop_reason, level=None):
        """Account a dropped TX message and optionally log it.

        With an explicit 'level', the message content is logged at that
        level; otherwise an error line naming the drop reason is emitted.
        """
        self._msg_stats.update_tx_stats(tx_sandesh.__class__.__name__,
            sys.getsizeof(tx_sandesh), drop_reason)
        if self.is_logging_dropped_allowed(tx_sandesh):
            if level is not None:
                self._logger.log(
                    sand_logger.SandeshLogger.get_py_logger_level(level),
                    tx_sandesh.log())
            else:
                self._logger.error('SANDESH: [DROP: %s] %s' % \
                    (SandeshTxDropReason._VALUES_TO_NAMES[drop_reason],
                     tx_sandesh.log()))
    # end drop_tx_sandesh

    def send_generator_info(self):
        """Publish the ModuleClientState UVE describing this client.

        The try/except/finally pattern lazily initializes _start_time on
        the first call (the AttributeError path); the finally block then
        builds and sends the UVE unconditionally.
        """
        from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \
            ModuleClientState, SandeshModuleClientTrace
        client_info = SandeshClientInfo()
        try:
            # First call raises (no _start_time yet) and seeds it below.
            client_start_time = self._start_time
        except Exception:
            self._start_time = util.UTCTimestampUsec()
        finally:
            client_info.start_time = self._start_time
            client_info.pid = os.getpid()
            if self._http_server is not None:
                client_info.http_port = self._http_server.get_port()
            client_info.collector_name = self._client.connection().collector()
            client_info.status = self._client.connection().state()
            client_info.successful_connections = (
                self._client.connection().statemachine().connect_count())
            client_info.primary = self._client.connection().primary_collector()
            if client_info.primary is None:
                client_info.primary = ''
            client_info.secondary = (
                self._client.connection().secondary_collector())
            if client_info.secondary is None:
                client_info.secondary = ''
            # UVE key: <source>:<node_type>:<module>:<instance_id>
            module_state = ModuleClientState(name=self._source + ':' +
                                             self._node_type + ':' +
                                             self._module + ':' +
                                             self._instance_id,
                                             client_info=client_info)
            generator_info = SandeshModuleClientTrace(
                data=module_state, sandesh=self)
            generator_info.send(sandesh=self)
    # end send_generator_info

    def get_sandesh_request_object(self, request):
        try:
            req_type = self._sandesh_request_map[request]
        except KeyError:
            self._logger.error('Invalid Sandesh Request "%s"' % (request))
            return None
        else:
            return req_type()
    # end get_sandesh_request_object

    def trace_enable(self):
        """Globally enable tracing (delegates to the Trace module)."""
        self._trace.TraceOn()
    # end trace_enable

    def trace_disable(self):
        """Globally disable tracing (delegates to the Trace module)."""
        self._trace.TraceOff()
    # end trace_disable

    def is_trace_enabled(self):
        """Return True when tracing is globally enabled."""
        return self._trace.IsTraceOn()
    # end is_trace_enabled

    def trace_buffer_create(self, name, size, enable=True):
        """Create a named trace buffer of the given size."""
        self._trace.TraceBufAdd(name, size, enable)
    # end trace_buffer_create

    def trace_buffer_delete(self, name):
        """Delete the named trace buffer."""
        self._trace.TraceBufDelete(name)
    # end trace_buffer_delete

    def trace_buffer_enable(self, name):
        """Enable tracing into the named buffer."""
        self._trace.TraceBufOn(name)
    # end trace_buffer_enable

    def trace_buffer_disable(self, name):
        """Disable tracing into the named buffer."""
        self._trace.TraceBufOff(name)
    # end trace_buffer_disable

    def is_trace_buffer_enabled(self, name):
        """Return True when the named trace buffer is enabled."""
        return self._trace.IsTraceBufOn(name)
    # end is_trace_buffer_enabled

    def trace_buffer_list_get(self):
        """Return the list of trace buffer names."""
        return self._trace.TraceBufListGet()
    # end trace_buffer_list_get

    def trace_buffer_size_get(self, name):
        """Return the size of the named trace buffer."""
        return self._trace.TraceBufSizeGet(name)
    # end trace_buffer_size_get

    def trace_buffer_read(self, name, read_context, count, read_cb):
        """Read entries from the named buffer, invoking read_cb per entry."""
        self._trace.TraceRead(name, read_context, count, read_cb)
    # end trace_buffer_read

    def trace_buffer_read_done(self, name, context):
        """Finish a read session on the named buffer for this context."""
        self._trace.TraceReadDone(name, context)
    # end trace_buffer_read_done

    # API to send the trace buffer to the Collector.
    # If trace count is not specified/or zero, then the entire trace buffer
    # is sent to the Collector.
    # [Note] No duplicate trace message sent to the Collector. i.e., If there
    # is no trace message added between two consequent calls to this API, then
    # no trace message is sent to the Collector.
    def send_sandesh_trace_buffer(self, trace_buf, count=0):
        """Send up to *count* trace messages from buffer *trace_buf* to the
        Collector; count=0 sends the entire buffer. Duplicate messages are
        not re-sent between consecutive calls."""
        trace_req_runner = SandeshTraceRequestRunner(
            sandesh=self, request_buffer_name=trace_buf, request_context='',
            read_context='Collector', request_count=count)
        trace_req_runner.Run()
    # end send_sandesh_trace_buffer

    # Private functions

    def _is_level_ut(self):
        """Return True when the current send level lies inside the
        unit-test band [UT_START, UT_END]."""
        return SandeshLevel.UT_START <= self._level <= SandeshLevel.UT_END
    # end _is_level_ut

    def _create_task(self):
        """Spawn and return a greenlet running the runner loop forever."""
        return gevent.spawn(self._runner.run_for_ever)
    # end _create_task

    def _process_rx_sandesh(self, rx_sandesh):
        handle_request_fn = getattr(rx_sandesh, "handle_request", None)
        if callable(handle_request_fn):
            handle_request_fn(rx_sandesh)
        else:
            self._logger.error('Sandesh Request "%s" not implemented' %
                               (rx_sandesh.__class__.__name__))
    # end _process_rx_sandesh

    def _create_sandesh_request_and_uve_lists(self, package):
        """Recursively walk *package* and register the Sandesh requests,
        UVEs and Alarms declared in every generated 'ttypes' module found.
        """
        try:
            imp_pkg = __import__(package)
        except ImportError:
            self._logger.error('Failed to import package "%s"' % (package))
        else:
            try:
                # Only real packages carry __path__; a bare module cannot
                # be walked, so bail out with an error.
                pkg_path = imp_pkg.__path__
            except AttributeError:
                self._logger.error(
                    'Failed to get package [%s] path' % (package))
                return
            for importer, mod, ispkg in (
                pkgutil.walk_packages(path=pkg_path,
                                      prefix=imp_pkg.__name__ + '.')):
                if not ispkg:
                    # Registration hooks live only in the code-generated
                    # 'ttypes' leaf modules.
                    module = mod.rsplit('.', 1)[-1]
                    if 'ttypes' == module:
                        self._logger.debug(
                            'Add Sandesh requests in module "%s"' % (mod))
                        self._add_sandesh_request(mod)
                        self._logger.debug(
                            'Add Sandesh UVEs in module "%s"' % (mod))
                        self._add_sandesh_uve(mod)
                        self._logger.debug(
                            'Add Sandesh Alarms in module "%s"' % (mod))
                        self._add_sandesh_alarm(mod)
    # end _create_sandesh_request_and_uve_lists

    def _add_sandesh_request(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            try:
                sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST')
            except AttributeError:
                self._logger.error(
                    '"%s" module does not have sandesh request list' % (mod))
            else:
                # Add sandesh requests to the dictionary.
                for req in sandesh_req_list:
                    self._sandesh_request_map[req.__name__] = req
    # end _add_sandesh_request

    def _get_sandesh_uve_list(self, imp_module):
        try:
            sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh UVE list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_uve_list
    # end _get_sandesh_uve_list

    def _add_sandesh_uve(self, mod):
        """Import module *mod* and register a SandeshUVEPerTypeMap for each
        (uve_type, uve_data_type) pair it declares."""
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
            return
        uve_list = self._get_sandesh_uve_list(imp_module)
        if not uve_list:
            return
        for uve_type, uve_data_type in uve_list:
            # NOTE(review): constructed for its registration side effect;
            # the instance itself is not retained here.
            SandeshUVEPerTypeMap(self, SandeshType.UVE, uve_type,
                                 uve_data_type)
    # end _add_sandesh_uve

    def _get_sandesh_alarm_list(self, imp_module):
        try:
            sandesh_alarm_list = getattr(imp_module, '_SANDESH_ALARM_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh Alarm list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_alarm_list
    # end _get_sandesh_alarm_list

    def _add_sandesh_alarm(self, mod):
        """Import module *mod* and register a SandeshUVEPerTypeMap for each
        (alarm_type, alarm_data_type) pair it declares."""
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
            return
        alarm_list = self._get_sandesh_alarm_list(imp_module)
        if not alarm_list:
            return
        for alarm_type, alarm_data_type in alarm_list:
            # NOTE(review): constructed for its registration side effect;
            # the instance itself is not retained here.
            SandeshUVEPerTypeMap(self, SandeshType.ALARM, alarm_type,
                                 alarm_data_type)
    # end _add_sandesh_alarm

    def _init_logger(self, module, logger_class=None,
                     logger_config_file=None):
        """Create the sandesh logger for *module* (defaulting to 'sandesh')
        and cache both the wrapper and its underlying logger.

        A custom *logger_class* takes precedence over the stock
        SandeshLogger; *logger_config_file* is forwarded either way.
        """
        module_name = module if module else 'sandesh'
        if logger_class:
            self._sandesh_logger = sand_logger.create_logger(
                module_name, logger_class,
                logger_config_file=logger_config_file)
        else:
            self._sandesh_logger = sand_logger.SandeshLogger(
                module_name, logger_config_file=logger_config_file)
        self._logger = self._sandesh_logger.logger()
Example #30
0
class QMaster(threading.Thread):
    """Master thread driving a cctools WorkQueue for MD sampling jobs.

    Submits trajectory jobs to remote workers, polls for returned tasks,
    post-processes results, and wakes waiters via an event. The thread
    starts itself at the end of __init__.
    """

    def __init__(self, project, port, log_freq=600): # 600 seconds
        """Initialize the QMaster

        Parameters
        ----------
        project : object
            assumed to provide `method` ('explicit'|'implicit') and
            `pdb_topology_file` -- TODO confirm against the project model
        port : int
            port for the WorkQueue master to listen on
        log_freq : int, optional
            frequency to print info about the status of the work queue.
            In units of seconds. Default is to print every 10 minutes.
        """

        threading.Thread.__init__(self)
        self.project = project

        self.log_freq = log_freq  # print time in seconds
        self.wake_freq = 1 # seconds

        self.wq = WorkQueue(port, name='MSMAccelerator', catalog=True, exclusive=False)

        logger.info('WORK QUEUE MASTER LISTENING ON PORT: %d', self.wq.port)
        logger.info('(Start a local worker with >> work_queue_worker -d all localhost %d & )', self.wq.port)

        # method controls whether or not we need to bring back solvated_xtc as well
        if self.project.method == 'explicit':
            self.return_wet_xtc = True
        elif self.project.method == 'implicit':
            self.return_wet_xtc = False
        else:
            raise Exception("project.method must be 'explicit' or 'implicit'")
        logger.info('Return wet xtc set to %s', self.return_wet_xtc)

        # what does this specify algorithm do?
        self.wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)

        # fast abort kills jobs that appear to be straggling (taking more than 1.5x average)
        #self.wq.activate_fast_abort(1.5)

        # setting the stop event signals for the thread to die
        self._stop = threading.Event()

        # the thread sets the event every time a job returns or there are no waiting jobs
        # and it finished post processing. See the wait method
        self._mainloop_wake_event_cause = None
        self._mainloop_wake_event = threading.Event()

        # start the thread
        self.start()

    def run(self):
        """Main thread-loop for the QMaster thread"""
        last_print = time.time()

        while True:
            time.sleep(self.wake_freq)

            if not self.wq.empty():
                # Block up to wake_freq seconds for a returned task.
                t = self.wq.wait(self.wake_freq)
                if t:
                    if t.return_status != 0:
                        logger.error('Worker returned nonzero exit status for job: %d', t.return_status)
                    else:
                        self.on_return(t)
                    self._mainloop_wake_event_cause = 'job returned'
                    self._mainloop_wake_event.set()

            if self.wq.stats.tasks_waiting == 0 and not self._mainloop_wake_event.is_set():
                self._mainloop_wake_event_cause = 'queue empty'
                self._mainloop_wake_event.set() # also set the event if there are no tasks in the queue

            if self._stop.is_set():
                logger.info('Recieved stop signal. Shutting down all workers')
                self.wq.shutdown_workers(0) # 0 indicates to shut all of them down
                sys.exit(0)

            if time.time() - last_print > self.log_freq:
                logger.info('workers initialized: %d, ready: %d, busy: %d', self.wq.stats.workers_init, self.wq.stats.workers_ready, self.wq.stats.workers_busy)
                logger.info('workers running: %d, waiting: %d, complete: %d', self.wq.stats.tasks_running, self.wq.stats.tasks_waiting, self.wq.stats.tasks_complete)
                last_print = time.time()

    def num_jobs_waiting(self):
        """Number of jobs waiting to be sent out

        This number should be kept at 1, and when it drops to zero a new job
        should be generated.

        Returns
        -------
        n : int
            The number
        """
        return self.wq.stats.tasks_waiting

    def num_jobs_in_queue(self):
        """Get the number of jobs currently in the work queue

        This includes both the jobs running remotely and the ones waiting
        here

        Returns
        -------
        n : int
            The number
        """
        return self.wq.stats.tasks_running + self.wq.stats.tasks_waiting

    def stop(self):
        """Signal the Qmaster thread to stop"""
        self._stop.set()

    def wait(self):
        """Block until some sort of action happens in the main-thread loop.

        This call will return either when a job as returned from the workers,
        or when the queue is empty (last job in the local queue has been sent
        out)

        Returns
        -------
        wakeup_cause : str
            Either 'job returned' or 'queue empty', depending on the reason
        """
        self._mainloop_wake_event.wait()
        self._mainloop_wake_event.clear()
        # NOTE(review): cause is read after clear(); a concurrent set() from
        # the run loop could overwrite it between the two lines -- confirm.
        cause = self._mainloop_wake_event_cause
        if not cause in ['job returned', 'queue empty']:
            raise Exception('Bad wakeup cause')
        return cause

    @with_db_lock
    def submit(self, traj):
        """ Submit a job to the work-queue for further sampling.

        Parameters
        ----------
        traj : models.Trajectory
            assumed to be a DB-backed trajectory with forcefield/mode set
            and an `init_pdb` structure attached -- TODO confirm
        """
        if traj.submit_time is not None:
            raise ValueError("This traj has already been submitted")
        # Persist the trajectory so it gets an id and default file names.
        Session.add(traj)
        Session.flush()
        traj.populate_default_filenames()

        if not hasattr(traj, 'init_pdb'):
            raise ValueError('Traj is supposed to have a pdb object tacked on')
        save_file(traj.init_pdb_fn, traj.init_pdb)

        remote_driver_fn = os.path.split(str(traj.forcefield.driver))[1]
        remote_pdb_fn = 'input.pdb'
        remote_output_fn = 'production_dry{}'.format(traj.forcefield.output_extension)

        if traj.mode is None or traj.forcefield is None:
            raise ValueError('malformed traj')

        # Remote command: make the driver executable, then run it on the pdb.
        task = Task('chmod +x ./{driver}; ./{driver} {pdb_fn} {ff} {water} {mode} {threads}'.format(
            pdb_fn=remote_pdb_fn,
            mode=traj.mode,
            driver=remote_driver_fn,
            ff=traj.forcefield.name,
            water=traj.forcefield.water,
            threads=traj.forcefield.threads))


        #why does traj.forcefield.driver come out as unicode?
        task.specify_input_file(str(traj.forcefield.driver), remote_driver_fn)
        task.specify_output_file(traj.wqlog_fn, 'logs/driver.log')
        task.specify_input_file(traj.init_pdb_fn, remote_pdb_fn)
        task.specify_output_file(traj.dry_xtc_fn, remote_output_fn)

        if self.return_wet_xtc:
            # this is the XTC file with waters, generated by the driver
            # when you're doing implicit solvent only, this stuff is not used.
            remote_wet_output_fn = 'production_wet{}'.format(traj.forcefield.output_extension)
            task.specify_output_file(traj.wet_xtc_fn, remote_wet_output_fn)
            task.specify_output_file(traj.last_wet_snapshot_fn, 'last_wet_snapshot.pdb')
        else:
            logger.debug('Not requesting production_wet%s from driver (implicit)', traj.forcefield.output_extension)

        # The tag carries the DB id so on_return can find the record again.
        task.specify_tag(str(traj.id))
        task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES) # what does this do?

        traj.submit_time = datetime.now()

        self.wq.submit(task)
        logger.info('Submitted to queue: %s', traj)

    @with_db_lock
    def on_return(self, task):
        """Called by main thread on the return of data from the workers.
        Post-processing"""
        logger.info('Retrieved task %s', task.tag)
        traj = Session.query(models.Trajectory).get(int(task.tag))

        try:
            # save lh5 version of the trajectory
            conf = load_file(self.project.pdb_topology_file)
            coordinates = msmbuilder.Trajectory.load_trajectory_file(str(traj.dry_xtc_fn), Conf=conf)
            save_file(traj.lh5_fn, coordinates)

        except Exception as e:
            logger.error('When postprocessing %s, convert to lh5 failed!', traj)
            logger.exception(e)
            raise

        # convert last_wet_snapshot to lh5
        pdb_to_lh5(traj, 'last_wet_snapshot_fn')
        pdb_to_lh5(traj, 'init_pdb_fn')


        traj.host = task.host
        traj.returned_time = datetime.now()
        traj.length = len(coordinates)
        logger.info('Finished converting new traj to lh5 sucessfully')
""" Python-WorkQueue test """

from work_queue import Task, WorkQueue, set_debug_flag
from work_queue import WORK_QUEUE_SCHEDULE_FCFS, WORK_QUEUE_SCHEDULE_FILES
from work_queue import WORK_QUEUE_RANDOM_PORT
from work_queue import WORK_QUEUE_OUTPUT
#from workqueue import WORK_QUEUE_MASTER_MODE_STANDALONE, WORK_QUEUE_WORKER_MODE_SHARED

import os
import sys
import time

# Enable cctools debug output for the 'debug' and 'wq' subsystems.
set_debug_flag('debug')
set_debug_flag('wq')

# Standalone master on a random port, not advertised to a catalog server.
wq = WorkQueue(WORK_QUEUE_RANDOM_PORT, name='workqueue_example', catalog=False, exclusive=False)
# Spawn one local worker in the background pointed at this master.
# NOTE(review): Python 2 syntax (print statements) throughout this script.
os.system('work_queue_worker -d all localhost %d &' % wq.port)

print wq.name

# First-come-first-served task scheduling.
wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)
#wq.specify_name('workqueue_example')
#wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE)
#wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED)

if wq.empty():
    print 'work queue is empty'

# Accumulates output file names for the tasks submitted below.
outputs = []
for i in range(5):
    def __init__(self, connection, logger, primary_collector,
                 secondary_collector):
        """Build the sandesh-client connection state machine.

        Defines the per-state entry handlers as closures, wires them into a
        Fysom FSM (IDLE / CONNECT / CONNECT_TO_BACKUP / CLIENT_INIT /
        ESTABLISHED), then records the connection, collectors, timers and
        the event work queue.

        Parameters: *connection* owns the session and collector bookkeeping;
        *logger* is the sandesh logger; *primary_collector* /
        *secondary_collector* seed the active/backup collector pair.
        """

        def _on_idle(e):
            # Entering IDLE: stop any pending connect attempt.
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            e.sm._start_idle_hold_timer()
        #end _on_idle

        def _on_disconnect(e):
            # No work on entering DISCONNECT.
            pass
        #end _on_disconnect

        def _on_connect(e):
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                # No collector known: bounce back via COLLECTOR_UNKNOWN.
                e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN))
        #end _on_connect

        def _on_connect_to_backup(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                # Swap roles so the backup becomes the active collector.
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_BACKUP_COLLECTOR_UNKNOWN))
        #end _on_connect_to_backup

        def _on_client_init(e):
            # TCP is up: start the reader greenlet and announce ourselves.
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
        #end _on_client_init

        def _on_established(e):
            # Collector accepted us: record it and process the ctrl message.
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            self._connection.sandesh_instance().send_generator_info()
        #end _on_established

        # FSM - Fysom
        # 'defer': True postpones the initial _EV_START transition until it
        # is explicitly triggered. NOTE(review): presumably fired through
        # the event queue below -- confirm against the caller.
        self._fsm = Fysom({
                           'initial': {'state' : State._IDLE,
                                       'event' : Event._EV_START,
                                       'defer' : True
                                      },
                           'events': [
                                      # _IDLE
                                      {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },

                                      # _DISCONNECT
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._DISCONNECT,
                                       'dst'  : State._CONNECT
                                      },

                                      # _CONNECT
                                      {'name' : Event._EV_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._DISCONNECT
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CONNECT_TO_BACKUP
                                      {'name' : Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CLIENT_INIT
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._ESTABLISHED
                                      },

                                      # _ESTABLISHED
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT
                                      }
                                     ],
                           'callbacks': {
                                         'on' + State._IDLE : _on_idle,
                                         'on' + State._CONNECT : _on_connect,
                                         'on' + State._CONNECT_TO_BACKUP : _on_connect_to_backup,
                                         'on' + State._CLIENT_INIT : _on_client_init,
                                         'on' + State._ESTABLISHED : _on_established,
                                        }
                          })

        self._connection = connection
        self._session = None
        self._connects = 0
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        # NOTE(review): this WorkQueue is the generator's event work queue
        # (dequeue callback + readiness predicate), not the cctools queue
        # used elsewhere in this file -- confirm against its definition.
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)
Example #33
0
def WorkQueueSubmitThread(task_queue=multiprocessing.Queue(),
                          queue_lock=threading.Lock(),
                          launch_cmd=None,
                          env=None,
                          collector_queue=multiprocessing.Queue(),
                          see_worker_output=False,
                          data_dir=".",
                          full=False,
                          cancel_value=multiprocessing.Value('i', 1),
                          port=WORK_QUEUE_DEFAULT_PORT,
                          wq_log_dir=None,
                          project_password=None,
                          project_password_file=None,
                          project_name=None):
    """Thread to handle Parsl app submissions to the Work Queue objects.
    Takes in Parsl functions submitted using submit(), and creates a
    Work Queue task with the appropriate specifications, which is then
    submitted to Work Queue. After tasks are completed, processes the
    exit status and exit code of the task, and sends results to the
    Work Queue collector thread.
    """
    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Enable debugging flags and create logging file
    if wq_log_dir is not None:
        logger.debug("Setting debugging flags and creating logging file")
        wq_debug_log = os.path.join(wq_log_dir, "debug_log")
        cctools_debug_flags_set("all")
        cctools_debug_config_file(wq_debug_log)

    # Create WorkQueue queue object
    logger.debug("Creating WorkQueue Object")
    try:
        logger.debug("Listening on port {}".format(port))
        q = WorkQueue(port)
    except Exception as e:
        logger.error("Unable to create WorkQueue object: {}".format(e))
        raise e

    # Specify WorkQueue queue attributes
    if project_name:
        q.specify_name(project_name)
    if project_password:
        q.specify_password(project_password)
    elif project_password_file:
        q.specify_password_file(project_password_file)

    # Only write logs when the wq_log_dir is specified, which it most likely will be
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)
        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    wq_tasks = set()
    orig_ppid = os.getppid()
    continue_running = True
    while (continue_running):
        # Monitor the task queue
        ppid = os.getppid()
        if ppid != orig_ppid:
            logger.debug("new Process")
            continue_running = False
            continue

        # Submit tasks
        while task_queue.qsize() > 0:
            if cancel_value.value == 0:
                logger.debug("cancel value set to cancel")
                continue_running = False
                break

            # Obtain task from task_queue
            try:
                item = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue
            parsl_id = item["task_id"]

            # Extract information about the task
            function_data_loc = item["data_loc"]
            function_data_loc_remote = function_data_loc.split("/")[-1]
            function_result_loc = item["result_loc"]
            function_result_loc_remote = function_result_loc.split("/")[-1]
            input_files = item["input_files"]
            output_files = item["output_files"]
            std_files = item["std_files"]

            full_script_name = workqueue_worker.__file__
            script_name = full_script_name.split("/")[-1]

            remapping_string = ""
            std_string = ""

            # Parse input file information
            logger.debug("Looking at input")
            for item in input_files:
                if item[3] == "std":
                    std_string += "mv " + item[1] + " " + item[0] + "; "
                else:
                    remapping_string += item[0] + ":" + item[1] + ","
            logger.debug(remapping_string)

            # Parse output file information
            logger.debug("Looking at output")
            for item in output_files:
                remapping_string += item[0] + ":" + item[1] + ","
            logger.debug(remapping_string)

            if len(input_files) + len(output_files) > 0:
                remapping_string = "-r " + remapping_string
                remapping_string = remapping_string[:-1]

            # Create command string
            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                input_file=function_data_loc_remote,
                output_file=function_result_loc_remote,
                remapping_string=remapping_string)
            command_str = std_string + command_str
            logger.debug(command_str)

            # Create WorkQueue task for the command
            logger.debug("Sending task {} with command: {}".format(
                parsl_id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                continue

            # Specify environment variables for the task
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            # Specify script, and data/result files for task
            t.specify_file(full_script_name,
                           script_name,
                           WORK_QUEUE_INPUT,
                           cache=True)
            t.specify_file(function_data_loc,
                           function_data_loc_remote,
                           WORK_QUEUE_INPUT,
                           cache=False)
            t.specify_file(function_result_loc,
                           function_result_loc_remote,
                           WORK_QUEUE_OUTPUT,
                           cache=False)
            t.specify_tag(str(parsl_id))
            logger.debug("Parsl ID: {}".format(t.id))

            # Specify all input/output files for task
            for item in input_files:
                t.specify_file(item[0],
                               item[1],
                               WORK_QUEUE_INPUT,
                               cache=item[2])
            for item in output_files:
                t.specify_file(item[0],
                               item[1],
                               WORK_QUEUE_OUTPUT,
                               cache=item[2])
            for item in std_files:
                t.specify_file(item[0],
                               item[1],
                               WORK_QUEUE_OUTPUT,
                               cache=item[2])

            # Submit the task to the WorkQueue object
            logger.debug("Submitting task {} to WorkQueue".format(parsl_id))
            try:
                wq_id = q.submit(t)
                wq_tasks.add(wq_id)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))

                msg = {
                    "tid": parsl_id,
                    "result_received": False,
                    "reason": "Workqueue Task Start Failure",
                    "status": 1
                }

                collector_queue.put_nowait(msg)
                continue

            logger.debug("Task {} submitted to WorkQueue with id {}".format(
                parsl_id, wq_id))

        if cancel_value.value == 0:
            continue_running = False

        # If the queue is not empty wait on the WorkQueue queue for a task
        task_found = True
        if not q.empty() and continue_running:
            while task_found is True:
                if cancel_value.value == 0:
                    continue_running = False
                    task_found = False
                    continue

                # Obtain the task from the queue
                t = q.wait(1)
                if t is None:
                    task_found = False
                    continue
                else:
                    parsl_tid = t.tag
                    logger.debug(
                        "Completed WorkQueue task {}, parsl task {}".format(
                            t.id, parsl_tid))
                    status = t.return_status
                    task_result = t.result
                    msg = None

                    # Task failure
                    if status != 0 or (task_result != WORK_QUEUE_RESULT_SUCCESS
                                       and task_result !=
                                       WORK_QUEUE_RESULT_OUTPUT_MISSING):
                        logger.debug(
                            "Wrapper Script status: {}\nWorkQueue Status: {}".
                            format(status, task_result))
                        # Wrapper script failure
                        if status != 0:
                            logger.debug(
                                "WorkQueue task {} failed with status {}".
                                format(t.id, status))
                            reason = "Wrapper Script Failure: "
                            if status == 1:
                                reason += "problem parsing command line options"
                            elif status == 2:
                                reason += "problem loading function data"
                            elif status == 3:
                                reason += "problem remapping file names"
                            elif status == 4:
                                reason += "problem writing out function result"
                            else:
                                reason += "unable to process wrapper script failure with status = {}".format(
                                    status)
                            reason += "\nTrace:\n" + str(t.output)
                            logger.debug(
                                "WorkQueue runner script failed for task {} because {}\n"
                                .format(parsl_tid, reason))
                        # WorkQueue system failure
                        else:
                            reason = "WorkQueue System Failure: "
                            if task_result == 1:
                                reason += "missing input file"
                            elif task_result == 2:
                                reason += "unable to generate output file"
                            elif task_result == 4:
                                reason += "stdout has been truncated"
                            elif task_result == 1 << 3:
                                reason += "task terminated with a signal"
                            elif task_result == 2 << 3:
                                reason += "task used more resources than requested"
                            elif task_result == 3 << 3:
                                reason += "task ran past the specified end time"
                            elif task_result == 4 << 3:
                                reason += "result could not be classified"
                            elif task_result == 5 << 3:
                                reason += "task failed, but not a task error"
                            elif task_result == 6 << 3:
                                reason += "unable to complete after specified number of retries"
                            elif task_result == 7 << 3:
                                reason += "task ran for more than the specified time"
                            elif task_result == 8 << 3:
                                reason += "task needed more space to complete task"
                            else:
                                reason += "unable to process Work Queue system failure"

                        msg = {
                            "tid": parsl_tid,
                            "result_received": False,
                            "reason": reason,
                            "status": status
                        }

                        collector_queue.put_nowait(msg)

                    # Task Success
                    else:
                        # Print the output from the task
                        if see_worker_output:
                            print(t.output)

                        # Load result into result file
                        result_loc = os.path.join(
                            data_dir,
                            "task_" + str(parsl_tid) + "_function_result")
                        logger.debug(
                            "Looking for result in {}".format(result_loc))
                        f = open(result_loc, "rb")
                        result = pickle.load(f)
                        f.close()

                        msg = {
                            "tid": parsl_tid,
                            "result_received": True,
                            "result": result
                        }
                        wq_tasks.remove(t.id)

                    collector_queue.put_nowait(msg)

        if continue_running is False:
            logger.debug("Exiting WorkQueue Master Thread event loop")
            break

    # Remove all WorkQueue tasks that remain in the queue object
    for wq_task in wq_tasks:
        logger.debug("Cancelling WorkQueue Task {}".format(wq_task))
        q.cancel_by_taskid(wq_task)

    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
Example #34
0
class SandeshSession(TcpSession):
    """TCP session between a Sandesh generator and a Collector.

    Combines a TcpSession transport with a SandeshReader/SandeshWriter
    pair for message framing, plus a WorkQueue that drains outgoing
    sandesh messages one at a time via ``_send_sandesh``.
    """

    # TCP keepalive tuning applied in _set_socket_options().
    _KEEPALIVE_IDLE_TIME = 45  # in secs
    _KEEPALIVE_INTERVAL = 3  # in secs
    _KEEPALIVE_PROBES = 5

    def __init__(self, sandesh_instance, server, event_handler, sandesh_msg_handler):
        self._sandesh_instance = sandesh_instance
        self._logger = sandesh_instance._logger
        self._event_handler = event_handler
        self._reader = SandeshReader(self, sandesh_msg_handler)
        self._writer = SandeshWriter(self)
        self._send_queue = WorkQueue(self._send_sandesh,
                                     self._is_ready_to_send_sandesh)
        TcpSession.__init__(self, server)

    # --- public API -------------------------------------------------

    def sandesh_instance(self):
        """Return the owning sandesh instance."""
        return self._sandesh_instance

    def is_send_queue_empty(self):
        """Return True when no outgoing messages are pending."""
        return self._send_queue.is_queue_empty()

    def is_connected(self):
        """Return True once the underlying TCP session is connected."""
        return self._connected

    def enqueue_sandesh(self, sandesh):
        """Queue *sandesh* for asynchronous transmission."""
        self._send_queue.enqueue(sandesh)

    def send_queue(self):
        """Return the outgoing WorkQueue."""
        return self._send_queue

    # --- TcpSession overrides ---------------------------------------

    def connect(self):
        TcpSession.connect(self, timeout=5)

    def _on_read(self, buf):
        # A negative return from the reader signals an unrecoverable
        # framing error; tear the session down.
        if self._reader.read_msg(buf) < 0:
            self._logger.error("SandeshReader Error. Close Collector session")
            self.close()

    def _handle_event(self, event):
        self._event_handler(self, event)

    def _set_socket_options(self):
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        # Not every platform exposes all keepalive knobs; apply only the
        # ones this socket module provides.
        for opt_name, opt_value in (
                ("TCP_KEEPIDLE", self._KEEPALIVE_IDLE_TIME),
                ("TCP_KEEPALIVE", self._KEEPALIVE_IDLE_TIME),
                ("TCP_KEEPINTVL", self._KEEPALIVE_INTERVAL),
                ("TCP_KEEPCNT", self._KEEPALIVE_PROBES)):
            if hasattr(socket, opt_name):
                self._socket.setsockopt(socket.IPPROTO_TCP,
                                        getattr(socket, opt_name),
                                        opt_value)

    # --- internal helpers -------------------------------------------

    def _send_sandesh(self, sandesh):
        """WorkQueue callback: log and transmit one sandesh message."""
        more = not self._send_queue.is_queue_empty()
        if not self._connected:
            # No collector session: emit to the local log instead of
            # dropping the message silently.
            self._logger.log(
                SandeshLogger.get_py_logger_level(sandesh.level()),
                sandesh.log())
            return
        if sandesh.is_logging_allowed(self._sandesh_instance):
            self._logger.log(
                SandeshLogger.get_py_logger_level(sandesh.level()),
                sandesh.log())
        self._writer.send_msg(sandesh, more)

    def _is_ready_to_send_sandesh(self):
        """WorkQueue gate: drain only while sending is globally enabled."""
        return self._sandesh_instance.is_send_queue_enabled()
Example #35
0
def work_queue_main(items, function, accumulator, **kwargs):
    """Execute using Work Queue

    For valid parameters, see :py:func:`work_queue_executor` in :py:mod:`executor`.
    For more information, see :ref:`intro-coffea-wq`
    """

    global _wq_queue

    _check_dynamic_chunksize_targets(kwargs["dynamic_chunksize"])

    # Optionally wrap both the user function and the accumulation step
    # with compression at the requested level.
    compression = kwargs["compression"]
    if compression is None:
        accum_fn = accumulate_result_files
    else:
        function = _compression_wrapper(compression, function)
        accum_fn = _compression_wrapper(compression, accumulate_result_files)

    _vprint.verbose_mode = kwargs["verbose"] or kwargs["print_stdout"]
    _vprint.status_mode = kwargs["status"]

    # Port 0 lets Work Queue choose one; that only makes sense when the
    # manager is discoverable by name through the catalog.
    if not kwargs["port"]:
        kwargs["port"] = 0 if kwargs["master_name"] else 9123

    if kwargs["environment_file"] and not kwargs["wrapper"]:
        raise ValueError(
            "Location of python_package_run could not be determined automatically.\nUse 'wrapper' argument to the work_queue_executor."
        )

    # The queue object is created once and reused across invocations.
    if _wq_queue is None:
        _wq_queue = WorkQueue(
            port=kwargs["port"],
            name=kwargs["master_name"],
            debug_log=kwargs["debug_log"],
            stats_log=kwargs["stats_log"],
            transactions_log=kwargs["transactions_log"],
        )

        # Make use of the stored password file, if enabled.
        if kwargs["password_file"] is not None:
            _wq_queue.specify_password_file(kwargs["password_file"])

        print("Listening for work queue workers on port {}...".format(_wq_queue.port))
        # perform a wait to print any warnings before progress bars
        _wq_queue.wait(0)

    _declare_resources(kwargs)

    # All staging files live in a per-run temporary directory that is
    # removed automatically on exit.
    with tempfile.TemporaryDirectory(
        prefix="wq-executor-tmp-", dir=kwargs["filepath"]
    ) as tmpdir:
        wrapper = _create_fn_wrapper(kwargs["x509_proxy"], tmpdir=tmpdir)
        fn_file = _function_to_file(
            function, prefix_name=kwargs["function_name"], tmpdir=tmpdir
        )
        accum_file = _function_to_file(accum_fn, prefix_name="accum", tmpdir=tmpdir)

        if kwargs["custom_init"]:
            kwargs["custom_init"](_wq_queue)

        if kwargs["desc"] == "Preprocessing":
            return _work_queue_preprocessing(
                items, accumulator, wrapper, fn_file, tmpdir, kwargs
            )
        return _work_queue_processing(
            items,
            accumulator,
            wrapper,
            fn_file,
            accum_file,
            tmpdir,
            kwargs,
        )
class SandeshStateMachine(object):
    """Connection state machine for a Sandesh generator.

    Drives the TCP session to the active Collector through the states
    declared in ``State`` (_IDLE, _DISCONNECT, _CONNECT,
    _CONNECT_TO_BACKUP, _CLIENT_INIT, _ESTABLISHED), failing over
    between the primary and backup collectors.  All events are funnelled
    through a WorkQueue so transitions are processed serially by
    ``_dequeue_event``.
    """

    _IDLE_HOLD_TIME = 5 # in seconds, delay before reconnecting from _IDLE
    _CONNECT_TIME = 30 # in seconds, timeout for a connect attempt

    def __init__(self, connection, logger, primary_collector, 
                 secondary_collector):

        # State-entry callbacks for the Fysom FSM constructed below.
        # Each receives the fysom event object 'e'; 'e.sm' is this state
        # machine instance (supplied as a keyword in _dequeue_event).

        def _on_idle(e):
            # Entering _IDLE: stop any in-flight connect, fall back to
            # the collectors advertised by the connection, drop the
            # session, and arm the idle-hold timer before retrying.
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            e.sm._start_idle_hold_timer()
        #end _on_idle

        def _on_disconnect(e):
            # NOTE(review): defined but not registered in the 'callbacks'
            # map below, so entering _DISCONNECT currently runs no handler
            # — confirm whether that is intentional.
            pass
        #end _on_disconnect

        def _on_connect(e):
            # Entering _CONNECT: attempt a session to the active
            # collector, or report that no collector is known.
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN))
        #end _on_connect

        def _on_connect_to_backup(e):
            # Entering _CONNECT_TO_BACKUP: swap active/backup collectors
            # and retry, or report that no backup is known.
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_BACKUP_COLLECTOR_UNKNOWN))
        #end _on_connect_to_backup

        def _on_client_init(e):
            # Entering _CLIENT_INIT: TCP is up; start the reader greenlet
            # and announce this generator to the Collector.
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
        #end _on_client_init
        
        def _on_established(e):
            # Entering _ESTABLISHED: control-message handshake completed.
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            self._connection.sandesh_instance().send_generator_info()
        #end _on_established

        # FSM - Fysom
        # The initial _EV_START event is deferred; it is fired later by
        # initialize() through the event queue.
        self._fsm = Fysom({
                           'initial': {'state' : State._IDLE,
                                       'event' : Event._EV_START,
                                       'defer' : True
                                      },
                           'events': [
                                      # _IDLE
                                      {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },

                                      # _DISCONNECT 
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._DISCONNECT,
                                       'dst'  : State._CONNECT
                                      },

                                      # _CONNECT
                                      {'name' : Event._EV_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._DISCONNECT
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CONNECT_TO_BACKUP
                                      {'name' : Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CLIENT_INIT
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._ESTABLISHED
                                      },

                                      # _ESTABLISHED
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT
                                      }
                                     ],
                           'callbacks': {
                                         'on' + State._IDLE : _on_idle,
                                         'on' + State._CONNECT : _on_connect,
                                         'on' + State._CONNECT_TO_BACKUP : _on_connect_to_backup,
                                         'on' + State._CLIENT_INIT : _on_client_init,
                                         'on' + State._ESTABLISHED : _on_established,
                                        }
                          })

        self._connection = connection
        self._session = None
        self._connects = 0
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        # Serializes all FSM events through _dequeue_event.
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)
    #end __init__

    # Public functions

    def initialize(self):
        """Kick off the FSM by firing the deferred _EV_START event."""
        self.enqueue_event(Event(event = Event._EV_START))
    #end initialize

    def session(self):
        """Return the current SandeshSession, or None."""
        return self._session 
    #end session 

    def state(self):
        """Return the FSM's current state name."""
        return self._fsm.current
    #end state 

    def shutdown(self):
        """Request an orderly stop of the state machine."""
        self.enqueue_event(Event(event = Event._EV_STOP))
    #end shutdown

    def set_admin_state(self, down):
        """Administratively stop (down=True) or start the machine."""
        if down == True:
            self.enqueue_event(Event(event = Event._EV_STOP))
        else:
            self.enqueue_event(Event(event = Event._EV_START))
    #end set_admin_state

    def connect_count(self):
        """Return how many times a TCP connection reached _CLIENT_INIT."""
        return self._connects
    #end connect_count

    def active_collector(self):
        """Return the collector currently being used/attempted."""
        return self._active_collector
    #end active_collector

    def backup_collector(self):
        """Return the standby collector, if any."""
        return self._backup_collector
    #end backup_collector

    def enqueue_event(self, event):
        """Queue *event* for serial processing by _dequeue_event."""
        self._event_queue.enqueue(event)
    #end enqueue_event

    def on_session_event(self, session, event):
        """Translate SandeshSession events into FSM events.

        Events for a stale (replaced) session are ignored.
        """
        if session is not self._session:
            self._logger.error("Ignore session event [%d] received for old session" % (event))
            return 
        if SandeshSession.SESSION_ESTABLISHED == event:
            self._logger.info("Session Event: TCP Connected")
            self.enqueue_event(Event(event = Event._EV_TCP_CONNECTED,
                                     session = session))
        elif SandeshSession.SESSION_ERROR == event:
            self._logger.error("Session Event: TCP Connect Fail")
            self.enqueue_event(Event(event = Event._EV_TCP_CONNECT_FAIL,
                                     session = session))
        elif SandeshSession.SESSION_CLOSE == event:
            self._logger.error("Session Event: TCP Connection Closed")
            self.enqueue_event(Event(event = Event._EV_TCP_CLOSE,
                                     session = session))
        else:
            self._logger.error("Received unknown session event [%d]" % (event))
    #end on_session_event

    def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector):
        """Handle the Collector's control-message handshake response.

        On success, drives the FSM to _ESTABLISHED; on failure, closes
        the session so the connection is retried.
        """
        if sandesh_ctrl.success == True:
            self.enqueue_event(Event(event = Event._EV_SANDESH_CTRL_MESSAGE_RECV, 
                                     session = session,
                                     msg = sandesh_ctrl,
                                     source = collector))
        else:
            # Negotiation with the Collector failed, reset the 
            # connection and retry after sometime.
            self._logger.error("Negotiation with the Collector %s failed." % (collector))
            self._session.close()
    #end on_sandesh_ctrl_msg_receive

    def on_sandesh_uve_msg_send(self, sandesh_uve):
        """Queue a UVE message; sent only in _CLIENT_INIT/_ESTABLISHED."""
        self.enqueue_event(Event(event = Event._EV_SANDESH_UVE_SEND,
                                 msg = sandesh_uve))
    #end on_sandesh_uve_msg_send

    # Private functions

    def _create_session(self):
        # Build a fresh session to the active collector; the previous
        # one must already have been torn down.
        assert self._session is None
        self._session = SandeshSession(self._connection.sandesh_instance(),
                                       self._active_collector,
                                       self.on_session_event,
                                       self._connection._receive_sandesh_msg) 
    #end _create_session

    def _delete_session(self):
        # Close and forget the current session, if any.
        if self._session:
            self._session.close()
            self._session = None
            self._connection.reset_collector()
    #end _delete_session 

    def _start_idle_hold_timer(self):
        # With a zero/None hold time the expiry event fires immediately.
        if self._idle_hold_timer is None:
            if self._IDLE_HOLD_TIME:
                self._idle_hold_timer = gevent.spawn_later(self._IDLE_HOLD_TIME,
                                            self._idle_hold_timer_expiry_handler)
            else:
                self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    #end _start_idle_hold_timer

    def _cancel_idle_hold_timer(self):
        if self._idle_hold_timer is not None:
            gevent.kill(self._idle_hold_timer)
            self._idle_hold_timer = None
    #end _cancel_idle_hold_timer

    def _idle_hold_timer_expiry_handler(self):
        self._idle_hold_timer = None
        self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    #end _idle_hold_timer_expiry_handler
    
    def _start_connect_timer(self):
        # The session is captured so a late expiry for a replaced
        # session can be ignored in _dequeue_event.
        if self._connect_timer is None:
            self._connect_timer = gevent.spawn_later(self._CONNECT_TIME,
                                        self._connect_timer_expiry_handler, 
                                        self._session)
    #end _start_connect_timer

    def _cancel_connect_timer(self):
        if self._connect_timer is not None:
            gevent.kill(self._connect_timer)
            self._connect_timer = None
    #end _cancel_connect_timer

    def _connect_timer_expiry_handler(self, session):
        self._connect_timer = None
        self.enqueue_event(Event(event = Event._EV_CONNECT_TIMER_EXPIRED,
                                 session = session))
    #end _connect_timer_expiry_handler

    def _is_ready_to_dequeue_event(self):
        # Events are always drained as soon as they are queued.
        return True
    #end _is_ready_to_dequeue_event

    def _dequeue_event(self, event):
        """Process one queued event, applying an FSM transition if valid.

        Events bound to a stale session, redundant collector changes,
        and events the FSM cannot consume in its current state are
        logged and dropped.  UVE-send and late control messages are
        handled outside the FSM table.
        """
        self._logger.info("Processing event[%s] in state[%s]" \
                          % (event.event, self._fsm.current))
        if event.session is not None and event.session is not self._session:
            self._logger.info("Ignore event [%s] received for old session" \
                              % (event.event))
            return
        if event.event == Event._EV_COLLECTOR_CHANGE:
            old_active_collector = self._active_collector
            self._active_collector = event.primary_collector
            self._backup_collector = event.secondary_collector
            if old_active_collector == self._active_collector:
                self._logger.info("No change in active collector. Ignore event [%s]" \
                                  % (event.event))
                return
        if event.event == Event._EV_SANDESH_UVE_SEND:
            if self._fsm.current == State._ESTABLISHED or self._fsm.current == State._CLIENT_INIT:
                self._connection.handle_sandesh_uve_msg(event.msg)
            else:
                self._logger.info("Discarding event[%s] in state[%s]" \
                                  % (event.event, self._fsm.current))
        elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \
                self._fsm.current == State._ESTABLISHED:
            self._connection.handle_sandesh_ctrl_msg(event.msg)
        elif self._fsm.cannot(event.event) is True:
            self._logger.info("Unconsumed event[%s] in state[%s]" \
                              % (event.event, self._fsm.current))
        else:
            prev_state = self.state()
            getattr(self._fsm, event.event)(sm = self, sm_event = event)
            # Log state transition
            self._logger.info("Sandesh Client: Event[%s] => State[%s] -> State[%s]" \
                              % (event.event, prev_state, self.state()))
Example #37
0
def WorkQueueSubmitThread(task_queue=None,
                          queue_lock=None,
                          launch_cmd=None,
                          env=None,
                          collector_queue=None,
                          see_worker_output=False,
                          data_dir=".",
                          full=False,
                          cancel_value=None,
                          port=WORK_QUEUE_DEFAULT_PORT,
                          wq_log_dir=None,
                          project_password=None,
                          project_password_file=None,
                          project_name=None):
    """Submit Parsl tasks to Work Queue and relay their results.

    Pulls task descriptions off ``task_queue``, turns each into a Work Queue
    ``Task`` (staging the runner script plus the task's input/output files),
    submits it, then waits on the Work Queue queue and pushes one status
    message per completed task onto ``collector_queue``.

    Runs until ``cancel_value`` becomes 0 or the parent process changes
    (i.e. the original parent died).  Intended to run as its own process so
    Work Queue's blocking ``wait`` does not hold the GIL.

    :param task_queue: multiprocessing.Queue of task dicts to submit
        (a fresh queue is created if omitted).
    :param queue_lock: unused here; accepted for interface compatibility.
    :param launch_cmd: format string for the remote command; must accept
        ``input_file``, ``output_file`` and ``remapping_string`` fields.
    :param env: optional dict of environment variables set on each task.
    :param collector_queue: multiprocessing.Queue receiving result/status
        message dicts (a fresh queue is created if omitted).
    :param see_worker_output: if True, print each completed task's output.
    :param data_dir: directory in which task result pickles are written.
    :param full: with ``wq_log_dir``, enable full resource monitoring.
    :param cancel_value: shared multiprocessing.Value; 0 requests shutdown.
    :param port: port for the Work Queue master to listen on.
    :param wq_log_dir: directory for WQ debug/master/transaction logs.
    :param project_password: optional WQ project password.
    :param project_password_file: optional WQ project password file.
    :param project_name: optional WQ project name.
    :return: 0 on exit.
    """
    # Fixed: the defaults previously constructed multiprocessing.Queue(),
    # threading.Lock() and multiprocessing.Value() once at import time
    # (the mutable-default-argument pitfall); create them per call instead.
    if task_queue is None:
        task_queue = multiprocessing.Queue()
    if queue_lock is None:
        queue_lock = threading.Lock()
    if collector_queue is None:
        collector_queue = multiprocessing.Queue()
    if cancel_value is None:
        cancel_value = multiprocessing.Value('i', 1)

    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Remember the parent PID so we can exit if we are ever re-parented.
    orig_ppid = os.getppid()

    wq_tasks = set()

    continue_running = True

    if wq_log_dir is not None:
        wq_debug_log = os.path.join(wq_log_dir, "debug")
        cctools_debug_flags_set("all")
        cctools_debug_config_file(wq_debug_log)

    logger.debug("Creating Workqueue Object")
    try:
        q = WorkQueue(port)
    except Exception as e:
        # Fixed: was logger.error("...{}", format(e)) — the comma passed
        # format(e) as an ignored lazy-format argument, so the exception
        # text never appeared in the log.
        logger.error("Unable to create Workqueue object: {}".format(e))
        raise

    if project_name:
        q.specify_name(project_name)

    if project_password:
        q.specify_password(project_password)
    elif project_password_file:
        q.specify_password_file(project_password_file)

    # Only write logs when the log_dir is specified, which it most likely
    # always will be.
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)

        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    while continue_running:
        # Monitor the task queue; stop if the parent process has changed.
        ppid = os.getppid()
        if ppid != orig_ppid:
            continue_running = False
            continue

        # Submit tasks
        while task_queue.qsize() > 0:
            if cancel_value.value == 0:
                continue_running = False
                break

            try:
                item = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue
            parsl_id = item["task_id"]

            function_data_loc = item["data_loc"]
            function_result_loc = item["result_loc"]
            # Remote names are just the basename of the local paths.
            function_result_loc_remote = function_result_loc.split("/")[-1]
            function_data_loc_remote = function_data_loc.split("/")[-1]

            input_files = item["input_files"]
            output_files = item["output_files"]
            std_files = item["std_files"]

            full_script_name = workqueue_worker.__file__
            script_name = full_script_name.split("/")[-1]

            remapping_string = ""

            std_string = ""
            # Build the local->remote name remapping, plus shell "mv"
            # commands for std streams.  (Fixed: loop variables renamed
            # from "item", which shadowed the task dict above.)
            logger.debug("looking at input")
            for in_spec in input_files:
                if in_spec[3] == "std":
                    std_string += "mv " + in_spec[1] + " " + in_spec[0] + "; "
                else:
                    remapping_string += in_spec[0] + ":" + in_spec[1] + ","
            logger.debug(remapping_string)

            logger.debug("looking at output")
            for out_spec in output_files:
                remapping_string += out_spec[0] + ":" + out_spec[1] + ","
            logger.debug(remapping_string)

            if len(input_files) + len(output_files) > 0:
                remapping_string = "-r " + remapping_string
                remapping_string = remapping_string[:-1]  # drop trailing comma

            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                input_file=function_data_loc_remote,
                output_file=function_result_loc_remote,
                remapping_string=remapping_string)

            logger.debug(command_str)
            command_str = std_string + command_str
            logger.debug(command_str)

            logger.debug("Sending task {} with command: {}".format(
                parsl_id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                continue
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            # Stage the runner script (cached across tasks) plus the
            # per-task function data/result pickles (not cached).
            t.specify_file(full_script_name,
                           script_name,
                           WORK_QUEUE_INPUT,
                           cache=True)
            t.specify_file(function_result_loc,
                           function_result_loc_remote,
                           WORK_QUEUE_OUTPUT,
                           cache=False)
            t.specify_file(function_data_loc,
                           function_data_loc_remote,
                           WORK_QUEUE_INPUT,
                           cache=False)
            t.specify_tag(str(parsl_id))

            for in_spec in input_files:
                t.specify_file(in_spec[0],
                               in_spec[1],
                               WORK_QUEUE_INPUT,
                               cache=in_spec[2])

            for out_spec in output_files:
                t.specify_file(out_spec[0],
                               out_spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=out_spec[2])

            for std_spec in std_files:
                t.specify_file(std_spec[0],
                               std_spec[1],
                               WORK_QUEUE_OUTPUT,
                               cache=std_spec[2])

            logger.debug("Submitting task {} to workqueue".format(parsl_id))
            try:
                wq_id = q.submit(t)
                wq_tasks.add(wq_id)
            except Exception as e:
                # Fixed: this message previously said "Unable to create
                # task" although this is the submission step.
                logger.error("Unable to submit task: {}".format(e))

                msg = {
                    "tid": parsl_id,
                    "result_received": False,
                    "reason": "Workqueue Task Start Failure",
                    "status": 1
                }

                collector_queue.put_nowait(msg)
                continue

            logger.debug("Task {} submitted workqueue with id {}".format(
                parsl_id, wq_id))

        if cancel_value.value == 0:
            continue_running = False

        # Wait for Tasks
        task_found = True
        # If the queue is not empty wait on the workqueue queue for a task
        if not q.empty() and continue_running:
            while task_found is True:
                if cancel_value.value == 0:
                    continue_running = False
                    task_found = False
                    continue
                t = q.wait(1)
                if t is None:
                    task_found = False
                    continue
                else:
                    parsl_tid = t.tag
                    logger.debug(
                        "Completed workqueue task {}, parsl task {}".format(
                            t.id, parsl_tid))
                    status = t.return_status
                    task_result = t.result
                    msg = None

                    if status != 0 or (task_result != WORK_QUEUE_RESULT_SUCCESS
                                       and task_result !=
                                       WORK_QUEUE_RESULT_OUTPUT_MISSING):
                        # Failure: either the wrapper script exited non-zero
                        # or Work Queue itself reported a failure.
                        if task_result == WORK_QUEUE_RESULT_SUCCESS:
                            logger.debug(
                                "Workqueue task {} failed with status {}".
                                format(t.id, status))

                            reason = "Wrapper Script Failure: "
                            if status == 1:
                                reason += "command line parsing"
                            if status == 2:
                                reason += "problem loading function data"
                            if status == 3:
                                reason += "problem remapping file names"
                            if status == 4:
                                reason += "problem writing out function result"

                            reason += "\nTrace:\n" + t.output

                            logger.debug(
                                "Workqueue runner script failed for task {} because {}\n"
                                .format(parsl_tid, reason))

                        else:
                            reason = "Workqueue system failure\n"

                        msg = {
                            "tid": parsl_tid,
                            "result_received": False,
                            "reason": reason,
                            "status": status
                        }
                        # Fixed: msg was also put_nowait()'d here, so every
                        # failure was delivered twice (again below).

                    else:

                        if see_worker_output:
                            print(t.output)

                        result_loc = os.path.join(
                            data_dir,
                            "task_" + str(parsl_tid) + "_function_result")
                        logger.debug(
                            "Looking for result in {}".format(result_loc))
                        # "with" guarantees the file is closed even if
                        # unpickling raises (previously leaked on error).
                        with open(result_loc, "rb") as f:
                            result = pickle.load(f)

                        msg = {
                            "tid": parsl_tid,
                            "result_received": True,
                            "result": result
                        }
                        wq_tasks.remove(t.id)

                    collector_queue.put_nowait(msg)

        if continue_running is False:
            logger.debug("Exiting WorkQueue Master Thread event loop")
            break

    # Cancel anything still outstanding before exiting.
    for wq_task in wq_tasks:
        logger.debug("Cancelling Workqueue Task {}".format(wq_task))
        q.cancel_by_taskid(wq_task)

    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
# Example #38
def _work_queue_submit_wait(task_queue=None,
                            launch_cmd=None,
                            env=None,
                            collector_queue=None,
                            data_dir=".",
                            full=False,
                            shared_fs=False,
                            autolabel=False,
                            autolabel_window=None,
                            autocategory=False,
                            should_stop=None,
                            port=WORK_QUEUE_DEFAULT_PORT,
                            wq_log_dir=None,
                            project_password_file=None,
                            project_name=None):
    """Thread to handle Parsl app submissions to the Work Queue objects.

    Takes in Parsl functions submitted using submit(), and creates a
    Work Queue task with the appropriate specifications, which is then
    submitted to Work Queue. After tasks are completed, processes the
    exit status and exit code of the task, and sends results to the
    Work Queue collector thread.

    To avoid python's global interpreter lock with work queue's wait, this
    function should be launched as a process, not as a lightweight thread. This
    means that any communication should be done using the multiprocessing
    module capabilities, rather than shared memory.

    :param task_queue: multiprocessing.Queue of task objects to submit
        (a fresh queue is created if omitted).
    :param launch_cmd: format string for the remote command; must accept
        ``package_prefix``, ``mapping``, ``function`` and ``result``.
    :param env: optional dict of environment variables per task.
    :param collector_queue: multiprocessing.Queue receiving WqTaskToParsl
        result messages (a fresh queue is created if omitted).
    :param data_dir: directory where task result pickles are found.
    :param full: enable full resource monitoring (with ``wq_log_dir``).
    :param shared_fs: if True, files live on a shared filesystem and are
        not staged through Work Queue.
    :param autolabel: enable Work Queue resource auto-labelling.
    :param autolabel_window: steady-state task count used for labelling.
    :param autocategory: accepted for interface compatibility (not read
        in this function).
    :param should_stop: shared multiprocessing.Value; nonzero requests
        shutdown.  Must be supplied by the caller.
    :param port: port for the Work Queue master to listen on.
    :param wq_log_dir: directory for Work Queue logs, or None to disable.
    :param project_password_file: optional WQ project password file.
    :param project_name: optional WQ project name.
    :return: 0 on exit.
    """
    # Fixed: task_queue/collector_queue previously defaulted to
    # multiprocessing.Queue() objects built once at import time (the
    # mutable-default-argument pitfall); create them per call instead.
    if task_queue is None:
        task_queue = multiprocessing.Queue()
    if collector_queue is None:
        collector_queue = multiprocessing.Queue()

    logger.debug("Starting WorkQueue Submit/Wait Process")

    # Enable debugging flags and create logging file
    wq_debug_log = None
    if wq_log_dir is not None:
        logger.debug("Setting debugging flags and creating logging file")
        wq_debug_log = os.path.join(wq_log_dir, "debug_log")

    # Create WorkQueue queue object
    logger.debug("Creating WorkQueue Object")
    try:
        logger.debug("Listening on port {}".format(port))
        q = WorkQueue(port, debug_log=wq_debug_log)
    except Exception as e:
        logger.error("Unable to create WorkQueue object: {}".format(e))
        raise

    # Specify WorkQueue queue attributes
    if project_name:
        q.specify_name(project_name)

    if project_password_file:
        q.specify_password_file(project_password_file)

    if autolabel:
        q.enable_monitoring()
        if autolabel_window is not None:
            q.tune('category-steady-n-tasks', autolabel_window)

    # Only write logs when the wq_log_dir is specified, which it most likely will be
    if wq_log_dir is not None:
        wq_master_log = os.path.join(wq_log_dir, "master_log")
        wq_trans_log = os.path.join(wq_log_dir, "transaction_log")
        if full:
            wq_resource_log = os.path.join(wq_log_dir, "resource_logs")
            q.enable_monitoring_full(dirname=wq_resource_log)
        q.specify_log(wq_master_log)
        q.specify_transactions_log(wq_trans_log)

    # Remember the parent PID so we can exit if we are ever re-parented.
    orig_ppid = os.getppid()

    result_file_of_task_id = {
    }  # Mapping taskid -> result file for active tasks.

    while not should_stop.value:
        # Monitor the task queue
        ppid = os.getppid()
        if ppid != orig_ppid:
            logger.debug("new Process")
            break

        # Submit tasks
        while task_queue.qsize() > 0 and not should_stop.value:
            # Obtain task from task_queue
            try:
                task = task_queue.get(timeout=1)
                logger.debug("Removing task from queue")
            except queue.Empty:
                continue

            pkg_pfx = ""
            if task.env_pkg is not None:
                pkg_pfx = "./{} -e {} ".format(
                    os.path.basename(package_run_script),
                    os.path.basename(task.env_pkg))

            # Create command string
            logger.debug(launch_cmd)
            command_str = launch_cmd.format(
                package_prefix=pkg_pfx,
                mapping=os.path.basename(task.map_file),
                function=os.path.basename(task.function_file),
                result=os.path.basename(task.result_file))
            logger.debug(command_str)

            # Create WorkQueue task for the command
            logger.debug("Sending task {} with command: {}".format(
                task.id, command_str))
            try:
                t = Task(command_str)
            except Exception as e:
                logger.error("Unable to create task: {}".format(e))
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be created by work queue",
                        status=-1))
                continue

            t.specify_category(task.category)
            if autolabel:
                q.specify_category_mode(
                    task.category, WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT)

            # Specify environment variables for the task
            if env is not None:
                for var in env:
                    t.specify_environment_variable(var, env[var])

            if task.env_pkg is not None:
                t.specify_input_file(package_run_script, cache=True)
                t.specify_input_file(task.env_pkg, cache=True)

            # Specify script, and data/result files for task
            t.specify_input_file(exec_parsl_function.__file__, cache=True)
            t.specify_input_file(task.function_file, cache=False)
            t.specify_input_file(task.map_file, cache=False)
            t.specify_output_file(task.result_file, cache=False)
            t.specify_tag(str(task.id))
            result_file_of_task_id[str(task.id)] = task.result_file

            logger.debug("Parsl ID: {}".format(task.id))

            # Specify input/output files that need to be staged.
            # Absolute paths are assumed to be in shared filesystem, and thus
            # not staged by work queue.
            if not shared_fs:
                for spec in task.input_files:
                    if spec.stage:
                        t.specify_input_file(spec.parsl_name,
                                             spec.parsl_name,
                                             cache=spec.cache)
                for spec in task.output_files:
                    if spec.stage:
                        t.specify_output_file(spec.parsl_name,
                                              spec.parsl_name,
                                              cache=spec.cache)

            # Submit the task to the WorkQueue object
            logger.debug("Submitting task {} to WorkQueue".format(task.id))
            try:
                wq_id = q.submit(t)
            except Exception as e:
                logger.error(
                    "Unable to submit task to work queue: {}".format(e))
                # Fixed typo in the user-facing reason ("submited").
                collector_queue.put_nowait(
                    WqTaskToParsl(
                        id=task.id,
                        result_received=False,
                        result=None,
                        reason="task could not be submitted to work queue",
                        status=-1))
                continue
            logger.debug("Task {} submitted to WorkQueue with id {}".format(
                task.id, wq_id))

        # If the queue is not empty wait on the WorkQueue queue for a task
        task_found = True
        if not q.empty():
            while task_found and not should_stop.value:
                # Obtain the task from the queue
                t = q.wait(1)
                if t is None:
                    task_found = False
                    continue
                # When a task is found:
                parsl_id = t.tag
                logger.debug(
                    "Completed WorkQueue task {}, parsl task {}".format(
                        t.id, t.tag))
                result_file = result_file_of_task_id.pop(t.tag)

                # A task completes 'successfully' if it has a result file,
                # and it can be loaded. This may mean that the 'success' is
                # an exception.
                logger.debug("Looking for result in {}".format(result_file))
                try:
                    with open(result_file, "rb") as f_in:
                        result = pickle.load(f_in)
                    logger.debug("Found result in {}".format(result_file))
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=True,
                                      result=result,
                                      reason=None,
                                      status=t.return_status))
                # If a result file could not be generated, explain the
                # failure according to work queue error codes. We generate
                # an exception and wrap it with RemoteExceptionWrapper, to
                # match the positive case.
                except Exception as e:
                    reason = _explain_work_queue_result(t)
                    logger.debug(
                        "Did not find result in {}".format(result_file))
                    logger.debug(
                        "Wrapper Script status: {}\nWorkQueue Status: {}".
                        format(t.return_status, t.result))
                    logger.debug(
                        "Task with id parsl {} / wq {} failed because:\n{}".
                        format(parsl_id, t.id, reason))
                    collector_queue.put_nowait(
                        WqTaskToParsl(id=parsl_id,
                                      result_received=False,
                                      result=e,
                                      reason=reason,
                                      status=t.return_status))
    logger.debug("Exiting WorkQueue Monitoring Process")
    return 0
    def __init__(self, connection, logger, primary_collector,
                 secondary_collector):
        """Build the Sandesh client connection state machine.

        Wires up a Fysom FSM over the IDLE / DISCONNECT / CONNECT /
        CONNECT_TO_BACKUP / CLIENT_INIT / ESTABLISHED states, with
        per-state callbacks that manage the session, timers and
        connection-state reporting, plus a WorkQueue feeding events into
        the FSM via _dequeue_event.

        :param connection: SandeshConnection owning this state machine.
        :param logger: logger used for state/event tracing.
        :param primary_collector: address of the initial active collector.
        :param secondary_collector: address of the backup collector.
        """

        def _update_connection_state(e, status):
            # Report collector connectivity to the process-info
            # connection-state infrastructure.
            from connection_info import ConnectionState
            from gen_py.process_info.ttypes import ConnectionType
            collector_addr = e.sm._active_collector
            if collector_addr is None:
                collector_addr = ''
            ConnectionState.update(conn_type = ConnectionType.COLLECTOR,
                name = '',
                status = status,
                server_addrs = [collector_addr],
                message = '%s to %s on %s' % (e.src, e.dst, e.event))
        #end _update_connection_state

        def _connection_state_up(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.UP)
        #end _connection_state_up

        def _connection_state_down(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.DOWN)
        #end _connection_state_down

        def _connection_state_init(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.INIT)
        #end _connection_state_init

        def _on_idle(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            # Fixed: this guard's body was tab-indented (TabError under
            # Python 3) and compared against True with "!=".
            if not e.sm._disable:
                e.sm._start_idle_hold_timer()
            # update connection state
            _connection_state_down(e)
        #end _on_idle

        def _on_disconnect(e):
            # update connection state
            _connection_state_down(e)
        #end _on_disconnect

        def _on_connect(e):
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN))
        #end _on_connect

        def _on_connect_to_backup(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                # Swap roles: the backup becomes the active collector.
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event = Event._EV_BACKUP_COLLECTOR_UNKNOWN))
        #end _on_connect_to_backup

        def _on_client_init(e):
            e.sm._connects += 1
            # Start reading from the session in a separate greenlet.
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_init(e)
        #end _on_client_init

        def _on_established(e):
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_up(e)
        #end _on_established

        # FSM - Fysom
        self._fsm = Fysom({
                           'initial': {'state' : State._IDLE,
                                       'event' : Event._EV_START,
                                       'defer' : True
                                      },
                           'events': [
                                      # _IDLE
                                      {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },
                                      {'name' : Event._EV_START,
                                       'src'  : State._IDLE,
                                       'dst'  : State._CONNECT
                                      },

                                      # _DISCONNECT
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._DISCONNECT,
                                       'dst'  : State._CONNECT
                                      },

                                      # _CONNECT
                                      {'name' : Event._EV_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._DISCONNECT
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CONNECT_TO_BACKUP
                                      {'name' : Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECT_FAIL,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CONNECTED,
                                       'src'  : State._CONNECT_TO_BACKUP,
                                       'dst'  : State._CLIENT_INIT
                                      },

                                      # _CLIENT_INIT
                                      {'name' : Event._EV_CONNECT_TIMER_EXPIRED,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                                       'src'  : State._CLIENT_INIT,
                                       'dst'  : State._ESTABLISHED
                                      },

                                      # _ESTABLISHED
                                      {'name' : Event._EV_TCP_CLOSE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT_TO_BACKUP
                                      },
                                      {'name' : Event._EV_STOP,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._IDLE
                                      },
                                      {'name' : Event._EV_COLLECTOR_CHANGE,
                                       'src'  : State._ESTABLISHED,
                                       'dst'  : State._CONNECT
                                      }
                                     ],
                           'callbacks': {
                                         'on' + State._IDLE : _on_idle,
                                         'on' + State._CONNECT : _on_connect,
                                         'on' + State._CONNECT_TO_BACKUP : _on_connect_to_backup,
                                         'on' + State._CLIENT_INIT : _on_client_init,
                                         'on' + State._ESTABLISHED : _on_established,
                                        }
                          })

        self._connection = connection
        self._session = None
        # Number of successful connects so far (reported to the collector).
        self._connects = 0
        self._disable = False
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        # Events are consumed by _dequeue_event, gated by
        # _is_ready_to_dequeue_event.
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)