def __init__(self, template, debug=False, dry_run=False):
    self.config = Configuration()
    self.debug = debug
    self.dry_run = dry_run
    # The work queue will figure out a valid combination of MongoDB access
    # parameters, e.g., host/port, URI, or replica set discovery via DNS
    self.wq = WorkQueue(host=self.config.mongodb_host,
                        port=self.config.mongodb_port,
                        uri=self.config.mongodb_uri,
                        srv_name=self.config.mongodb_rs_srv,
                        database=self.config.mongodb_queue_db,
                        replicaset=self.config.mongodb_rs,
                        collection=self.config.mongodb_queue_col)
    if not os.path.exists(template):
        raise Exception("Template file does not exist")
    self.template_dir = os.path.dirname(template)
    self.template_file = os.path.basename(template)
    if self.template_file == "":
        raise Exception("Template must be a file, not a directory")
    self.jinja = jinja2.Environment(
        loader=jinja2.FileSystemLoader(self.template_dir),
        autoescape=False)
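# The jinja2 environment built above loads templates by file name from the
# template's directory. A hedged, self-contained sketch of the same render
# flow, using a DictLoader so it runs without a file on disk (the template
# name and context keys are placeholders, not from the original code):
import jinja2

env = jinja2.Environment(
    loader=jinja2.DictLoader({'job.yaml.j2': 'name: {{ job_name }}'}),
    autoescape=False)
print(env.get_template('job.yaml.j2').render(job_name='example'))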
def init_generator(self, module, source, node_type, instance_id, collectors,
                   client_context, http_port, sandesh_req_uve_pkg_list=None,
                   discovery_client=None, connect_to_collector=True,
                   logger_class=None, logger_config_file=None,
                   host_ip='127.0.0.1', alarm_ack_callback=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._node_type = node_type
    self._instance_id = instance_id
    self._host_ip = host_ip
    self._client_context = client_context
    self._collectors = collectors
    self._connect_to_collector = connect_to_collector
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._init_logger(module, logger_class=logger_class,
                      logger_config_file=logger_config_file)
    self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                      connect_to_collector)
    self._stats = SandeshStats()
    self._trace = trace.Trace()
    self._sandesh_request_dict = {}
    self._alarm_ack_callback = alarm_ack_callback
    self._uve_type_maps = SandeshUVETypeMaps(self._logger)
    if sandesh_req_uve_pkg_list is None:
        sandesh_req_uve_pkg_list = []
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    self._gev_httpd = None
    if http_port != -1:
        self._http_server = SandeshHttp(self, module, http_port,
                                        sandesh_req_uve_pkg_list)
        self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
    primary_collector = None
    secondary_collector = None
    if self._collectors is not None:
        if len(self._collectors) > 0:
            primary_collector = self._collectors[0]
        if len(self._collectors) > 1:
            secondary_collector = self._collectors[1]
    if self._connect_to_collector:
        self._client = SandeshClient(self, primary_collector,
                                     secondary_collector, discovery_client)
        self._client.initiate()
def run(self, args):
    topic_mgr = self.get_topic_manager()
    if not topic_mgr:
        print('invalid proxy')
        return 2
    topic_name = "ProgressTopic"
    try:
        topic = topic_mgr.retrieve(topic_name)
    except IceStorm.NoSuchTopic:
        topic = topic_mgr.create(topic_name)
    publisher = topic.getPublisher()
    progress = Downloader.ProgressEventPrx.uncheckedCast(publisher)
    if progress is None:
        print("Progress topic not found")
    work_queue = WorkQueue(progress)
    ic = self.communicator()
    adapter = ic.createObjectAdapter("FactoryAdapter")
    servant = SchedulerFactoryI(work_queue, ic)
    proxy = adapter.add(servant, ic.stringToIdentity("Factory1"))
    work_queue.start()
    adapter.activate()
    print(proxy, flush=True)
    self.shutdownOnInterrupt()
    ic.waitForShutdown()
    return 0
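# A hedged counterpart sketch: a subscriber for the ProgressTopic published
# above. The servant class and its operation name come from the Downloader
# Slice interface, which is not shown here, so both are assumptions; the
# IceStorm calls (retrieve, subscribeAndGetPublisher) mirror the publisher
# side shown above.
#
# class ProgressEventI(Downloader.ProgressEvent):
#     def notify(self, status):  # operation name assumed from the interface
#         print('progress:', status)
#
# topic = topic_mgr.retrieve("ProgressTopic")
# sub_adapter = ic.createObjectAdapter("SubscriberAdapter")
# subscriber = sub_adapter.addWithUUID(ProgressEventI())
# topic.subscribeAndGetPublisher({}, subscriber)
# sub_adapter.activate()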
def init_generator(self, module, source, node_type, instance_id, collectors,
                   client_context, http_port, sandesh_req_uve_pkg_list=None,
                   connect_to_collector=True, logger_class=None,
                   logger_config_file=None, host_ip='127.0.0.1',
                   alarm_ack_callback=None, config=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._node_type = node_type
    self._instance_id = instance_id
    self._sandesh_req_uve_pkg_list = sandesh_req_uve_pkg_list or []
    self._host_ip = host_ip
    self._client_context = client_context
    self._connect_to_collector = connect_to_collector
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._send_level = SandeshLevel.INVALID
    self._init_logger(self._module, logger_class=logger_class,
                      logger_config_file=logger_config_file)
    self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                      connect_to_collector)
    from sandesh_stats import SandeshMessageStatistics
    self._msg_stats = SandeshMessageStatistics()
    self._trace = trace.Trace()
    self._sandesh_request_map = {}
    self._alarm_ack_callback = alarm_ack_callback
    self._config = config or SandeshConfig.from_parser_arguments()
    self._uve_type_maps = SandeshUVETypeMaps(self._logger)
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in self._sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    if self._config.disable_object_logs is not None:
        self.disable_sending_object_logs(self._config.disable_object_logs)
    if self._config.system_logs_rate_limit is not None:
        SandeshSystem.set_sandesh_send_rate_limit(
            self._config.system_logs_rate_limit)
    self._gev_httpd = None
    if http_port != -1:
        self.run_introspect_server(http_port)
    if self._connect_to_collector:
        self._client = SandeshClient(self)
        self._client.initiate(collectors)
def __init__(self, sandesh_instance, server, event_handler,
             sandesh_msg_handler):
    self._sandesh_instance = sandesh_instance
    self._logger = sandesh_instance._logger
    self._event_handler = event_handler
    self._reader = SandeshReader(self, sandesh_msg_handler)
    self._writer = SandeshWriter(self)
    self._send_queue = WorkQueue(self._send_sandesh,
                                 self._is_ready_to_send_sandesh)
    TcpSession.__init__(self, server)
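# The pysandesh WorkQueue pairs a per-item callback with an optional
# readiness check, as in the constructor above. A minimal single-threaded
# sketch of that contract (an assumption about the interface, not
# pysandesh's actual implementation):
from collections import deque

class MiniWorkQueue(object):
    def __init__(self, callback, is_ready=lambda: True):
        self._callback = callback
        self._is_ready = is_ready
        self._queue = deque()

    def enqueue(self, item):
        self._queue.append(item)
        self._drain()

    def is_queue_empty(self):
        return not self._queue

    def _drain(self):
        # Deliver items only while the owner says it is ready to send
        while self._queue and self._is_ready():
            self._callback(self._queue.popleft())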
def main():
    args = parse_args()

    # Create a temporary file store for task results.
    tmpdir = tempfile.mkdtemp()

    # Load the task input data here
    task_inputs = load_inputs()

    print('Creating tasks')
    tasks = generate_tasks(args.command, task_inputs, args.infiles,
                           args.outfile, tmpdir, args.max_retries)

    # Create the Work Queue master that manages task distribution.
    work_queue.cctools_debug_flags_set("all")
    work_queue.cctools_debug_config_file(f'{args.name}.debug')
    work_queue.cctools_debug_config_file_size(0)
    wq = WorkQueue(port=args.port, name=args.name, shutdown=True)
    wq.specify_log(f'{args.name}.log')

    # Submit all tasks to the queue.
    print('Submitting tasks')
    for t in tasks.values():
        wq.submit(t)

    # The main loop waits for a task to get done, then handles success or
    # failure accordingly.
    print('Entering main loop')
    while not all([done_check(t) for t in tasks.values()]):
        t = wq.wait(10)  # This blocks for 10s or until a task is done.
        if t is not None:
            tasks[t.tag] = t  # Update the task map with the correct status
            # On success, post-process the task. If the maximum number of
            # submissions for a task has been reached, make a note.
            # Otherwise, report the failure and resubmit.
            if t.return_status == 0 and t.result == WORK_QUEUE_RESULT_SUCCESS:
                print(f'Task {t.tag} completed successfully.')
                input_idx = int(t.tag.split('_')[1])
                handle_success(t, tmpdir, args.outfile)
            elif t.result == WORK_QUEUE_RESULT_MAX_RETRIES:
                print(f'Task {t.tag} resubmitted too many times.')
            else:
                wq.submit(t)
                print(f'Task {t.tag} failed with result {t.result}')
                print(t.output)

    print('All tasks completed or hit max retries.')
    print('Cleaning up...')
    shutil.rmtree(tmpdir)
    print('Done')
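# A hedged sketch of what generate_tasks() above might build with the
# cctools work_queue Task API (Task, specify_tag, specify_input_file, and
# specify_output_file are real calls; the command and file names are
# placeholders, not from the source):
from work_queue import Task

def make_task(command, infile, outfile, tag):
    t = Task(command)
    t.specify_tag(tag)  # the tag doubles as the dict key in main() above
    t.specify_input_file(infile)
    t.specify_output_file(outfile)
    return t

# Workers attach to the master started in main() by name, e.g.:
#   work_queue_worker -d all -M <master-name>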
def init_generator(self, module, source, collectors, client_context,
                   http_port, sandesh_req_uve_pkg_list=None,
                   discovery_client=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._client_context = client_context
    self._collectors = collectors
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._init_logger(module)
    self._stats = SandeshStats()
    self._trace = trace.Trace()
    self._sandesh_request_dict = {}
    self._uve_type_maps = SandeshUVETypeMaps()
    if sandesh_req_uve_pkg_list is None:
        sandesh_req_uve_pkg_list = []
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    self._http_server = SandeshHttp(self, module, http_port,
                                    sandesh_req_uve_pkg_list)
    primary_collector = None
    secondary_collector = None
    if self._collectors is not None:
        if len(self._collectors) > 0:
            primary_collector = self._collectors[0]
        if len(self._collectors) > 1:
            secondary_collector = self._collectors[1]
    gevent.spawn(self._http_server.start_http_server)
    self._client = SandeshClient(self, primary_collector,
                                 secondary_collector, discovery_client)
    self._client.initiate()
def init_generator(self, module, source, node_type, instance_id, collectors,
                   client_context, http_port, sandesh_req_uve_pkg_list=None,
                   discovery_client=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._node_type = node_type
    self._instance_id = instance_id
    self._client_context = client_context
    self._collectors = collectors
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._init_logger(source + ':' + module + ':' + node_type + ':' +
                      instance_id)
    self._stats = SandeshStats()
    self._trace = trace.Trace()
    self._sandesh_request_dict = {}
    self._uve_type_maps = SandeshUVETypeMaps()
    if sandesh_req_uve_pkg_list is None:
        sandesh_req_uve_pkg_list = []
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    if http_port != -1:
        self._http_server = SandeshHttp(self, module, http_port,
                                        sandesh_req_uve_pkg_list)
        gevent.spawn(self._http_server.start_http_server)
    primary_collector = None
    secondary_collector = None
    if self._collectors is not None:
        if len(self._collectors) > 0:
            primary_collector = self._collectors[0]
        if len(self._collectors) > 1:
            secondary_collector = self._collectors[1]
    self._client = SandeshClient(self, primary_collector,
                                 secondary_collector, discovery_client)
    self._client.initiate()
def run(self, argv):
    work_queue = WorkQueue()
    servant = MathI(work_queue)
    broker = self.communicator()
    adapter = broker.createObjectAdapter("MathAdapter")
    print(adapter.add(servant, broker.stringToIdentity("math1")))
    adapter.activate()
    work_queue.start()
    self.shutdownOnInterrupt()
    broker.waitForShutdown()
    work_queue.destroy()
    return 0
def init_generator(self, module, source, node_type, instance_id, collectors,
                   client_context, http_port, sandesh_req_uve_pkg_list=None,
                   discovery_client=None, connect_to_collector=True,
                   logger_class=None, logger_config_file=None,
                   host_ip='127.0.0.1', alarm_ack_callback=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._node_type = node_type
    self._instance_id = instance_id
    self._host_ip = host_ip
    self._client_context = client_context
    self._collectors = collectors
    self._connect_to_collector = connect_to_collector
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._send_level = SandeshLevel.INVALID
    self._init_logger(module, logger_class=logger_class,
                      logger_config_file=logger_config_file)
    self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                      connect_to_collector)
    from sandesh_stats import SandeshMessageStatistics
    self._msg_stats = SandeshMessageStatistics()
    self._trace = trace.Trace()
    self._sandesh_request_map = {}
    self._alarm_ack_callback = alarm_ack_callback
    self._uve_type_maps = SandeshUVETypeMaps(self._logger)
    if sandesh_req_uve_pkg_list is None:
        sandesh_req_uve_pkg_list = []
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    self._gev_httpd = None
    if http_port != -1:
        self._http_server = SandeshHttp(self, module, http_port,
                                        sandesh_req_uve_pkg_list)
        self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
    primary_collector = None
    secondary_collector = None
    if self._collectors is not None:
        if len(self._collectors) > 0:
            primary_collector = self._collectors[0]
        if len(self._collectors) > 1:
            secondary_collector = self._collectors[1]
    if self._connect_to_collector:
        self._client = SandeshClient(self, primary_collector,
                                     secondary_collector, discovery_client)
        self._client.initiate()
def run(self, args):
    broker = self.communicator()
    adapter = broker.createObjectAdapter('FactoryAdapter')
    publisher = self.get_topic(STATUS_TOPIC).getPublisher()
    progress_publisher = Downloader.ProgressEventPrx.uncheckedCast(publisher)
    print("ProgressTopic created")
    queue = WorkQueue(progress_publisher)
    servant = SchedulerFactoryI(queue)
    proxy = adapter.addWithUUID(servant)
    sync_topic = self.get_topic(SYNC_TOPIC)
    pub = sync_topic.subscribeAndGetPublisher({}, proxy)
    servant.sync_publisher = Downloader.SyncEventPrx.uncheckedCast(pub)
    print("SyncTopic created")
    print(proxy, flush=True)
    adapter.activate()
    queue.start()
    self.shutdownOnInterrupt()
    broker.waitForShutdown()
    queue.destroy()
    return 0
def run(self, args):
    '''Create the event channels and print the proxy on the terminal so
    that the client can connect to it.'''
    adapter = self.communicator().createObjectAdapter("FactoryAdapter")
    Downloader.SyncEventPrx.uncheckedCast(
        self.get_topic(SYNC_TOPIC).getPublisher())
    progress = Downloader.ProgressEventPrx.uncheckedCast(
        self.get_topic(PROGRESS_TOPIC).getPublisher())
    queue = WorkQueue(progress)
    proxy = adapter.addWithUUID(SchedulerFactoryI(queue))
    print(proxy, flush=True)
    topic_mgr = self.get_topic(SYNC_TOPIC)
    servant = SyncEventI()
    subscriber = adapter.addWithUUID(servant)
    qos = {}
    topic_mgr.subscribeAndGetPublisher(qos, subscriber)
    while self.communicator().isShutdown():
        subscriber.requestSync()
        time.sleep(10)
    adapter.activate()
    queue.start()
    self.shutdownOnInterrupt()
    self.communicator().waitForShutdown()
    queue.destroy()
    return 0
def run(self, argv):
    broker = self.communicator()
    topic_mgr = self.get_topic_manager()  # proxy to topic
    if not topic_mgr:
        print(': invalid proxy')
        return 2
    topic_name = "ProgressTopic"
    try:
        topic = topic_mgr.retrieve(topic_name)
    except IceStorm.NoSuchTopic:
        topic = topic_mgr.create(topic_name)
    publisher = topic.getPublisher()
    progress_topic = Example.ProgressSubscriberPrx.uncheckedCast(publisher)
    work_queue = WorkQueue(progress_topic)
    servant = DownloaderI(work_queue)
    adapter = broker.createObjectAdapter("DownloaderAdapter")
    print(adapter.add(servant, broker.stringToIdentity("downloader1")))
    work_queue.start()
    self.shutdownOnInterrupt()
    broker.waitForShutdown()
    work_queue.destroy()
    return 0
def __init__(self, project, port, log_freq=600):  # 600 seconds
    """Initialize the QMaster

    Parameters
    ----------
    project :
    port : int
    log_freq : int, optional
        frequency to print info about the status of the work queue.
        In units of seconds. Default is to print every 10 minutes.
    """
    threading.Thread.__init__(self)

    self.project = project
    self.log_freq = log_freq  # print time in seconds
    self.wake_freq = 1  # seconds
    self.wq = WorkQueue(port, name='MSMAccelerator', catalog=True,
                        exclusive=False)

    logger.info('WORK QUEUE MASTER LISTENING ON PORT: %d', self.wq.port)
    logger.info('(Start a local worker with >> work_queue_worker -d all '
                'localhost %d & )', self.wq.port)

    # method controls whether or not we need to bring back solvated_xtc
    # as well
    if self.project.method == 'explicit':
        self.return_wet_xtc = True
    elif self.project.method == 'implicit':
        self.return_wet_xtc = False
    else:
        raise Exception("project.method must be 'explicit' or 'implicit'")
    logger.info('Return wet xtc set to %s', self.return_wet_xtc)

    # what does this specify algorithm do?
    self.wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)

    # fast abort kills jobs that appear to be straggling (taking more than
    # 1.5x average)
    # self.wq.activate_fast_abort(1.5)

    # setting the stop event signals for the thread to die
    self._stop = threading.Event()

    # the thread sets the event every time a job returns or there are no
    # waiting jobs and it finished post processing. See the wait method
    self._mainloop_wake_event_cause = None
    self._mainloop_wake_event = threading.Event()

    # start the thread
    self.start()
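# QMaster above runs as a thread gated by a threading.Event. A hedged,
# self-contained sketch of that stop-event pattern (the Work Queue polling
# is replaced by a placeholder sleep; nothing here is MSMAccelerator code):
import threading
import time

class PollingThread(threading.Thread):
    def __init__(self, wake_freq=1):
        threading.Thread.__init__(self)
        self.wake_freq = wake_freq
        self._stop = threading.Event()

    def run(self):
        # Poll until stop() is called, much as QMaster's main loop would
        # block on wq.wait(timeout) between checks of its stop event
        while not self._stop.is_set():
            time.sleep(self.wake_freq)  # stand-in for wq.wait(timeout)

    def stop(self):
        self._stop.set()

poller = PollingThread()
poller.start()
poller.stop()
poller.join()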
def init_generator(self, module, source, node_type, instance_id, collectors,
                   client_context, http_port, sandesh_req_uve_pkg_list=None,
                   connect_to_collector=True, logger_class=None,
                   logger_config_file=None, host_ip='127.0.0.1',
                   alarm_ack_callback=None, config=None):
    self._role = self.SandeshRole.GENERATOR
    self._module = module
    self._source = source
    self._node_type = node_type
    self._instance_id = instance_id
    self._sandesh_req_uve_pkg_list = sandesh_req_uve_pkg_list or []
    self._host_ip = host_ip
    self._client_context = client_context
    self._connect_to_collector = connect_to_collector
    self._rcv_queue = WorkQueue(self._process_rx_sandesh)
    self._init_logger(self._module, logger_class=logger_class,
                      logger_config_file=logger_config_file)
    self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                      connect_to_collector)
    from sandesh_stats import SandeshMessageStatistics
    self._msg_stats = SandeshMessageStatistics()
    self._trace = trace.Trace()
    self._sandesh_request_map = {}
    self._alarm_ack_callback = alarm_ack_callback
    self._config = config or SandeshConfig.from_parser_arguments()
    self._uve_type_maps = SandeshUVETypeMaps(self._logger)
    # Initialize the request handling
    # Import here to break the cyclic import dependency
    import sandesh_req_impl
    sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
    self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
    for pkg_name in self._sandesh_req_uve_pkg_list:
        self._create_sandesh_request_and_uve_lists(pkg_name)
    if self._config.disable_object_logs is not None:
        self.disable_sending_object_logs(self._config.disable_object_logs)
    if self._config.system_logs_rate_limit is not None:
        SandeshSystem.set_sandesh_send_rate_limit(
            self._config.system_logs_rate_limit)
    self._gev_httpd = None
    if http_port != -1:
        self.run_introspect_server(http_port)
    if self._connect_to_collector:
        self._client = SandeshClient(self)
        self._client.initiate(collectors)
async def main():
    r = redis.Redis()
    wq = WorkQueue(r, tidy_interval=5, stale_time=3)
    prod = Producer(work_queue=wq, delay=0.75)
    worker = Worker("Good Worker", work_queue=wq, delay=1, fail_rate=0.2)
    bad_worker = Worker("Bad Worker", work_queue=wq, delay=2.5, fail_rate=0.7)
    try:
        print('Start')
        await wq.schedule_tidy_task()
        await prod.start()
        await worker.start()
        await bad_worker.start()
        await asyncio.sleep(61)
    finally:
        await wq.stop_tidy_task()
        await prod.stop()
        await worker.stop()
        await bad_worker.stop()
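# The tidy/stale parameters above suggest the classic Redis reliable-queue
# pattern: a leased item moves to a working list, and a periodic tidy pass
# requeues stale leases. A hedged synchronous sketch of that idea (the key
# names are assumptions, not from the WorkQueue used above):
import redis

def lease(r, timeout=5):
    # Atomically move one item from pending to working, blocking briefly
    return r.brpoplpush('wq:pending', 'wq:working', timeout=timeout)

def complete(r, item):
    r.lrem('wq:working', 1, item)  # drop the lease once work succeeds

def tidy(r):
    # Requeue every in-flight item; a real version would honor per-item
    # lease timestamps (the stale_time above) before requeueing.
    for item in r.lrange('wq:working', 0, -1):
        r.lrem('wq:working', 1, item)
        r.lpush('wq:pending', item)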
def __init__(self, connection, logger, primary_collector,
             secondary_collector):

    def _update_connection_state(e, status):
        from connection_info import ConnectionState
        from gen_py.process_info.ttypes import ConnectionType
        collector_addr = e.sm._active_collector
        if collector_addr is None:
            collector_addr = ''
        ConnectionState.update(conn_type=ConnectionType.COLLECTOR,
                               name='', status=status,
                               server_addrs=[collector_addr],
                               message='%s to %s on %s' %
                                       (e.src, e.dst, e.event))
    # end _update_connection_state

    def _connection_state_up(e):
        from gen_py.process_info.ttypes import ConnectionStatus
        _update_connection_state(e, ConnectionStatus.UP)
    # end _connection_state_up

    def _connection_state_down(e):
        from gen_py.process_info.ttypes import ConnectionStatus
        _update_connection_state(e, ConnectionStatus.DOWN)
    # end _connection_state_down

    def _connection_state_init(e):
        from gen_py.process_info.ttypes import ConnectionStatus
        _update_connection_state(e, ConnectionStatus.INIT)
    # end _connection_state_init

    def _on_idle(e):
        if e.sm._connect_timer is not None:
            e.sm._cancel_connect_timer()
        # Reset active and backup collector
        self._active_collector = self._connection.primary_collector()
        self._backup_collector = self._connection.secondary_collector()
        # clean up existing connection
        e.sm._delete_session()
        if e.sm._disable != True:
            e.sm._start_idle_hold_timer()
        # update connection state
        _connection_state_down(e)
    # end _on_idle

    def _on_disconnect(e):
        # update connection state
        _connection_state_down(e)
    # end _on_disconnect

    def _on_connect(e):
        if e.sm._idle_hold_timer is not None:
            e.sm._cancel_idle_hold_timer()
        e.sm._connection.reset_collector()
        # clean up existing connection
        e.sm._delete_session()
        if e.sm._active_collector is not None:
            # update connection state
            _connection_state_init(e)
            e.sm._create_session()
            e.sm._start_connect_timer()
            e.sm._session.connect()
        else:
            e.sm.enqueue_event(Event(event=Event._EV_COLLECTOR_UNKNOWN))
    # end _on_connect

    def _on_connect_to_backup(e):
        if e.sm._connect_timer is not None:
            e.sm._cancel_connect_timer()
        # clean up existing connection
        e.sm._delete_session()
        # try to connect to the backup collector, if known
        if e.sm._backup_collector is not None:
            e.sm._active_collector, e.sm._backup_collector = \
                e.sm._backup_collector, e.sm._active_collector
            # update connection state
            _connection_state_init(e)
            e.sm._create_session()
            e.sm._start_connect_timer()
            e.sm._session.connect()
        else:
            e.sm.enqueue_event(
                Event(event=Event._EV_BACKUP_COLLECTOR_UNKNOWN))
    # end _on_connect_to_backup

    def _on_client_init(e):
        e.sm._connects += 1
        gevent.spawn(e.sm._session.read)
        e.sm._connection.handle_initialized(e.sm._connects)
        e.sm._connection.sandesh_instance().send_generator_info()
        # update connection state
        _connection_state_init(e)
    # end _on_client_init

    def _on_established(e):
        e.sm._cancel_connect_timer()
        e.sm._connection.set_collector(e.sm_event.source)
        e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
        e.sm._connection.sandesh_instance().send_generator_info()
        # update connection state
        _connection_state_up(e)
    # end _on_established

    # FSM - Fysom
    self._fsm = Fysom({
        'initial': {'state': State._IDLE,
                    'event': Event._EV_START,
                    'defer': True},
        'events': [
            # _IDLE
            {'name': Event._EV_IDLE_HOLD_TIMER_EXPIRED,
             'src': State._IDLE, 'dst': State._CONNECT},
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._IDLE, 'dst': State._CONNECT},
            {'name': Event._EV_START,
             'src': State._IDLE, 'dst': State._CONNECT},

            # _DISCONNECT
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._DISCONNECT, 'dst': State._CONNECT},

            # _CONNECT
            {'name': Event._EV_COLLECTOR_UNKNOWN,
             'src': State._CONNECT, 'dst': State._DISCONNECT},
            {'name': Event._EV_TCP_CONNECT_FAIL,
             'src': State._CONNECT, 'dst': State._CONNECT_TO_BACKUP},
            {'name': Event._EV_CONNECT_TIMER_EXPIRED,
             'src': State._CONNECT, 'dst': State._CONNECT_TO_BACKUP},
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._CONNECT, 'dst': State._IDLE},
            {'name': Event._EV_TCP_CONNECTED,
             'src': State._CONNECT, 'dst': State._CLIENT_INIT},

            # _CONNECT_TO_BACKUP
            {'name': Event._EV_BACKUP_COLLECTOR_UNKNOWN,
             'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
            {'name': Event._EV_TCP_CONNECT_FAIL,
             'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
            {'name': Event._EV_CONNECT_TIMER_EXPIRED,
             'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
            {'name': Event._EV_TCP_CONNECTED,
             'src': State._CONNECT_TO_BACKUP, 'dst': State._CLIENT_INIT},

            # _CLIENT_INIT
            {'name': Event._EV_CONNECT_TIMER_EXPIRED,
             'src': State._CLIENT_INIT, 'dst': State._IDLE},
            {'name': Event._EV_TCP_CLOSE,
             'src': State._CLIENT_INIT, 'dst': State._IDLE},
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._CLIENT_INIT, 'dst': State._IDLE},
            {'name': Event._EV_SANDESH_CTRL_MESSAGE_RECV,
             'src': State._CLIENT_INIT, 'dst': State._ESTABLISHED},

            # _ESTABLISHED
            {'name': Event._EV_TCP_CLOSE,
             'src': State._ESTABLISHED, 'dst': State._CONNECT_TO_BACKUP},
            {'name': Event._EV_STOP,
             'src': State._ESTABLISHED, 'dst': State._IDLE},
            {'name': Event._EV_COLLECTOR_CHANGE,
             'src': State._ESTABLISHED, 'dst': State._CONNECT}
        ],
        'callbacks': {
            'on' + State._IDLE: _on_idle,
            'on' + State._CONNECT: _on_connect,
            'on' + State._CONNECT_TO_BACKUP: _on_connect_to_backup,
            'on' + State._CLIENT_INIT: _on_client_init,
            'on' + State._ESTABLISHED: _on_established,
        }
    })

    self._connection = connection
    self._session = None
    self._connects = 0
    self._disable = False
    self._idle_hold_timer = None
    self._connect_timer = None
    self._active_collector = primary_collector
    self._backup_collector = secondary_collector
    self._logger = logger
    self._event_queue = WorkQueue(self._dequeue_event,
                                  self._is_ready_to_dequeue_event)
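# The Fysom dict above maps event names to src/dst transitions and wires
# 'on<state>' callbacks. A tiny self-contained demo of that same library
# pattern (the states and events here are placeholders):
from fysom import Fysom

fsm = Fysom({
    'initial': 'idle',
    'events': [
        {'name': 'connect', 'src': 'idle', 'dst': 'connected'},
        {'name': 'close', 'src': 'connected', 'dst': 'idle'},
    ],
    'callbacks': {
        'onconnected': lambda e: print('entered connected via', e.event),
    },
})
fsm.connect()  # fires onconnected; fsm.current is now 'connected'
assert fsm.current == 'connected'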
class SandeshStateMachine(object):

    _IDLE_HOLD_TIME = 5  # in seconds
    _CONNECT_TIME = 30  # in seconds

    def __init__(self, connection, logger, primary_collector,
                 secondary_collector):

        def _update_connection_state(e, status):
            from connection_info import ConnectionState
            from gen_py.process_info.ttypes import ConnectionType
            collector_addr = e.sm._active_collector
            if collector_addr is None:
                collector_addr = ''
            ConnectionState.update(conn_type=ConnectionType.COLLECTOR,
                                   name='', status=status,
                                   server_addrs=[collector_addr],
                                   message='%s to %s on %s' %
                                           (e.src, e.dst, e.event))
        # end _update_connection_state

        def _connection_state_up(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.UP)
        # end _connection_state_up

        def _connection_state_down(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.DOWN)
        # end _connection_state_down

        def _connection_state_init(e):
            from gen_py.process_info.ttypes import ConnectionStatus
            _update_connection_state(e, ConnectionStatus.INIT)
        # end _connection_state_init

        def _on_idle(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # Reset active and backup collector
            self._active_collector = self._connection.primary_collector()
            self._backup_collector = self._connection.secondary_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._disable != True:
                e.sm._start_idle_hold_timer()
            # update connection state
            _connection_state_down(e)
        # end _on_idle

        def _on_disconnect(e):
            # update connection state
            _connection_state_down(e)
        # end _on_disconnect

        def _on_connect(e):
            if e.sm._idle_hold_timer is not None:
                e.sm._cancel_idle_hold_timer()
            e.sm._connection.reset_collector()
            # clean up existing connection
            e.sm._delete_session()
            if e.sm._active_collector is not None:
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(Event(event=Event._EV_COLLECTOR_UNKNOWN))
        # end _on_connect

        def _on_connect_to_backup(e):
            if e.sm._connect_timer is not None:
                e.sm._cancel_connect_timer()
            # clean up existing connection
            e.sm._delete_session()
            # try to connect to the backup collector, if known
            if e.sm._backup_collector is not None:
                e.sm._active_collector, e.sm._backup_collector = \
                    e.sm._backup_collector, e.sm._active_collector
                # update connection state
                _connection_state_init(e)
                e.sm._create_session()
                e.sm._start_connect_timer()
                e.sm._session.connect()
            else:
                e.sm.enqueue_event(
                    Event(event=Event._EV_BACKUP_COLLECTOR_UNKNOWN))
        # end _on_connect_to_backup

        def _on_client_init(e):
            e.sm._connects += 1
            gevent.spawn(e.sm._session.read)
            e.sm._connection.handle_initialized(e.sm._connects)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_init(e)
        # end _on_client_init

        def _on_established(e):
            e.sm._cancel_connect_timer()
            e.sm._connection.set_collector(e.sm_event.source)
            e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg)
            e.sm._connection.sandesh_instance().send_generator_info()
            # update connection state
            _connection_state_up(e)
        # end _on_established

        # FSM - Fysom
        self._fsm = Fysom({
            'initial': {'state': State._IDLE,
                        'event': Event._EV_START,
                        'defer': True},
            'events': [
                # _IDLE
                {'name': Event._EV_IDLE_HOLD_TIMER_EXPIRED,
                 'src': State._IDLE, 'dst': State._CONNECT},
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._IDLE, 'dst': State._CONNECT},
                {'name': Event._EV_START,
                 'src': State._IDLE, 'dst': State._CONNECT},

                # _DISCONNECT
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._DISCONNECT, 'dst': State._CONNECT},

                # _CONNECT
                {'name': Event._EV_COLLECTOR_UNKNOWN,
                 'src': State._CONNECT, 'dst': State._DISCONNECT},
                {'name': Event._EV_TCP_CONNECT_FAIL,
                 'src': State._CONNECT, 'dst': State._CONNECT_TO_BACKUP},
                {'name': Event._EV_CONNECT_TIMER_EXPIRED,
                 'src': State._CONNECT, 'dst': State._CONNECT_TO_BACKUP},
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._CONNECT, 'dst': State._IDLE},
                {'name': Event._EV_TCP_CONNECTED,
                 'src': State._CONNECT, 'dst': State._CLIENT_INIT},

                # _CONNECT_TO_BACKUP
                {'name': Event._EV_BACKUP_COLLECTOR_UNKNOWN,
                 'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
                {'name': Event._EV_TCP_CONNECT_FAIL,
                 'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
                {'name': Event._EV_CONNECT_TIMER_EXPIRED,
                 'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._CONNECT_TO_BACKUP, 'dst': State._IDLE},
                {'name': Event._EV_TCP_CONNECTED,
                 'src': State._CONNECT_TO_BACKUP, 'dst': State._CLIENT_INIT},

                # _CLIENT_INIT
                {'name': Event._EV_CONNECT_TIMER_EXPIRED,
                 'src': State._CLIENT_INIT, 'dst': State._IDLE},
                {'name': Event._EV_TCP_CLOSE,
                 'src': State._CLIENT_INIT, 'dst': State._IDLE},
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._CLIENT_INIT, 'dst': State._IDLE},
                {'name': Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                 'src': State._CLIENT_INIT, 'dst': State._ESTABLISHED},

                # _ESTABLISHED
                {'name': Event._EV_TCP_CLOSE,
                 'src': State._ESTABLISHED, 'dst': State._CONNECT_TO_BACKUP},
                {'name': Event._EV_STOP,
                 'src': State._ESTABLISHED, 'dst': State._IDLE},
                {'name': Event._EV_COLLECTOR_CHANGE,
                 'src': State._ESTABLISHED, 'dst': State._CONNECT}
            ],
            'callbacks': {
                'on' + State._IDLE: _on_idle,
                'on' + State._CONNECT: _on_connect,
                'on' + State._CONNECT_TO_BACKUP: _on_connect_to_backup,
                'on' + State._CLIENT_INIT: _on_client_init,
                'on' + State._ESTABLISHED: _on_established,
            }
        })

        self._connection = connection
        self._session = None
        self._connects = 0
        self._disable = False
        self._idle_hold_timer = None
        self._connect_timer = None
        self._active_collector = primary_collector
        self._backup_collector = secondary_collector
        self._logger = logger
        self._event_queue = WorkQueue(self._dequeue_event,
                                      self._is_ready_to_dequeue_event)
    # end __init__

    # Public functions

    def initialize(self):
        self.enqueue_event(Event(event=Event._EV_START))
    # end initialize

    def session(self):
        return self._session
    # end session

    def state(self):
        return self._fsm.current
    # end state

    def shutdown(self):
        self._disable = True
        self.enqueue_event(Event(event=Event._EV_STOP))
    # end shutdown

    def set_admin_state(self, down):
        if down == True:
            self._disable = True
            self.enqueue_event(Event(event=Event._EV_STOP))
        else:
            self._disable = False
            self.enqueue_event(Event(event=Event._EV_START))
    # end set_admin_state

    def connect_count(self):
        return self._connects
    # end connect_count

    def active_collector(self):
        return self._active_collector
    # end active_collector

    def backup_collector(self):
        return self._backup_collector
    # end backup_collector

    def enqueue_event(self, event):
        self._event_queue.enqueue(event)
    # end enqueue_event

    def on_session_event(self, session, event):
        if session is not self._session:
            self._logger.error(
                "Ignore session event [%d] received for old session" %
                (event))
            return
        if SandeshSession.SESSION_ESTABLISHED == event:
            self._logger.info("Session Event: TCP Connected")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CONNECTED, session=session))
        elif SandeshSession.SESSION_ERROR == event:
            self._logger.error("Session Event: TCP Connect Fail")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CONNECT_FAIL, session=session))
        elif SandeshSession.SESSION_CLOSE == event:
            self._logger.error("Session Event: TCP Connection Closed")
            self.enqueue_event(
                Event(event=Event._EV_TCP_CLOSE, session=session))
        else:
            self._logger.error("Received unknown session event [%d]" %
                               (event))
    # end on_session_event

    def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector):
        if sandesh_ctrl.success == True:
            self.enqueue_event(
                Event(event=Event._EV_SANDESH_CTRL_MESSAGE_RECV,
                      session=session, msg=sandesh_ctrl, source=collector))
        else:
            # Negotiation with the Collector failed, reset the
            # connection and retry after sometime.
            self._logger.error("Negotiation with the Collector %s failed." %
                               (collector))
            self._session.close()
    # end on_sandesh_ctrl_msg_receive

    def on_sandesh_uve_msg_send(self, sandesh_uve):
        self.enqueue_event(
            Event(event=Event._EV_SANDESH_UVE_SEND, msg=sandesh_uve))
    # end on_sandesh_uve_msg_send

    # Private functions

    def _create_session(self):
        assert self._session is None
        col_info = self._active_collector.split(':')
        collector = (col_info[0], int(col_info[1]))
        self._session = SandeshSession(self._connection.sandesh_instance(),
                                       collector,
                                       self.on_session_event,
                                       self._connection._receive_sandesh_msg)
    # end _create_session

    def _delete_session(self):
        if self._session:
            self._session.close()
            self._session = None
            self._connection.reset_collector()
    # end _delete_session

    def _start_idle_hold_timer(self):
        if self._idle_hold_timer is None:
            if self._IDLE_HOLD_TIME:
                self._idle_hold_timer = gevent.spawn_later(
                    self._IDLE_HOLD_TIME,
                    self._idle_hold_timer_expiry_handler)
            else:
                self.enqueue_event(
                    Event(event=Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    # end _start_idle_hold_timer

    def _cancel_idle_hold_timer(self):
        if self._idle_hold_timer is not None:
            gevent.kill(self._idle_hold_timer)
            self._idle_hold_timer = None
    # end _cancel_idle_hold_timer

    def _idle_hold_timer_expiry_handler(self):
        self._idle_hold_timer = None
        self.enqueue_event(Event(event=Event._EV_IDLE_HOLD_TIMER_EXPIRED))
    # end _idle_hold_timer_expiry_handler

    def _start_connect_timer(self):
        if self._connect_timer is None:
            self._connect_timer = gevent.spawn_later(
                self._CONNECT_TIME, self._connect_timer_expiry_handler,
                self._session)
    # end _start_connect_timer

    def _cancel_connect_timer(self):
        if self._connect_timer is not None:
            gevent.kill(self._connect_timer)
            self._connect_timer = None
    # end _cancel_connect_timer

    def _connect_timer_expiry_handler(self, session):
        self._connect_timer = None
        self.enqueue_event(
            Event(event=Event._EV_CONNECT_TIMER_EXPIRED, session=session))
    # end _connect_timer_expiry_handler

    def _is_ready_to_dequeue_event(self):
        return True
    # end _is_ready_to_dequeue_event

    def _log_event(self, event):
        if self._fsm.current == State._ESTABLISHED and \
                event.event == Event._EV_SANDESH_UVE_SEND:
            return False
        return True
    # end _log_event

    def _dequeue_event(self, event):
        if self._log_event(event):
            self._logger.info("Processing event[%s] in state[%s]" %
                              (event.event, self._fsm.current))
        if event.session is not None and event.session is not self._session:
            self._logger.info("Ignore event [%s] received for old session" %
                              (event.event))
            return
        if event.event == Event._EV_COLLECTOR_CHANGE:
            old_active_collector = self._active_collector
            self._active_collector = event.primary_collector
            self._backup_collector = event.secondary_collector
            if old_active_collector == self._active_collector:
                self._logger.info("No change in active collector. "
                                  "Ignore event [%s]" % (event.event))
                return
        if event.event == Event._EV_SANDESH_UVE_SEND:
            if self._fsm.current == State._ESTABLISHED or \
                    self._fsm.current == State._CLIENT_INIT:
                self._connection.handle_sandesh_uve_msg(event.msg)
            else:
                self._connection.sandesh_instance().msg_stats().\
                    update_tx_stats(event.msg.__class__.__name__, 0,
                                    SandeshTxDropReason.WrongClientSMState)
                self._logger.info("Discarding event[%s] in state[%s]" %
                                  (event.event, self._fsm.current))
        elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \
                self._fsm.current == State._ESTABLISHED:
            self._connection.handle_sandesh_ctrl_msg(event.msg)
        elif self._fsm.cannot(event.event) is True:
            self._logger.info("Unconsumed event[%s] in state[%s]" %
                              (event.event, self._fsm.current))
        else:
            prev_state = self.state()
            getattr(self._fsm, event.event)(sm=self, sm_event=event)
            # Log state transition
            self._logger.info(
                "Sandesh Client: Event[%s] => State[%s] -> State[%s]" %
                (event.event, prev_state, self.state()))
class Sandesh(object):

    _DEFAULT_LOG_FILE = SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = SandeshLogger._DEFAULT_SYSLOG_FACILITY

    class SandeshRole:
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2
    # end class SandeshRole

    def __init__(self):
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
        self._connect_to_collector = True
    # end __init__

    # Public functions

    def init_generator(self, module, source, node_type, instance_id,
                       collectors, client_context, http_port,
                       sandesh_req_uve_pkg_list=None, discovery_client=None,
                       connect_to_collector=True):
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        self._client_context = client_context
        self._collectors = collectors
        self._connect_to_collector = connect_to_collector
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        self._init_logger(source + ':' + module + ':' + node_type + ':' +
                          instance_id)
        self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                          connect_to_collector)
        self._stats = SandeshStats()
        self._trace = trace.Trace()
        self._sandesh_request_dict = {}
        self._uve_type_maps = SandeshUVETypeMaps(self._logger)
        if sandesh_req_uve_pkg_list is None:
            sandesh_req_uve_pkg_list = []
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        self._gev_httpd = None
        if http_port != -1:
            self._http_server = SandeshHttp(
                self, module, http_port, sandesh_req_uve_pkg_list)
            self._gev_httpd = gevent.spawn(
                self._http_server.start_http_server)
        primary_collector = None
        secondary_collector = None
        if self._collectors is not None:
            if len(self._collectors) > 0:
                primary_collector = self._collectors[0]
            if len(self._collectors) > 1:
                secondary_collector = self._collectors[1]
        if self._connect_to_collector:
            self._client = SandeshClient(
                self, primary_collector, secondary_collector,
                discovery_client)
            self._client.initiate()
    # end init_generator

    def uninit(self):
        self.kill_httpd()

    def kill_httpd(self):
        if self._gev_httpd:
            try:
                self._gev_httpd.kill()
            except Exception as e:
                self._logger.debug(str(e))

    def record_port(self, name, port):
        pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name)
        try:
            pipeout = os.open(pipe_name, os.O_WRONLY)
        except:
            self._logger.error('Cannot write %s_port %d to %s' %
                               (name, port, pipe_name))
        else:
            self._logger.error('Writing %s_port %d to %s' %
                               (name, port, pipe_name))
            os.write(pipeout, '%d\n' % port)
            os.close(pipeout)

    def logger(self):
        return self._logger
    # end logger

    def sandesh_logger(self):
        return self._sandesh_logger
    # end sandesh_logger

    def set_logging_params(self, enable_local_log=False, category='',
                           level=SandeshLevel.SYS_INFO,
                           file=SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY):
        self._sandesh_logger.set_logging_params(
            enable_local_log, category, level, file,
            enable_syslog, syslog_facility)
    # end set_logging_params

    def set_local_logging(self, enable_local_log):
        self._sandesh_logger.set_local_logging(enable_local_log)
    # end set_local_logging

    def set_logging_level(self, level):
        self._sandesh_logger.set_logging_level(level)
    # end set_logging_level

    def set_logging_category(self, category):
        self._sandesh_logger.set_logging_category(category)
    # end set_logging_category

    def set_logging_file(self, file):
        self._sandesh_logger.set_logging_file(file)
    # end set_logging_file

    def is_send_queue_enabled(self):
        return self._send_queue_enabled
    # end is_send_queue_enabled

    def is_connect_to_collector_enabled(self):
        return self._connect_to_collector
    # end is_connect_to_collector_enabled

    def set_send_queue(self, enable):
        if self._send_queue_enabled != enable:
            self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                              self._send_queue_enabled, enable)
            self._send_queue_enabled = enable
            if enable:
                connection = self._client.connection()
                if connection and connection.session():
                    connection.session().send_queue().may_be_start_runner()
    # end set_send_queue

    def init_collector(self):
        pass
    # end init_collector

    def stats(self):
        return self._stats
    # end stats

    @classmethod
    def next_seqnum(cls):
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 1
        else:
            cls._lseqnum += 1
        return cls._lseqnum
    # end next_seqnum

    @classmethod
    def lseqnum(cls):
        if not hasattr(cls, '_lseqnum'):
            cls._lseqnum = 0
        return cls._lseqnum
    # end lseqnum

    def module(self):
        return self._module
    # end module

    def source_id(self):
        return self._source
    # end source_id

    def node_type(self):
        return self._node_type
    # end node_type

    def instance_id(self):
        return self._instance_id
    # end instance_id

    def scope(self):
        return self._scope
    # end scope

    def context(self):
        return self._context
    # end context

    def seqnum(self):
        return self._seqnum
    # end seqnum

    def timestamp(self):
        return self._timestamp
    # end timestamp

    def versionsig(self):
        return self._versionsig
    # end versionsig

    def type(self):
        return self._type
    # end type

    def hints(self):
        return self._hints
    # end hints

    def client(self):
        return self._client
    # end client

    def level(self):
        return self._level
    # end level

    def category(self):
        return self._category
    # end category

    def validate(self):
        return
    # end validate

    def is_local_logging_enabled(self):
        return self._sandesh_logger.is_local_logging_enabled()
    # end is_local_logging_enabled

    def logging_level(self):
        return self._sandesh_logger.logging_level()
    # end logging_level

    def logging_category(self):
        return self._sandesh_logger.logging_category()
    # end logging_category

    def is_syslog_logging_enabled(self):
        return self._sandesh_logger.is_syslog_logging_enabled()
    # end is_syslog_logging_enabled

    def logging_syslog_facility(self):
        return self._sandesh_logger.logging_syslog_facility()
    # end logging_syslog_facility

    def is_unit_test(self):
        return self._role == self.SandeshRole.INVALID
    # end is_unit_test

    def handle_test(self, sandesh_init):
        if sandesh_init.is_unit_test() or self._is_level_ut():
            if self._is_logging_allowed(sandesh_init):
                sandesh_init._logger.debug(self.log())
            return True
        return False

    def is_logging_allowed(self, sandesh_init):
        if not sandesh_init.is_local_logging_enabled():
            return False
        logging_level = sandesh_init.logging_level()
        level_allowed = logging_level >= self._level
        logging_category = sandesh_init.logging_category()
        if logging_category is None or len(logging_category) == 0:
            category_allowed = True
        else:
            category_allowed = logging_category == self._category
        return level_allowed and category_allowed
    # end is_logging_allowed

    def enqueue_sandesh_request(self, sandesh):
        self._rcv_queue.enqueue(sandesh)
    # end enqueue_sandesh_request

    def send_sandesh(self, tx_sandesh):
        if self._client:
            ret = self._client.send_sandesh(tx_sandesh)
        else:
            if self._connect_to_collector:
                self._logger.error('SANDESH: No Client: %s',
                                   tx_sandesh.log())
            else:
                self._logger.log(
                    SandeshLogger.get_py_logger_level(tx_sandesh.level()),
                    tx_sandesh.log())
    # end send_sandesh

    def send_generator_info(self):
        from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \
            ModuleClientState, SandeshModuleClientTrace
        client_info = SandeshClientInfo()
        try:
            client_start_time = self._start_time
        except:
            self._start_time = UTCTimestampUsec()
        finally:
            client_info.start_time = self._start_time
            client_info.pid = os.getpid()
            if self._http_server is not None:
                client_info.http_port = self._http_server.get_port()
            client_info.collector_name = \
                self._client.connection().collector()
            client_info.status = self._client.connection().state()
            client_info.successful_connections = \
                self._client.connection().statemachine().connect_count()
            client_info.primary = \
                self._client.connection().primary_collector()
            if client_info.primary is None:
                client_info.primary = ''
            client_info.secondary = \
                self._client.connection().secondary_collector()
            if client_info.secondary is None:
                client_info.secondary = ''
            module_state = ModuleClientState(name=self._source + ':' +
                                             self._node_type + ':' +
                                             self._module + ':' +
                                             self._instance_id,
                                             client_info=client_info)
            generator_info = SandeshModuleClientTrace(
                data=module_state, sandesh=self)
            generator_info.send(sandesh=self)
    # end send_generator_info

    def get_sandesh_request_object(self, request):
        try:
            req_module = self._sandesh_request_dict[request]
        except KeyError:
            self._logger.error('Invalid Sandesh Request "%s"' % (request))
            return None
        else:
            if req_module:
                try:
                    imp_module = importlib.import_module(req_module)
                except ImportError:
                    self._logger.error('Failed to import Module "%s"' %
                                       (req_module))
                else:
                    try:
                        sandesh_request = getattr(imp_module, request)()
                        return sandesh_request
                    except AttributeError:
                        self._logger.error(
                            'Failed to create Sandesh Request "%s"' %
                            (request))
                        return None
            else:
                self._logger.error(
                    'Sandesh Request "%s" not implemented' % (request))
                return None
    # end get_sandesh_request_object

    def trace_enable(self):
        self._trace.TraceOn()
    # end trace_enable

    def trace_disable(self):
        self._trace.TraceOff()
    # end trace_disable

    def is_trace_enabled(self):
        return self._trace.IsTraceOn()
    # end is_trace_enabled

    def trace_buffer_create(self, name, size, enable=True):
        self._trace.TraceBufAdd(name, size, enable)
    # end trace_buffer_create

    def trace_buffer_delete(self, name):
        self._trace.TraceBufDelete(name)
    # end trace_buffer_delete

    def trace_buffer_enable(self, name):
        self._trace.TraceBufOn(name)
    # end trace_buffer_enable

    def trace_buffer_disable(self, name):
        self._trace.TraceBufOff(name)
    # end trace_buffer_disable

    def is_trace_buffer_enabled(self, name):
        return self._trace.IsTraceBufOn(name)
    # end is_trace_buffer_enabled

    def trace_buffer_list_get(self):
        return self._trace.TraceBufListGet()
    # end trace_buffer_list_get

    def trace_buffer_size_get(self, name):
        return self._trace.TraceBufSizeGet(name)
    # end trace_buffer_size_get

    def trace_buffer_read(self, name, read_context, count, read_cb):
        self._trace.TraceRead(name, read_context, count, read_cb)
    # end trace_buffer_read

    def trace_buffer_read_done(self, name, context):
        self._trace.TraceReadDone(name, context)
    # end trace_buffer_read_done

    # API to send the trace buffer to the Collector.
    # If trace count is not specified/or zero, then the entire trace buffer
    # is sent to the Collector.
    # [Note] No duplicate trace message sent to the Collector. i.e., If there
    # is no trace message added between two consequent calls to this API,
    # then no trace message is sent to the Collector.
    def send_sandesh_trace_buffer(self, trace_buf, count=0):
        trace_req_runner = SandeshTraceRequestRunner(
            sandesh=self, request_buffer_name=trace_buf,
            request_context='', read_context='Collector',
            request_count=count)
        trace_req_runner.Run()
    # end send_sandesh_trace_buffer

    # Private functions

    def _is_level_ut(self):
        return (self._level >= SandeshLevel.UT_START and
                self._level <= SandeshLevel.UT_END)
    # end _is_level_ut

    def _create_task(self):
        return gevent.spawn(self._runner.run_for_ever)
    # end _create_task

    def _process_rx_sandesh(self, rx_sandesh):
        handle_request_fn = getattr(rx_sandesh, "handle_request", None)
        if callable(handle_request_fn):
            handle_request_fn(rx_sandesh)
        else:
            self._logger.error('Sandesh Request "%s" not implemented' %
                               (rx_sandesh.__class__.__name__))
    # end _process_rx_sandesh

    def _create_sandesh_request_and_uve_lists(self, package):
        try:
            imp_pkg = __import__(package)
        except ImportError:
            self._logger.error('Failed to import package "%s"' % (package))
        else:
            try:
                pkg_path = imp_pkg.__path__
            except AttributeError:
                self._logger.error('Failed to get package [%s] path' %
                                   (package))
                return
            for importer, mod, ispkg in pkgutil.walk_packages(
                    path=pkg_path, prefix=imp_pkg.__name__ + '.'):
                if not ispkg:
                    module = mod.rsplit('.', 1)[-1]
                    if 'ttypes' == module:
                        self._logger.debug(
                            'Add Sandesh requests in module "%s"' % (mod))
                        self._add_sandesh_request(mod)
                        self._logger.debug(
                            'Add Sandesh UVEs in module "%s"' % (mod))
                        self._add_sandesh_uve(mod)
                        self._logger.debug(
                            'Add Sandesh Alarms in module "%s"' % (mod))
                        self._add_sandesh_alarm(mod)
    # end _create_sandesh_request_and_uve_lists

    def _add_sandesh_request(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            try:
                sandesh_req_list = getattr(imp_module,
                                           '_SANDESH_REQUEST_LIST')
            except AttributeError:
                self._logger.error(
                    '"%s" module does not have sandesh request list' % (mod))
            else:
                # Add sandesh requests to the dictionary.
                for req in sandesh_req_list:
                    self._sandesh_request_dict[req] = mod
    # end _add_sandesh_request

    def _get_sandesh_uve_list(self, imp_module):
        try:
            sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh UVE list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_uve_list
    # end _get_sandesh_uve_list

    def _get_sandesh_uve_data_list(self, imp_module):
        try:
            sandesh_uve_data_list = getattr(imp_module,
                                            '_SANDESH_UVE_DATA_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh UVE data list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_uve_data_list
    # end _get_sandesh_uve_data_list

    def _add_sandesh_uve(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            sandesh_uve_list = self._get_sandesh_uve_list(imp_module)
            sandesh_uve_data_list = self._get_sandesh_uve_data_list(
                imp_module)
            if sandesh_uve_list is None or sandesh_uve_data_list is None:
                return
            if len(sandesh_uve_list) != len(sandesh_uve_data_list):
                self._logger.error(
                    '"%s" module sandesh UVE and UVE data list do not '
                    'match' % (mod))
                return
            sandesh_uve_info_list = zip(sandesh_uve_list,
                                        sandesh_uve_data_list)
            # Register sandesh UVEs
            for uve_type_name, uve_data_type_name in sandesh_uve_info_list:
                SandeshUVEPerTypeMap(self, SandeshType.UVE, uve_type_name,
                                     uve_data_type_name, mod)
    # end _add_sandesh_uve

    def _get_sandesh_alarm_list(self, imp_module):
        try:
            sandesh_alarm_list = getattr(imp_module, '_SANDESH_ALARM_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh Alarm list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_alarm_list
    # end _get_sandesh_alarm_list

    def _get_sandesh_alarm_data_list(self, imp_module):
        try:
            sandesh_alarm_data_list = getattr(imp_module,
                                              '_SANDESH_ALARM_DATA_LIST')
        except AttributeError:
            self._logger.error(
                '"%s" module does not have sandesh Alarm data list' %
                (imp_module.__name__))
            return None
        else:
            return sandesh_alarm_data_list
    # end _get_sandesh_alarm_data_list

    def _add_sandesh_alarm(self, mod):
        try:
            imp_module = importlib.import_module(mod)
        except ImportError:
            self._logger.error('Failed to import Module "%s"' % (mod))
        else:
            sandesh_alarm_list = self._get_sandesh_alarm_list(imp_module)
            sandesh_alarm_data_list = self._get_sandesh_alarm_data_list(
                imp_module)
            if sandesh_alarm_list is None or sandesh_alarm_data_list is None:
                return
            if len(sandesh_alarm_list) != len(sandesh_alarm_data_list):
                self._logger.error(
                    '"%s" module sandesh Alarm and Alarm data list do not '
                    'match' % (mod))
                return
            sandesh_alarm_info_list = zip(sandesh_alarm_list,
                                          sandesh_alarm_data_list)
            # Register sandesh Alarms
            for alarm_type_name, alarm_data_type_name in \
                    sandesh_alarm_info_list:
                SandeshUVEPerTypeMap(self, SandeshType.ALARM,
                                     alarm_type_name, alarm_data_type_name,
                                     mod)
    # end _add_sandesh_alarm

    def _init_logger(self, generator):
        if not generator:
            generator = 'sandesh'
        self._sandesh_logger = SandeshLogger(generator)
        self._logger = self._sandesh_logger.logger()
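# The request/UVE discovery above hinges on pkgutil.walk_packages. A tiny
# standalone illustration of the same call, walking the stdlib 'logging'
# package instead of a generated Sandesh package ('handlers' stands in for
# the 'ttypes' module name the Sandesh code keys on):
import importlib
import pkgutil

pkg = importlib.import_module('logging')  # any package with submodules
for importer, mod, ispkg in pkgutil.walk_packages(
        path=pkg.__path__, prefix=pkg.__name__ + '.'):
    if not ispkg and mod.rsplit('.', 1)[-1] == 'handlers':
        print('would register sandesh types from', mod)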
class SandeshSession(TcpSession):

    _KEEPALIVE_IDLE_TIME = 15  # in secs
    _KEEPALIVE_INTERVAL = 3  # in secs
    _KEEPALIVE_PROBES = 5
    _TCP_USER_TIMEOUT_OPT = 18
    _TCP_USER_TIMEOUT_VAL = 30000  # ms

    def __init__(self, sandesh_instance, server, event_handler,
                 sandesh_msg_handler):
        self._sandesh_instance = sandesh_instance
        self._logger = sandesh_instance._logger
        self._event_handler = event_handler
        self._reader = SandeshReader(self, sandesh_msg_handler)
        self._writer = SandeshWriter(self)
        self._send_queue = WorkQueue(self._send_sandesh,
                                     self._is_ready_to_send_sandesh)
        TcpSession.__init__(self, server)
    # end __init__

    # Public functions

    def sandesh_instance(self):
        return self._sandesh_instance
    # end sandesh_instance

    def is_send_queue_empty(self):
        return self._send_queue.is_queue_empty()
    # end is_send_queue_empty

    def is_connected(self):
        return self._connected
    # end is_connected

    def enqueue_sandesh(self, sandesh):
        self._send_queue.enqueue(sandesh)
    # end enqueue_sandesh

    def send_queue(self):
        return self._send_queue
    # end send_queue

    # Overloaded functions from TcpSession

    def connect(self):
        TcpSession.connect(self, timeout=5)
    # end connect

    def _on_read(self, buf):
        if self._reader.read_msg(buf) < 0:
            self._logger.error('SandeshReader Error. Close Collector session')
            self.close()
    # end _on_read

    def _handle_event(self, event):
        self._event_handler(self, event)
    # end _handle_event

    def _set_socket_options(self):
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        if hasattr(socket, 'TCP_KEEPIDLE'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, 'TCP_KEEPALIVE'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, 'TCP_KEEPINTVL'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL,
                                    self._KEEPALIVE_INTERVAL)
        if hasattr(socket, 'TCP_KEEPCNT'):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT,
                                    self._KEEPALIVE_PROBES)
        try:
            self._socket.setsockopt(socket.IPPROTO_TCP,
                                    self._TCP_USER_TIMEOUT_OPT,
                                    self._TCP_USER_TIMEOUT_VAL)
        except:
            self._logger.error('setsockopt failed: option %d, value %d' %
                               (self._TCP_USER_TIMEOUT_OPT,
                                self._TCP_USER_TIMEOUT_VAL))
    # end _set_socket_options

    # Private functions

    def _send_sandesh(self, sandesh):
        if self._send_queue.is_queue_empty():
            more = False
        else:
            more = True
        if not self._connected:
            if self._sandesh_instance.is_logging_dropped_allowed(sandesh):
                self._logger.error("SANDESH: %s: %s" %
                                   ("Not connected", sandesh.log()))
            return
        if sandesh.is_logging_allowed(self._sandesh_instance):
            self._logger.log(
                SandeshLogger.get_py_logger_level(sandesh.level()),
                sandesh.log())
        self._writer.send_msg(sandesh, more)
    # end _send_sandesh

    def _is_ready_to_send_sandesh(self):
        return self._sandesh_instance.is_send_queue_enabled()
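# _set_socket_options above relies on platform-dependent keepalive knobs.
# A minimal standalone sketch of the same options on a plain socket; the
# numeric option 18 is TCP_USER_TIMEOUT on Linux, matching the hard-coded
# constant above, so the call is wrapped in try/except as the class does:
import socket

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
if hasattr(socket, 'TCP_KEEPIDLE'):
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 15)
if hasattr(socket, 'TCP_KEEPINTVL'):
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 3)
if hasattr(socket, 'TCP_KEEPCNT'):
    s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, 5)
try:
    s.setsockopt(socket.IPPROTO_TCP, 18, 30000)  # TCP_USER_TIMEOUT, ms
except OSError:
    pass  # option unsupported on this platform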
class SandeshSession(TcpSession):

    _KEEPALIVE_IDLE_TIME = 15  # in secs
    _KEEPALIVE_INTERVAL = 3  # in secs
    _KEEPALIVE_PROBES = 5
    _TCP_USER_TIMEOUT_OPT = 18
    _TCP_USER_TIMEOUT_VAL = 30000  # ms

    def __init__(self, sandesh_instance, server, event_handler,
                 sandesh_msg_handler):
        self._sandesh_instance = sandesh_instance
        self._logger = sandesh_instance._logger
        self._event_handler = event_handler
        self._reader = SandeshReader(self, sandesh_msg_handler)
        self._writer = SandeshWriter(self)
        self._send_queue = WorkQueue(self._send_sandesh,
                                     self._is_ready_to_send_sandesh)
        TcpSession.__init__(self, server)
    # end __init__

    # Public functions

    def sandesh_instance(self):
        return self._sandesh_instance
    # end sandesh_instance

    def is_send_queue_empty(self):
        return self._send_queue.is_queue_empty()
    # end is_send_queue_empty

    def is_connected(self):
        return self._connected
    # end is_connected

    def enqueue_sandesh(self, sandesh):
        self._send_queue.enqueue(sandesh)
    # end enqueue_sandesh

    def send_queue(self):
        return self._send_queue
    # end send_queue

    # Overloaded functions from TcpSession

    def connect(self):
        TcpSession.connect(self, timeout=5)
    # end connect

    def _on_read(self, buf):
        if self._reader.read_msg(buf) < 0:
            self._logger.error("SandeshReader Error. Close Collector session")
            self.close()
    # end _on_read

    def _handle_event(self, event):
        self._event_handler(self, event)
    # end _handle_event

    def _set_socket_options(self):
        self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
        if hasattr(socket, "TCP_KEEPIDLE"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, "TCP_KEEPALIVE"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE,
                                    self._KEEPALIVE_IDLE_TIME)
        if hasattr(socket, "TCP_KEEPINTVL"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL,
                                    self._KEEPALIVE_INTERVAL)
        if hasattr(socket, "TCP_KEEPCNT"):
            self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT,
                                    self._KEEPALIVE_PROBES)
        try:
            self._socket.setsockopt(socket.IPPROTO_TCP,
                                    self._TCP_USER_TIMEOUT_OPT,
                                    self._TCP_USER_TIMEOUT_VAL)
        except:
            self._logger.error(
                "setsockopt failed: option %d, value %d" %
                (self._TCP_USER_TIMEOUT_OPT, self._TCP_USER_TIMEOUT_VAL))
    # end _set_socket_options

    # Private functions

    def _send_sandesh(self, sandesh):
        if self._send_queue.is_queue_empty():
            more = False
        else:
            more = True
        if not self._connected:
            if self._sandesh_instance.is_logging_dropped_allowed(sandesh):
                self._logger.error("SANDESH: %s: %s" %
                                   ("Not connected", sandesh.log()))
            self._sandesh_instance.msg_stats().update_tx_stats(
                sandesh.__class__.__name__, 0,
                SandeshTxDropReason.SessionNotConnected)
            return
        if sandesh.is_logging_allowed(self._sandesh_instance):
            self._logger.log(
                SandeshLogger.get_py_logger_level(sandesh.level()),
                sandesh.log())
        self._writer.send_msg(sandesh, more)
    # end _send_sandesh

    def _is_ready_to_send_sandesh(self):
        return self._sandesh_instance.is_send_queue_enabled()
class Sandesh(object):
    _DEFAULT_LOG_FILE = sand_logger.SandeshLogger._DEFAULT_LOG_FILE
    _DEFAULT_SYSLOG_FACILITY = (
        sand_logger.SandeshLogger._DEFAULT_SYSLOG_FACILITY)

    class SandeshRole:
        INVALID = 0
        GENERATOR = 1
        COLLECTOR = 2
    # end class SandeshRole

    def __init__(self):
        self._context = ''
        self._scope = ''
        self._module = ''
        self._source = ''
        self._node_type = ''
        self._instance_id = ''
        self._timestamp = 0
        self._versionsig = 0
        self._type = 0
        self._hints = 0
        self._client_context = ''
        self._client = None
        self._role = self.SandeshRole.INVALID
        self._logger = None
        self._level = SandeshLevel.INVALID
        self._category = ''
        self._send_queue_enabled = True
        self._http_server = None
        self._connect_to_collector = True
        self._disable_sending_object_logs = False
        self._disable_sending_all_messages = False
    # end __init__

    # Public functions

    def init_generator(self, module, source, node_type, instance_id,
                       collectors, client_context, http_port,
                       sandesh_req_uve_pkg_list=None,
                       connect_to_collector=True,
                       logger_class=None, logger_config_file=None,
                       host_ip='127.0.0.1', alarm_ack_callback=None,
                       config=None):
        self._role = self.SandeshRole.GENERATOR
        self._module = module
        self._source = source
        self._node_type = node_type
        self._instance_id = instance_id
        self._sandesh_req_uve_pkg_list = sandesh_req_uve_pkg_list or []
        self._host_ip = host_ip
        self._client_context = client_context
        self._connect_to_collector = connect_to_collector
        self._rcv_queue = WorkQueue(self._process_rx_sandesh)
        self._send_level = SandeshLevel.INVALID
        self._init_logger(self._module, logger_class=logger_class,
                          logger_config_file=logger_config_file)
        self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s',
                          connect_to_collector)
        from sandesh_stats import SandeshMessageStatistics
        self._msg_stats = SandeshMessageStatistics()
        self._trace = trace.Trace()
        self._sandesh_request_map = {}
        self._alarm_ack_callback = alarm_ack_callback
        self._config = config or SandeshConfig.from_parser_arguments()
        self._uve_type_maps = SandeshUVETypeMaps(self._logger)
        # Initialize the request handling
        # Import here to break the cyclic import dependency
        import sandesh_req_impl
        sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self)
        self._sandesh_req_uve_pkg_list.append('pysandesh.gen_py')
        for pkg_name in self._sandesh_req_uve_pkg_list:
            self._create_sandesh_request_and_uve_lists(pkg_name)
        if self._config.disable_object_logs is not None:
            self.disable_sending_object_logs(
                self._config.disable_object_logs)
        if self._config.system_logs_rate_limit is not None:
            SandeshSystem.set_sandesh_send_rate_limit(
                self._config.system_logs_rate_limit)
        self._gev_httpd = None
        if http_port != -1:
            self.run_introspect_server(http_port)
        if self._connect_to_collector:
            self._client = SandeshClient(self)
            self._client.initiate(collectors)
    # end init_generator

    def run_introspect_server(self, http_port):
        self._http_server = SandeshHttp(self, self._module, http_port,
                                        self._sandesh_req_uve_pkg_list,
                                        self._config)
        self._gev_httpd = gevent.spawn(self._http_server.start_http_server)
    # end run_introspect_server

    def uninit(self):
        self.kill_httpd()

    def kill_httpd(self):
        if self._gev_httpd:
            try:
                self._http_server.stop_http_server()
                self._http_server = None
                gevent.sleep(0)
                self._gev_httpd.kill()
            except Exception as e:
                self._logger.debug(str(e))

    def record_port(self, name, port):
        pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name)
        try:
            pipeout = os.open(pipe_name, os.O_WRONLY)
        except Exception:
            self._logger.error('Cannot write %s_port %d to %s' %
                               (name, port, pipe_name))
        else:
            self._logger.error('Writing %s_port %d to %s' %
                               (name, port, pipe_name))
            os.write(pipeout, '%d\n' % port)
            os.close(pipeout)

    def logger(self):
        return self._logger
    # end logger

    def sandesh_logger(self):
        return self._sandesh_logger
    # end sandesh_logger

    def set_logging_params(self, enable_local_log=False, category='',
                           level=SandeshLevel.SYS_INFO,
                           file=sand_logger.SandeshLogger._DEFAULT_LOG_FILE,
                           enable_syslog=False,
                           syslog_facility=_DEFAULT_SYSLOG_FACILITY,
                           enable_trace_print=False, enable_flow_log=False):
        self._sandesh_logger.set_logging_params(
            enable_local_log=enable_local_log, category=category,
            level=level, file=file, enable_syslog=enable_syslog,
            syslog_facility=syslog_facility,
            enable_trace_print=enable_trace_print,
            enable_flow_log=enable_flow_log)
    # end set_logging_params

    def set_trace_print(self, enable_trace_print):
        self._sandesh_logger.set_trace_print(enable_trace_print)
    # end set_trace_print

    def set_flow_logging(self, enable_flow_log):
        self._sandesh_logger.set_flow_logging(enable_flow_log)
    # end set_flow_logging

    def set_local_logging(self, enable_local_log):
        self._sandesh_logger.set_local_logging(enable_local_log)
    # end set_local_logging

    def set_logging_level(self, level):
        self._sandesh_logger.set_logging_level(level)
    # end set_logging_level

    def set_logging_category(self, category):
        self._sandesh_logger.set_logging_category(category)
    # end set_logging_category

    def set_logging_file(self, file):
        self._sandesh_logger.set_logging_file(file)
    # end set_logging_file

    def is_logging_dropped_allowed(self, sandesh):
        if sandesh.type() == SandeshType.FLOW:
            return self.is_flow_logging_enabled()
        else:
            if hasattr(sandesh, 'do_rate_limit_drop_log'):
                return sandesh.do_rate_limit_drop_log
        return True
    # end is_logging_dropped_allowed

    def is_send_queue_enabled(self):
        return self._send_queue_enabled
    # end is_send_queue_enabled

    def is_connect_to_collector_enabled(self):
        return self._connect_to_collector
    # end is_connect_to_collector_enabled

    def is_sending_object_logs_disabled(self):
        return self._disable_sending_object_logs
    # end is_sending_object_logs_disabled

    def disable_sending_object_logs(self, disable):
        if self._disable_sending_object_logs != disable:
            self._logger.info(
                "SANDESH: Disable Sending Object "
                "Logs: %s -> %s", self._disable_sending_object_logs, disable)
            self._disable_sending_object_logs = disable
    # end disable_sending_object_logs

    def is_sending_all_messages_disabled(self):
        return self._disable_sending_all_messages
    # end is_sending_all_messages_disabled

    def disable_sending_all_messages(self, disable):
        if self._disable_sending_all_messages != disable:
            self._logger.info(
                "SANDESH: Disable Sending ALL Messages: "
                "%s -> %s", self._disable_sending_all_messages, disable)
            self._disable_sending_all_messages = disable
    # end disable_sending_all_messages

    def set_send_queue(self, enable):
        if self._send_queue_enabled != enable:
            self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s",
                              self._send_queue_enabled, enable)
            self._send_queue_enabled = enable
            if enable:
                connection = self._client.connection()
                if connection and connection.session():
                    connection.session().send_queue().may_be_start_runner()
    # end set_send_queue

    def set_send_level(self, count, sandesh_level):
        if self._send_level != sandesh_level:
            self._logger.info(
                'Sandesh Send Level [%s] -> [%s]' %
                (SandeshLevel._VALUES_TO_NAMES[self._send_level],
                 SandeshLevel._VALUES_TO_NAMES[sandesh_level]))
            self._send_level = sandesh_level
    # end set_send_level

    def send_level(self):
        return self._send_level
    # end send_level

    def init_collector(self):
        pass
    # end init_collector

    def msg_stats(self):
return self._msg_stats # end msg_stats def reconfig_collectors(self, collectors): self._client.set_collectors(collectors) # end reconfig_collectors @classmethod def next_seqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 1 else: cls._lseqnum += 1 return cls._lseqnum # end next_seqnum @classmethod def lseqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 0 return cls._lseqnum # end lseqnum def module(self): return self._module # end module def source_id(self): return self._source # end source_id def node_type(self): return self._node_type # end node_type def instance_id(self): return self._instance_id # end instance_id def host_ip(self): return self._host_ip # end host_ip def scope(self): return self._scope # end scope def context(self): return self._context # end context def seqnum(self): return self._seqnum # end seqnum def timestamp(self): return self._timestamp # end timestamp def versionsig(self): return self._versionsig # end versionsig def type(self): return self._type # end type def hints(self): return self._hints # end hints def client(self): return self._client # end client def level(self): return self._level # end level def category(self): return self._category # end category def validate(self): return # end validate def alarm_ack_callback(self): return self._alarm_ack_callback # end alarm_ack_callback def config(self): return self._config # end config def is_flow_logging_enabled(self): return self._sandesh_logger.is_flow_logging_enabled() # end is_flow_logging_enabled def is_trace_print_enabled(self): return self._sandesh_logger.is_trace_print_enabled() # end is_trace_print_enabled def is_local_logging_enabled(self): return self._sandesh_logger.is_local_logging_enabled() # end is_local_logging_enabled def logging_level(self): return self._sandesh_logger.logging_level() # end logging_level def logging_category(self): return self._sandesh_logger.logging_category() # end logging_category def is_syslog_logging_enabled(self): return self._sandesh_logger.is_syslog_logging_enabled() # end is_syslog_logging_enabled def logging_syslog_facility(self): return self._sandesh_logger.logging_syslog_facility() # end logging_syslog_facility def is_unit_test(self): return self._role == self.SandeshRole.INVALID # end is_unit_test def handle_test(self, sandesh_init): if sandesh_init.is_unit_test() or self._is_level_ut(): if self.is_logging_allowed(sandesh_init): sandesh_init._logger.debug(self.log()) return True return False def is_logging_allowed(self, sandesh_init): if self._type == SandeshType.FLOW: return sandesh_init.is_flow_logging_enabled() if not sandesh_init.is_local_logging_enabled(): return False logging_level = sandesh_init.logging_level() level_allowed = logging_level >= self._level logging_category = sandesh_init.logging_category() if logging_category is None or len(logging_category) == 0: category_allowed = True else: category_allowed = logging_category == self._category return level_allowed and category_allowed # end is_logging_allowed def enqueue_sandesh_request(self, sandesh): self._rcv_queue.enqueue(sandesh) # end enqueue_sandesh_request def send_sandesh(self, tx_sandesh): if self._client: self._client.send_sandesh(tx_sandesh) else: if self._connect_to_collector: self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient) else: self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient, tx_sandesh.level()) # end send_sandesh def drop_tx_sandesh(self, tx_sandesh, drop_reason, level=None): self._msg_stats.update_tx_stats(tx_sandesh.__class__.__name__, 
sys.getsizeof(tx_sandesh), drop_reason) if self.is_logging_dropped_allowed(tx_sandesh): if level is not None: self._logger.log( sand_logger.SandeshLogger.get_py_logger_level(level), tx_sandesh.log()) else: self._logger.error('SANDESH: [DROP: %s] %s' % \ (SandeshTxDropReason._VALUES_TO_NAMES[drop_reason], tx_sandesh.log())) # end drop_tx_sandesh def send_generator_info(self): from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \ ModuleClientState, SandeshModuleClientTrace if not self._client or not self._client.connection(): return client_info = SandeshClientInfo() try: client_start_time = self._start_time except Exception: self._start_time = util.UTCTimestampUsec() finally: client_info.start_time = self._start_time client_info.pid = os.getpid() if self._http_server is not None: client_info.http_port = self._http_server.get_port() client_info.collector_name = \ self._client.connection().collector_name() or '' client_info.collector_ip = \ self._client.connection().collector() or '' client_info.collector_list = self._client.connection().collectors() client_info.status = self._client.connection().state() client_info.successful_connections = ( self._client.connection().statemachine().connect_count()) module_state = ModuleClientState(name=self._source + ':' + self._node_type + ':' + self._module + ':' + self._instance_id, client_info=client_info, sm_queue_count=self._client.\ connection().statemachine()._event_queue.size(), max_sm_queue_count=self._client.\ connection().statemachine()._event_queue.max_qlen()) generator_info = SandeshModuleClientTrace(data=module_state, sandesh=self) generator_info.send(sandesh=self) # end send_generator_info def get_sandesh_request_object(self, request): try: req_type = self._sandesh_request_map[request] except KeyError: self._logger.error('Invalid Sandesh Request "%s"' % (request)) return None else: return req_type() # end get_sandesh_request_object def trace_enable(self): self._trace.TraceOn() # end trace_enable def trace_disable(self): self._trace.TraceOff() # end trace_disable def is_trace_enabled(self): return self._trace.IsTraceOn() # end is_trace_enabled def trace_buffer_create(self, name, size, enable=True): self._trace.TraceBufAdd(name, size, enable) # end trace_buffer_create def trace_buffer_delete(self, name): self._trace.TraceBufDelete(name) # end trace_buffer_delete def trace_buffer_enable(self, name): self._trace.TraceBufOn(name) # end trace_buffer_enable def trace_buffer_disable(self, name): self._trace.TraceBufOff(name) # end trace_buffer_disable def is_trace_buffer_enabled(self, name): return self._trace.IsTraceBufOn(name) # end is_trace_buffer_enabled def trace_buffer_list_get(self): return self._trace.TraceBufListGet() # end trace_buffer_list_get def trace_buffer_size_get(self, name): return self._trace.TraceBufSizeGet(name) # end trace_buffer_size_get def trace_buffer_read(self, name, read_context, count, read_cb): self._trace.TraceRead(name, read_context, count, read_cb) # end trace_buffer_read def trace_buffer_read_done(self, name, context): self._trace.TraceReadDone(name, context) # end trace_buffer_read_done # API to send the trace buffer to the Collector. # If trace count is not specified/or zero, then the entire trace buffer # is sent to the Collector. # [Note] No duplicate trace message sent to the Collector. i.e., If there # is no trace message added between two consequent calls to this API, then # no trace message is sent to the Collector. 
def send_sandesh_trace_buffer(self, trace_buf, count=0): trace_req_runner = SandeshTraceRequestRunner( sandesh=self, request_buffer_name=trace_buf, request_context='', read_context='Collector', request_count=count) trace_req_runner.Run() # end send_sandesh_trace_buffer # Private functions def _is_level_ut(self): return (self._level >= SandeshLevel.UT_START and self._level <= SandeshLevel.UT_END) # end _is_level_ut def _create_task(self): return gevent.spawn(self._runner.run_for_ever) # end _create_task def _process_rx_sandesh(self, rx_sandesh): handle_request_fn = getattr(rx_sandesh, "handle_request", None) if callable(handle_request_fn): handle_request_fn(rx_sandesh) else: self._logger.error('Sandesh Request "%s" not implemented' % (rx_sandesh.__class__.__name__)) # end _process_rx_sandesh def _create_sandesh_request_and_uve_lists(self, package): try: imp_pkg = __import__(package) except ImportError: self._logger.error('Failed to import package "%s"' % (package)) else: try: pkg_path = imp_pkg.__path__ except AttributeError: self._logger.error('Failed to get package [%s] path' % (package)) return for importer, mod, ispkg in (pkgutil.walk_packages( path=pkg_path, prefix=imp_pkg.__name__ + '.')): if not ispkg: module = mod.rsplit('.', 1)[-1] if 'ttypes' == module: self._logger.debug( 'Add Sandesh requests in module "%s"' % (mod)) self._add_sandesh_request(mod) self._logger.debug('Add Sandesh UVEs in module "%s"' % (mod)) self._add_sandesh_uve(mod) self._logger.debug( 'Add Sandesh Alarms in module "%s"' % (mod)) self._add_sandesh_alarm(mod) # end _create_sandesh_request_and_uve_lists def _add_sandesh_request(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: try: sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh request list' % (mod)) else: # Add sandesh requests to the dictionary. 
for req in sandesh_req_list: self._sandesh_request_map[req.__name__] = req # end _add_sandesh_request def _get_sandesh_uve_list(self, imp_module): try: sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST') except AttributeError: self._logger.error('"%s" module does not have sandesh UVE list' % (imp_module.__name__)) return None else: return sandesh_uve_list # end _get_sandesh_uve_list def _add_sandesh_uve(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: sandesh_uve_list = self._get_sandesh_uve_list(imp_module) if not sandesh_uve_list: return # Register sandesh UVEs for uve_type, uve_data_type in sandesh_uve_list: SandeshUVEPerTypeMap(self, SandeshType.UVE, uve_type, uve_data_type) # end _add_sandesh_uve def _get_sandesh_alarm_list(self, imp_module): try: sandesh_alarm_list = getattr(imp_module, '_SANDESH_ALARM_LIST') except AttributeError: self._logger.error('"%s" module does not have sandesh Alarm list' % (imp_module.__name__)) return None else: return sandesh_alarm_list # end _get_sandesh_alarm_list def _add_sandesh_alarm(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: sandesh_alarm_list = self._get_sandesh_alarm_list(imp_module) if not sandesh_alarm_list: return # Register sandesh Alarms for alarm_type, alarm_data_type in sandesh_alarm_list: SandeshUVEPerTypeMap(self, SandeshType.ALARM, alarm_type, alarm_data_type) # end _add_sandesh_alarm def _init_logger(self, module, logger_class=None, logger_config_file=None): if not module: module = 'sandesh' if logger_class: self._sandesh_logger = (sand_logger.create_logger( module, logger_class, logger_config_file=logger_config_file)) else: self._sandesh_logger = sand_logger.SandeshLogger( module, logger_config_file=logger_config_file) self._logger = self._sandesh_logger.logger()
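# Illustrative sketch (not pysandesh API) of the discovery done by
# _create_sandesh_request_and_uve_lists() above: walk a package, pick out its
# ttypes modules, and index the classes listed in their
# _SANDESH_REQUEST_LIST attribute, keyed by class name as in
# _add_sandesh_request().
import importlib
import pkgutil

def collect_requests(package_name):
    requests = {}
    pkg = importlib.import_module(package_name)
    for _, mod, ispkg in pkgutil.walk_packages(path=pkg.__path__,
                                               prefix=pkg.__name__ + '.'):
        if ispkg or mod.rsplit('.', 1)[-1] != 'ttypes':
            continue
        module = importlib.import_module(mod)
        for req in getattr(module, '_SANDESH_REQUEST_LIST', []):
            requests[req.__name__] = req
    return requests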
class Sandesh(object): _DEFAULT_LOG_FILE = SandeshLogger._DEFAULT_LOG_FILE _DEFAULT_SYSLOG_FACILITY = SandeshLogger._DEFAULT_SYSLOG_FACILITY class SandeshRole: INVALID = 0 GENERATOR = 1 COLLECTOR = 2 # end class SandeshRole def __init__(self): self._context = '' self._scope = '' self._module = '' self._source = '' self._node_type = '' self._instance_id = '' self._timestamp = 0 self._versionsig = 0 self._type = 0 self._hints = 0 self._client_context = '' self._client = None self._role = self.SandeshRole.INVALID self._logger = None self._level = SandeshLevel.INVALID self._category = '' self._send_queue_enabled = True self._http_server = None # end __init__ # Public functions def init_generator(self, module, source, node_type, instance_id, collectors, client_context, http_port, sandesh_req_uve_pkg_list=None, discovery_client=None): self._role = self.SandeshRole.GENERATOR self._module = module self._source = source self._node_type = node_type self._instance_id = instance_id self._client_context = client_context self._collectors = collectors self._rcv_queue = WorkQueue(self._process_rx_sandesh) self._init_logger(source + ':' + module + ':' + node_type + ':' \ + instance_id) self._stats = SandeshStats() self._trace = trace.Trace() self._sandesh_request_dict = {} self._uve_type_maps = SandeshUVETypeMaps() if sandesh_req_uve_pkg_list is None: sandesh_req_uve_pkg_list = [] # Initialize the request handling # Import here to break the cyclic import dependency import sandesh_req_impl sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self) sandesh_req_uve_pkg_list.append('pysandesh.gen_py') for pkg_name in sandesh_req_uve_pkg_list: self._create_sandesh_request_and_uve_lists(pkg_name) if http_port != -1: self._http_server = SandeshHttp( self, module, http_port, sandesh_req_uve_pkg_list) gevent.spawn(self._http_server.start_http_server) primary_collector = None secondary_collector = None if self._collectors is not None: if len(self._collectors) > 0: primary_collector = self._collectors[0] if len(self._collectors) > 1: secondary_collector = self._collectors[1] self._client = SandeshClient( self, primary_collector, secondary_collector, discovery_client) self._client.initiate() # end init_generator def logger(self): return self._logger # end logger def sandesh_logger(self): return self._sandesh_logger # end sandesh_logger def set_logging_params(self, enable_local_log=False, category='', level=SandeshLevel.SYS_INFO, file=SandeshLogger._DEFAULT_LOG_FILE, enable_syslog=False, syslog_facility=_DEFAULT_SYSLOG_FACILITY): self._sandesh_logger.set_logging_params( enable_local_log, category, level, file, enable_syslog, syslog_facility) # end set_logging_params def set_local_logging(self, enable_local_log): self._sandesh_logger.set_local_logging(enable_local_log) # end set_local_logging def set_logging_level(self, level): self._sandesh_logger.set_logging_level(level) # end set_logging_level def set_logging_category(self, category): self._sandesh_logger.set_logging_category(category) # end set_logging_category def set_logging_file(self, file): self._sandesh_logger.set_logging_file(file) # end set_logging_file def is_send_queue_enabled(self): return self._send_queue_enabled # end is_send_queue_enabled def set_send_queue(self, enable): if self._send_queue_enabled != enable: self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s", self._send_queue_enabled, enable) self._send_queue_enabled = enable if enable: connection = self._client.connection() if connection and connection.session(): 
connection.session().send_queue().may_be_start_runner() # end set_send_queue def init_collector(self): pass # end init_collector def stats(self): return self._stats # end stats @classmethod def next_seqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 1 else: cls._lseqnum += 1 return cls._lseqnum # end next_seqnum @classmethod def lseqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 0 return cls._lseqnum # end lseqnum def module(self): return self._module # end module def source_id(self): return self._source # end source_id def node_type(self): return self._node_type #end node_type def instance_id(self): return self._instance_id #end instance_id def scope(self): return self._scope # end scope def context(self): return self._context # end context def seqnum(self): return self._seqnum # end seqnum def timestamp(self): return self._timestamp # end timestamp def versionsig(self): return self._versionsig # end versionsig def type(self): return self._type # end type def hints(self): return self._hints # end hints def client(self): return self._client # end client def level(self): return self._level # end level def category(self): return self._category # end category def validate(self): return # end validate def is_local_logging_enabled(self): return self._sandesh_logger.is_local_logging_enabled() # end is_local_logging_enabled def logging_level(self): return self._sandesh_logger.logging_level() # end logging_level def logging_category(self): return self._sandesh_logger.logging_category() # end logging_category def is_syslog_logging_enabled(self): return self._sandesh_logger.is_syslog_logging_enabled() #end is_syslog_logging_enabled def logging_syslog_facility(self): return self._sandesh_logger.logging_syslog_facility() #end logging_syslog_facility def is_unit_test(self): return self._role == self.SandeshRole.INVALID # end is_unit_test def handle_test(self, sandesh_init): if sandesh_init.is_unit_test() or self._is_level_ut(): if self._is_logging_allowed(sandesh_init): sandesh_init._logger.debug(self.log()) return True return False def is_logging_allowed(self, sandesh_init): if not sandesh_init.is_local_logging_enabled(): return False logging_level = sandesh_init.logging_level() level_allowed = logging_level >= self._level logging_category = sandesh_init.logging_category() if logging_category is None or len(logging_category) == 0: category_allowed = True else: category_allowed = logging_category == self._category return level_allowed and category_allowed # end is_logging_allowed def enqueue_sandesh_request(self, sandesh): self._rcv_queue.enqueue(sandesh) # end enqueue_sandesh_request def send_sandesh(self, tx_sandesh): if self._client: ret = self._client.send_sandesh(tx_sandesh) else: self._logger.debug(tx_sandesh.log()) # end send_sandesh def send_generator_info(self): from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \ ModuleClientState, SandeshModuleClientTrace client_info = SandeshClientInfo() try: client_start_time = self._start_time except: self._start_time = UTCTimestampUsec() finally: client_info.start_time = self._start_time client_info.pid = os.getpid() if self._http_server is not None: client_info.http_port = self._http_server.get_port() client_info.collector_name = self._client.connection().collector() client_info.status = self._client.connection().state() client_info.successful_connections = \ self._client.connection().statemachine().connect_count() client_info.primary = self._client.connection().primary_collector() if client_info.primary is None: 
client_info.primary = '' client_info.secondary = \ self._client.connection().secondary_collector() if client_info.secondary is None: client_info.secondary = '' module_state = ModuleClientState(name=self._source + ':' + self._node_type + ':' + self._module + ':' + self._instance_id, client_info=client_info) generator_info = SandeshModuleClientTrace( data=module_state, sandesh=self) generator_info.send(sandesh=self) # end send_generator_info def get_sandesh_request_object(self, request): try: req_module = self._sandesh_request_dict[request] except KeyError: self._logger.error('Invalid Sandesh Request "%s"' % (request)) return None else: if req_module: try: imp_module = importlib.import_module(req_module) except ImportError: self._logger.error( 'Failed to import Module "%s"' % (req_module)) else: try: sandesh_request = getattr(imp_module, request)() return sandesh_request except AttributeError: self._logger.error( 'Failed to create Sandesh Request "%s"' % (request)) return None else: self._logger.error( 'Sandesh Request "%s" not implemented' % (request)) return None # end get_sandesh_request_object def trace_enable(self): self._trace.TraceOn() # end trace_enable def trace_disable(self): self._trace.TraceOff() # end trace_disable def is_trace_enabled(self): return self._trace.IsTraceOn() # end is_trace_enabled def trace_buffer_create(self, name, size, enable=True): self._trace.TraceBufAdd(name, size, enable) # end trace_buffer_create def trace_buffer_delete(self, name): self._trace.TraceBufDelete(name) # end trace_buffer_delete def trace_buffer_enable(self, name): self._trace.TraceBufOn(name) # end trace_buffer_enable def trace_buffer_disable(self, name): self._trace.TraceBufOff(name) # end trace_buffer_disable def is_trace_buffer_enabled(self, name): return self._trace.IsTraceBufOn(name) # end is_trace_buffer_enabled def trace_buffer_list_get(self): return self._trace.TraceBufListGet() # end trace_buffer_list_get def trace_buffer_size_get(self, name): return self._trace.TraceBufSizeGet(name) # end trace_buffer_size_get def trace_buffer_read(self, name, read_context, count, read_cb): self._trace.TraceRead(name, read_context, count, read_cb) # end trace_buffer_read def trace_buffer_read_done(self, name, context): self._trace.TraceReadDone(name, context) # end trace_buffer_read_done # API to send the trace buffer to the Collector. # If trace count is not specified/or zero, then the entire trace buffer # is sent to the Collector. # [Note] No duplicate trace message sent to the Collector. i.e., If there # is no trace message added between two consequent calls to this API, then # no trace message is sent to the Collector. 
def send_sandesh_trace_buffer(self, trace_buf, count=0): trace_req_runner = SandeshTraceRequestRunner(sandesh=self, request_buffer_name= trace_buf, request_context='', read_context='Collector', request_count=count) trace_req_runner.Run() # end send_sandesh_trace_buffer # Private functions def _is_level_ut(self): return self._level >= SandeshLevel.UT_START and \ self._level <= SandeshLevel.UT_END # end _is_level_ut def _create_task(self): return gevent.spawn(self._runner.run_for_ever) # end _create_task def _process_rx_sandesh(self, rx_sandesh): handle_request_fn = getattr(rx_sandesh, "handle_request", None) if callable(handle_request_fn): handle_request_fn(rx_sandesh) else: self._logger.error('Sandesh Request "%s" not implemented' % (rx_sandesh.__class__.__name__)) # end _process_rx_sandesh def _create_sandesh_request_and_uve_lists(self, package): try: imp_pkg = __import__(package) except ImportError: self._logger.error('Failed to import package "%s"' % (package)) else: try: pkg_path = imp_pkg.__path__ except AttributeError: self._logger.error( 'Failed to get package [%s] path' % (package)) return for importer, mod, ispkg in \ pkgutil.walk_packages(path=pkg_path, prefix=imp_pkg.__name__ + '.'): if not ispkg: module = mod.rsplit('.', 1)[-1] if 'ttypes' == module: self._logger.debug( 'Add Sandesh requests in module "%s"' % (mod)) self._add_sandesh_request(mod) self._logger.debug( 'Add Sandesh UVEs in module "%s"' % (mod)) self._add_sandesh_uve(mod) # end _create_sandesh_request_and_uve_lists def _add_sandesh_request(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: try: sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh request list' % (mod)) else: # Add sandesh requests to the dictionary. for req in sandesh_req_list: self._sandesh_request_dict[req] = mod # end _add_sandesh_request def _add_sandesh_uve(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: try: sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh UVE list' % (mod)) else: # Register sandesh UVEs for uve_type_name in sandesh_uve_list: SandeshUVEPerTypeMap(self, uve_type_name, mod) # end _add_sandesh_uve def _init_logger(self, generator): if not generator: generator = 'sandesh' self._sandesh_logger = SandeshLogger(generator) self._logger = self._sandesh_logger.logger()
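# Standalone sketch of the lazy class-level sequence counter implemented by
# next_seqnum()/lseqnum() above; SeqnumExample is illustrative. The counter
# is created on first use, so lseqnum() reports 0 until next_seqnum() has
# been called at least once.
class SeqnumExample(object):
    @classmethod
    def next_seqnum(cls):
        cls._lseqnum = getattr(cls, '_lseqnum', 0) + 1
        return cls._lseqnum

    @classmethod
    def lseqnum(cls):
        return getattr(cls, '_lseqnum', 0)

assert SeqnumExample.lseqnum() == 0
assert SeqnumExample.next_seqnum() == 1
assert SeqnumExample.lseqnum() == 1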
class SandeshStateMachine(object): _IDLE_HOLD_TIME = 4 # in seconds _CONNECT_TIME = 30 # in seconds def __init__(self, connection, logger, collectors): def _update_connection_state(e, status): from connection_info import ConnectionState from gen_py.process_info.ttypes import ConnectionType collector_addr = e.sm.collector() if collector_addr is None: collector_addr = '' ConnectionState.update(conn_type = ConnectionType.COLLECTOR, name = '', status = status, server_addrs = [collector_addr], message = '%s to %s on %s' % (e.src, e.dst, e.event)) #end _update_connection_state def _connection_state_up(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.UP) #end _connection_state_up def _connection_state_down(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.DOWN) #end _connection_state_down def _connection_state_init(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.INIT) #end _connection_state_init def _on_idle(e): if e.sm._connect_timer is not None: e.sm._cancel_connect_timer() # clean up existing connection e.sm._delete_session() if e.sm._disable != True: e.sm._start_idle_hold_timer() # update connection state _connection_state_down(e) e.sm._collector_name = None e.sm._connection.sandesh_instance().send_generator_info() #end _on_idle def _on_disconnect(e): # update connection state _connection_state_down(e) #end _on_disconnect def _on_connect(e): if e.sm._idle_hold_timer is not None: e.sm._cancel_idle_hold_timer() e.sm._collector_name = None # clean up existing connection e.sm._delete_session() collector = e.sm._get_next_collector() if collector is not None: # update connection state _connection_state_init(e) e.sm._create_session(collector) e.sm._start_connect_timer() e.sm._session.connect() else: e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN)) #end _on_connect def _on_client_init(e): e.sm._connects += 1 gevent.spawn(e.sm._session.read) e.sm._connection.handle_initialized(e.sm._connects) e.sm._connection.sandesh_instance().send_generator_info() # update connection state _connection_state_init(e) #end _on_client_init def _on_established(e): e.sm._cancel_connect_timer() e.sm._collector_name = e.sm_event.source e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg) e.sm._connection.sandesh_instance().send_generator_info() # update connection state _connection_state_up(e) #end _on_established # FSM - Fysom self._fsm = Fysom({ 'initial': {'state' : State._IDLE, 'event' : Event._EV_START, 'defer' : True }, 'events': [ # _IDLE {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED, 'src' : State._IDLE, 'dst' : State._CONNECT }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._IDLE, 'dst' : State._CONNECT }, {'name' : Event._EV_START, 'src' : State._IDLE, 'dst' : State._CONNECT }, # _DISCONNECT {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._DISCONNECT, 'dst' : State._CONNECT }, # _CONNECT {'name' : Event._EV_COLLECTOR_UNKNOWN, 'src' : State._CONNECT, 'dst' : State._DISCONNECT }, {'name' : Event._EV_TCP_CONNECT_FAIL, 'src' : State._CONNECT, 'dst' : State._IDLE }, {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CONNECT, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CONNECT, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECTED, 'src' : State._CONNECT, 'dst' : State._CLIENT_INIT }, # _CLIENT_INIT {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE 
}, {'name' : Event._EV_TCP_CLOSE, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV, 'src' : State._CLIENT_INIT, 'dst' : State._ESTABLISHED }, # _ESTABLISHED {'name' : Event._EV_TCP_CLOSE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT }, {'name' : Event._EV_STOP, 'src' : State._ESTABLISHED, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT } ], 'callbacks': { 'on' + State._IDLE : _on_idle, 'on' + State._CONNECT : _on_connect, 'on' + State._CLIENT_INIT : _on_client_init, 'on' + State._ESTABLISHED : _on_established, } }) self._connection = connection self._session = None self._connects = 0 self._disable = False self._idle_hold_timer = None self._connect_timer = None self._collectors = collectors self._collector_name = None self._collector_index = -1 self._logger = logger self._event_queue = WorkQueue(self._dequeue_event, self._is_ready_to_dequeue_event) #end __init__ # Public functions def initialize(self): self.enqueue_event(Event(event = Event._EV_START)) #end initialize def session(self): return self._session #end session def state(self): return self._fsm.current #end state def shutdown(self): self._disable = True self.enqueue_event(Event(event = Event._EV_STOP)) #end shutdown def set_admin_state(self, down): if down == True: self._disable = True self.enqueue_event(Event(event = Event._EV_STOP)) else: self._disable = False self.enqueue_event(Event(event = Event._EV_START)) #end set_admin_state def connect_count(self): return self._connects #end connect_count def collector(self): if self._collector_index is -1: return None return self._collectors[self._collector_index] # end collector def collector_name(self): return self._collector_name # end collector_name def collectors(self): return self._collectors # end collectors def enqueue_event(self, event): self._event_queue.enqueue(event) #end enqueue_event def on_session_event(self, session, event): if session is not self._session: self._logger.error("Ignore session event [%d] received for old session" % (event)) return if SandeshSession.SESSION_ESTABLISHED == event: self._logger.info("Session Event: TCP Connected") self.enqueue_event(Event(event = Event._EV_TCP_CONNECTED, session = session)) elif SandeshSession.SESSION_ERROR == event: self._logger.error("Session Event: TCP Connect Fail") self.enqueue_event(Event(event = Event._EV_TCP_CONNECT_FAIL, session = session)) elif SandeshSession.SESSION_CLOSE == event: self._logger.error("Session Event: TCP Connection Closed") self.enqueue_event(Event(event = Event._EV_TCP_CLOSE, session = session)) else: self._logger.error("Received unknown session event [%d]" % (event)) #end on_session_event def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector): if sandesh_ctrl.success == True: self.enqueue_event(Event(event = Event._EV_SANDESH_CTRL_MESSAGE_RECV, session = session, msg = sandesh_ctrl, source = collector)) else: # Negotiation with the Collector failed, reset the # connection and retry after sometime. self._logger.error("Negotiation with the Collector %s failed." 
% (collector)) self._session.close() #end on_sandesh_ctrl_msg_receive def on_sandesh_uve_msg_send(self, sandesh_uve): self.enqueue_event(Event(event = Event._EV_SANDESH_UVE_SEND, msg = sandesh_uve)) #end on_sandesh_uve_msg_send # Private functions def _create_session(self, collector): assert self._session is None collector_ip_port = collector.split(':') server = (collector_ip_port[0], int(collector_ip_port[1])) self._session = SandeshSession(self._connection.sandesh_instance(), server, self.on_session_event, self._connection._receive_sandesh_msg) #end _create_session def _delete_session(self): if self._session: self._session.close() self._session = None self._collector_name = None #end _delete_session def _get_next_collector(self): if self._collector_index is -1: if not self._collectors: return None self._collector_index = 0 else: self._collector_index += 1 if self._collector_index == len(self._collectors): self._collector_index = 0 return self._collectors[self._collector_index] # end _get_next_collector def _start_idle_hold_timer(self): if self._idle_hold_timer is None: if self._IDLE_HOLD_TIME: self._idle_hold_timer = gevent.spawn_later(self._IDLE_HOLD_TIME, self._idle_hold_timer_expiry_handler) else: self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED)) #end _start_idle_hold_timer def _cancel_idle_hold_timer(self): if self._idle_hold_timer is not None: gevent.kill(self._idle_hold_timer) self._idle_hold_timer = None #end _cancel_idle_hold_timer def _idle_hold_timer_expiry_handler(self): self._idle_hold_timer = None self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED)) #end _idle_hold_timer_expiry_handler def _start_connect_timer(self): if self._connect_timer is None: self._connect_timer = gevent.spawn_later(self._CONNECT_TIME, self._connect_timer_expiry_handler, self._session) #end _start_connect_timer def _cancel_connect_timer(self): if self._connect_timer is not None: gevent.kill(self._connect_timer) self._connect_timer = None #end _cancel_connect_timer def _connect_timer_expiry_handler(self, session): self._connect_timer = None self.enqueue_event(Event(event = Event._EV_CONNECT_TIMER_EXPIRED, session = session)) #end _connect_timer_expiry_handler def _is_ready_to_dequeue_event(self): return True #end _is_ready_to_dequeue_event def _log_event(self, event): if self._fsm.current == State._ESTABLISHED and \ event.event == Event._EV_SANDESH_UVE_SEND: return False return True #end _log_event def _dequeue_event(self, event): if self._log_event(event): self._logger.info("Processing event[%s] in state[%s]" \ % (event.event, self._fsm.current)) if event.session is not None and event.session is not self._session: self._logger.info("Ignore event [%s] received for old session" \ % (event.event)) return if event.event == Event._EV_COLLECTOR_CHANGE: collector = self.collector() self._collector_index = -1 collector_list_change = False if self._collectors != event.collectors: self._collectors = event.collectors collector_list_change = True if self._collectors and self._collectors[0] == collector: self._collector_index = 0 self._logger.info("No change in active collector. 
" "Ignore event [%s]" % (event.event)) if collector_list_change: # update the collector_list in the ModuleClientState UVE self._connection.sandesh_instance().send_generator_info() return self._connection.sandesh_instance().send_generator_info() if event.event == Event._EV_SANDESH_UVE_SEND: if self._fsm.current == State._ESTABLISHED or self._fsm.current == State._CLIENT_INIT: self._connection.handle_sandesh_uve_msg(event.msg) else: self._connection.sandesh_instance().drop_tx_sandesh(event.msg, SandeshTxDropReason.WrongClientSMState) self._logger.info("Discarding event[%s] in state[%s]" \ % (event.event, self._fsm.current)) elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \ self._fsm.current == State._ESTABLISHED: self._connection.handle_sandesh_ctrl_msg(event.msg) elif self._fsm.cannot(event.event) is True: self._logger.info("Unconsumed event[%s] in state[%s]" \ % (event.event, self._fsm.current)) else: prev_state = self.state() getattr(self._fsm, event.event)(sm = self, sm_event = event) # Log state transition self._logger.info("Sandesh Client: Event[%s] => State[%s] -> State[%s]" \ % (event.event, prev_state, self.state()))
from work_queue import Task, WorkQueue, set_debug_flag
from work_queue import WORK_QUEUE_SCHEDULE_FCFS, WORK_QUEUE_SCHEDULE_FILES
from work_queue import WORK_QUEUE_RANDOM_PORT
from work_queue import WORK_QUEUE_OUTPUT
#from work_queue import WORK_QUEUE_MASTER_MODE_STANDALONE, WORK_QUEUE_WORKER_MODE_SHARED
from work_queue import WORK_QUEUE_TASK_ORDER_LIFO

import os
import sys
import time

set_debug_flag('debug')
set_debug_flag('wq')

wq = WorkQueue(WORK_QUEUE_RANDOM_PORT, name='workqueue_example',
               catalog=True, exclusive=False)

os.environ['PATH'] = '../../../work_queue/src:' + os.environ['PATH']
os.system('work_queue_worker -d all localhost %d &' % wq.port)

print wq.name

wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS)
#wq.specify_name('workqueue_example')
#wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE)
#wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED)
wq.specify_task_order(WORK_QUEUE_TASK_ORDER_LIFO)

if wq.empty():
    print 'work queue is empty'
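# Hedged continuation of the example above: submit a task to the queue and
# collect it. Task(), specify_tag(), submit() and wait() are part of the
# CCTools work_queue binding; the command and tag are illustrative.
t = Task('/bin/hostname')
t.specify_tag('example-task')
wq.submit(t)

while not wq.empty():
    t = wq.wait(5)   # returns a finished Task, or None on timeout
    if t:
        print 'task %s finished with return status %d' % (t.tag, t.return_status)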
class JobGenerator(object):
    def __init__(self, template, debug=False, dry_run=False):
        self.config = Configuration()
        self.debug = debug
        self.dry_run = dry_run
        # The work queue will figure out a valid combination of MongoDB access
        # parameters, e.g., host/port, URI, or replica set discovery via DNS
        self.wq = WorkQueue(host=self.config.mongodb_host,
                            port=self.config.mongodb_port,
                            uri=self.config.mongodb_uri,
                            srv_name=self.config.mongodb_rs_srv,
                            database=self.config.mongodb_queue_db,
                            replicaset=self.config.mongodb_rs,
                            collection=self.config.mongodb_queue_col)
        if not os.path.exists(template):
            raise Exception("Template file does not exist")
        self.template_dir = os.path.dirname(template)
        self.template_file = os.path.basename(template)
        if self.template_file == "":
            raise Exception("Template must be a file, not a directory")
        self.jinja = jinja2.Environment(
            loader=jinja2.FileSystemLoader(self.template_dir),
            autoescape=False)

    def _generate_random_id(self, team_cyan, team_magenta, suffix_length=8):
        return team_cyan + "-vs-" + team_magenta + ":" + \
            ''.join(random.choice(string.ascii_uppercase + string.digits)
                    for _ in range(suffix_length))
        #''.join(random.choices(string.ascii_uppercase + string.digits, k=8))

    def _generate_id(self, tournament_name, team_cyan, team_magenta, index,
                     suffix_length=6):
        return \
            "{tournament_name}:{:0{width}}:{team_cyan}-vs-{team_magenta}" \
            .format(index, width=suffix_length,
                    tournament_name=tournament_name,
                    team_cyan=team_cyan, team_magenta=team_magenta)

    @staticmethod
    def id_regex(tournament_name):
        return r"^%s:\d+:.*" % tournament_name

    def generate(self, tournament_name, team_cyan, team_magenta,
                 job_num=None):
        param_vars = {
            "tournament_name": tournament_name,
            "team_name_cyan": team_cyan,
            "team_name_magenta": team_magenta or ""
        }
        template = self.jinja.get_template(self.template_file)
        if template is None:
            print("Failed to get template '%s' (in '%s')"
                  % (self.template_file, self.template_dir))
            raise FileNotFoundError("Could not find template '%s' (in '%s')"
                                    % (self.template_file,
                                       self.template_dir))
        yamldoc = template.render(param_vars)
        if self.debug:
            print("YAML:\n%s" % yamldoc)
        try:
            (tournament_doc, parameter_doc) = yaml.load_all(yamldoc)
        except Exception:
            for idx, line in enumerate(yamldoc.splitlines()):
                print("%-4d: %s" % (idx, line))
            raise
        #if self.debug:
        #    print("Tournament:\n")
        #    pprint(tournament_doc)
        #    print("Parameters:\n")
        #    pprint(parameter_doc)
        #    print("Parameters:")
        #    for p in parameter_doc["parameters"]:
        #        print("- %s" % p["template"])
        idnum = job_num if job_num is not None \
            else 1 if self.dry_run else self.wq.get_next_id()
        jobname = self._generate_id(tournament_name, team_cyan,
                                    team_magenta, idnum)
        params = {
            "parameter_vars": param_vars,
            "parameter_doc_yaml": yamldoc,
            "template_parameters": parameter_doc["parameters"]
        }
        #if self.debug:
        #    print("Job Parameters")
        #    pprint(params)
        return (jobname, idnum, params)

    def store(self, jobname, idnum, params):
        if not self.dry_run:
            self.wq.add_item(jobname, idnum, params)

    def generate_and_store(self, tournament_name, team_cyan, team_magenta):
        (jobname, idnum, params) = self.generate(tournament_name, team_cyan,
                                                 team_magenta)
        self.store(jobname, idnum, params)
        return (jobname, idnum, params)

    def update_params(self, tournament_name, print_diffs=False,
                      only_pending=True):
        for i in self.wq.get_items(JobGenerator.id_regex(tournament_name)):
            if only_pending and i['status']['state'] != 'pending':
                continue
            job_parts = i['name'].split(':')
            teams = job_parts[2].split("-vs-")
            (jobname, idnum, params) = \
                self.generate(job_parts[0], teams[0], teams[1],
job_num=int(job_parts[1])) if jobname != i['name']: raise Exception("Invalid jobname, expected '%s', got '%s'" % (i['name'], jobname)) diff = unified_diff( i["params"]["parameter_doc_yaml"].splitlines(True), params['parameter_doc_yaml'].splitlines(True), fromfile='%s OLD' % jobname, tofile='%s' % jobname) diffstr = ''.join(diff) if print_diffs: if len(diffstr) == 0: print("%s: no update required" % jobname) else: print(diffstr) if not self.dry_run and len(diffstr) > 0: update = { "$push": { "updates": { "updated": datetime.datetime.utcnow(), "diff": diffstr } }, "$set": { "status.state": "pending", "params": params }, "$unset": { "manifests": "", "status.completed": "", "status.running": "" } } if self.debug: pprint(update) self.wq.update_item(jobname, update) def cancel_jobs(self, tournament_name, only_pending=True): update = { "$set": { "status.state": "cancelled", "status.cancelled": datetime.datetime.utcnow() } } for i in self.wq.get_items(JobGenerator.id_regex(tournament_name)): if i['status']['state'] != 'cancelled' and \ (not only_pending or i['status']['state'] == 'pending'): print("Cancelling %s" % i['name']) if not self.dry_run: self.wq.update_item(i['name'], update)
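# Hedged usage sketch for JobGenerator: the template path, tournament and
# team names below are made up. dry_run=True renders the job and assigns
# id 1 without touching the MongoDB-backed work queue.
gen = JobGenerator('templates/job.yaml.j2', debug=False, dry_run=True)
(jobname, idnum, params) = gen.generate_and_store('tournament1',
                                                  'team-cyan',
                                                  'team-magenta')
print("generated job '%s' with id %d" % (jobname, idnum))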
class Sandesh(object): _DEFAULT_LOG_FILE = sand_logger.SandeshLogger._DEFAULT_LOG_FILE _DEFAULT_SYSLOG_FACILITY = ( sand_logger.SandeshLogger._DEFAULT_SYSLOG_FACILITY) class SandeshRole: INVALID = 0 GENERATOR = 1 COLLECTOR = 2 # end class SandeshRole def __init__(self): self._context = '' self._scope = '' self._module = '' self._source = '' self._node_type = '' self._instance_id = '' self._timestamp = 0 self._versionsig = 0 self._type = 0 self._hints = 0 self._client_context = '' self._client = None self._role = self.SandeshRole.INVALID self._logger = None self._level = SandeshLevel.INVALID self._category = '' self._send_queue_enabled = True self._http_server = None self._connect_to_collector = True # end __init__ # Public functions def init_generator(self, module, source, node_type, instance_id, collectors, client_context, http_port, sandesh_req_uve_pkg_list=None, discovery_client=None, connect_to_collector=True, logger_class=None, logger_config_file=None, host_ip='127.0.0.1', alarm_ack_callback=None): self._role = self.SandeshRole.GENERATOR self._module = module self._source = source self._node_type = node_type self._instance_id = instance_id self._host_ip = host_ip self._client_context = client_context self._collectors = collectors self._connect_to_collector = connect_to_collector self._rcv_queue = WorkQueue(self._process_rx_sandesh) self._send_level = SandeshLevel.INVALID self._init_logger(module, logger_class=logger_class, logger_config_file=logger_config_file) self._logger.info('SANDESH: CONNECT TO COLLECTOR: %s', connect_to_collector) from sandesh_stats import SandeshMessageStatistics self._msg_stats = SandeshMessageStatistics() self._trace = trace.Trace() self._sandesh_request_map = {} self._alarm_ack_callback = alarm_ack_callback self._uve_type_maps = SandeshUVETypeMaps(self._logger) if sandesh_req_uve_pkg_list is None: sandesh_req_uve_pkg_list = [] # Initialize the request handling # Import here to break the cyclic import dependency import sandesh_req_impl sandesh_req_impl = sandesh_req_impl.SandeshReqImpl(self) sandesh_req_uve_pkg_list.append('pysandesh.gen_py') for pkg_name in sandesh_req_uve_pkg_list: self._create_sandesh_request_and_uve_lists(pkg_name) self._gev_httpd = None if http_port != -1: self._http_server = SandeshHttp( self, module, http_port, sandesh_req_uve_pkg_list) self._gev_httpd = gevent.spawn(self._http_server.start_http_server) primary_collector = None secondary_collector = None if self._collectors is not None: if len(self._collectors) > 0: primary_collector = self._collectors[0] if len(self._collectors) > 1: secondary_collector = self._collectors[1] if self._connect_to_collector: self._client = SandeshClient( self, primary_collector, secondary_collector, discovery_client) self._client.initiate() # end init_generator def uninit(self): self.kill_httpd() def kill_httpd(self): if self._gev_httpd: try: self._http_server.stop_http_server() self._http_server = None gevent.sleep(0) self._gev_httpd.kill() except Exception as e: self._logger.debug(str(e)) def record_port(self, name, port): pipe_name = '/tmp/%s.%d.%s_port' % (self._module, os.getppid(), name) try: pipeout = os.open(pipe_name, os.O_WRONLY) except Exception: self._logger.error('Cannot write %s_port %d to %s' % (name, port, pipe_name)) else: self._logger.error('Writing %s_port %d to %s' % (name, port, pipe_name)) os.write(pipeout, '%d\n' % port) os.close(pipeout) def logger(self): return self._logger # end logger def sandesh_logger(self): return self._sandesh_logger # end sandesh_logger def 
set_logging_params(self, enable_local_log=False, category='', level=SandeshLevel.SYS_INFO, file=sand_logger.SandeshLogger._DEFAULT_LOG_FILE, enable_syslog=False, syslog_facility=_DEFAULT_SYSLOG_FACILITY, enable_trace_print=False, enable_flow_log=False): self._sandesh_logger.set_logging_params( enable_local_log=enable_local_log, category=category, level=level, file=file, enable_syslog=enable_syslog, syslog_facility=syslog_facility, enable_trace_print=enable_trace_print, enable_flow_log=enable_flow_log) # end set_logging_params def set_trace_print(self, enable_trace_print): self._sandesh_logger.set_trace_print(enable_trace_print) # end set_trace_print def set_flow_logging(self, enable_flow_log): self._sandesh_logger.set_flow_logging(enable_flow_log) # end set_flow_logging def set_local_logging(self, enable_local_log): self._sandesh_logger.set_local_logging(enable_local_log) # end set_local_logging def set_logging_level(self, level): self._sandesh_logger.set_logging_level(level) # end set_logging_level def set_logging_category(self, category): self._sandesh_logger.set_logging_category(category) # end set_logging_category def set_logging_file(self, file): self._sandesh_logger.set_logging_file(file) # end set_logging_file def is_logging_dropped_allowed(self, sandesh): if sandesh.type() == SandeshType.FLOW: return self.is_flow_logging_enabled() else: if hasattr(sandesh, 'do_rate_limit_drop_log'): return sandesh.do_rate_limit_drop_log return True # end is_logging_dropped_allowed def is_send_queue_enabled(self): return self._send_queue_enabled # end is_send_queue_enabled def is_connect_to_collector_enabled(self): return self._connect_to_collector # end is_connect_to_collector_enabled def set_send_queue(self, enable): if self._send_queue_enabled != enable: self._logger.info("SANDESH: CLIENT: SEND QUEUE: %s -> %s", self._send_queue_enabled, enable) self._send_queue_enabled = enable if enable: connection = self._client.connection() if connection and connection.session(): connection.session().send_queue().may_be_start_runner() # end set_send_queue def set_send_level(self, count, sandesh_level): if self._send_level != sandesh_level: self._logger.info('Sandesh Send Level [%s] -> [%s]' % \ (SandeshLevel._VALUES_TO_NAMES[self._send_level], SandeshLevel._VALUES_TO_NAMES[sandesh_level])) self._send_level = sandesh_level # end set_send_level def send_level(self): return self._send_level # end send_level def init_collector(self): pass # end init_collector def msg_stats(self): return self._msg_stats # end msg_stats @classmethod def next_seqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 1 else: cls._lseqnum += 1 return cls._lseqnum # end next_seqnum @classmethod def lseqnum(cls): if not hasattr(cls, '_lseqnum'): cls._lseqnum = 0 return cls._lseqnum # end lseqnum def module(self): return self._module # end module def source_id(self): return self._source # end source_id def node_type(self): return self._node_type # end node_type def instance_id(self): return self._instance_id # end instance_id def host_ip(self): return self._host_ip # end host_ip def scope(self): return self._scope # end scope def context(self): return self._context # end context def seqnum(self): return self._seqnum # end seqnum def timestamp(self): return self._timestamp # end timestamp def versionsig(self): return self._versionsig # end versionsig def type(self): return self._type # end type def hints(self): return self._hints # end hints def client(self): return self._client # end client def level(self): return self._level # end 
level def category(self): return self._category # end category def validate(self): return # end validate def alarm_ack_callback(self): return self._alarm_ack_callback # end alarm_ack_callback def is_flow_logging_enabled(self): return self._sandesh_logger.is_flow_logging_enabled() # end is_flow_logging_enabled def is_trace_print_enabled(self): return self._sandesh_logger.is_trace_print_enabled() # end is_trace_print_enabled def is_local_logging_enabled(self): return self._sandesh_logger.is_local_logging_enabled() # end is_local_logging_enabled def logging_level(self): return self._sandesh_logger.logging_level() # end logging_level def logging_category(self): return self._sandesh_logger.logging_category() # end logging_category def is_syslog_logging_enabled(self): return self._sandesh_logger.is_syslog_logging_enabled() # end is_syslog_logging_enabled def logging_syslog_facility(self): return self._sandesh_logger.logging_syslog_facility() # end logging_syslog_facility def is_unit_test(self): return self._role == self.SandeshRole.INVALID # end is_unit_test def handle_test(self, sandesh_init): if sandesh_init.is_unit_test() or self._is_level_ut(): if self.is_logging_allowed(sandesh_init): sandesh_init._logger.debug(self.log()) return True return False def is_logging_allowed(self, sandesh_init): if self._type == SandeshType.FLOW: return sandesh_init.is_flow_logging_enabled() if not sandesh_init.is_local_logging_enabled(): return False logging_level = sandesh_init.logging_level() level_allowed = logging_level >= self._level logging_category = sandesh_init.logging_category() if logging_category is None or len(logging_category) == 0: category_allowed = True else: category_allowed = logging_category == self._category return level_allowed and category_allowed # end is_logging_allowed def enqueue_sandesh_request(self, sandesh): self._rcv_queue.enqueue(sandesh) # end enqueue_sandesh_request def send_sandesh(self, tx_sandesh): if self._client: self._client.send_sandesh(tx_sandesh) else: if self._connect_to_collector: self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient) else: self.drop_tx_sandesh(tx_sandesh, SandeshTxDropReason.NoClient, tx_sandesh.level()) # end send_sandesh def drop_tx_sandesh(self, tx_sandesh, drop_reason, level=None): self._msg_stats.update_tx_stats(tx_sandesh.__class__.__name__, sys.getsizeof(tx_sandesh), drop_reason) if self.is_logging_dropped_allowed(tx_sandesh): if level is not None: self._logger.log( sand_logger.SandeshLogger.get_py_logger_level(level), tx_sandesh.log()) else: self._logger.error('SANDESH: [DROP: %s] %s' % \ (SandeshTxDropReason._VALUES_TO_NAMES[drop_reason], tx_sandesh.log())) # end drop_tx_sandesh def send_generator_info(self): from gen_py.sandesh_uve.ttypes import SandeshClientInfo, \ ModuleClientState, SandeshModuleClientTrace client_info = SandeshClientInfo() try: client_start_time = self._start_time except Exception: self._start_time = util.UTCTimestampUsec() finally: client_info.start_time = self._start_time client_info.pid = os.getpid() if self._http_server is not None: client_info.http_port = self._http_server.get_port() client_info.collector_name = self._client.connection().collector() client_info.status = self._client.connection().state() client_info.successful_connections = ( self._client.connection().statemachine().connect_count()) client_info.primary = self._client.connection().primary_collector() if client_info.primary is None: client_info.primary = '' client_info.secondary = ( self._client.connection().secondary_collector()) if 
client_info.secondary is None: client_info.secondary = '' module_state = ModuleClientState(name=self._source + ':' + self._node_type + ':' + self._module + ':' + self._instance_id, client_info=client_info) generator_info = SandeshModuleClientTrace( data=module_state, sandesh=self) generator_info.send(sandesh=self) # end send_generator_info def get_sandesh_request_object(self, request): try: req_type = self._sandesh_request_map[request] except KeyError: self._logger.error('Invalid Sandesh Request "%s"' % (request)) return None else: return req_type() # end get_sandesh_request_object def trace_enable(self): self._trace.TraceOn() # end trace_enable def trace_disable(self): self._trace.TraceOff() # end trace_disable def is_trace_enabled(self): return self._trace.IsTraceOn() # end is_trace_enabled def trace_buffer_create(self, name, size, enable=True): self._trace.TraceBufAdd(name, size, enable) # end trace_buffer_create def trace_buffer_delete(self, name): self._trace.TraceBufDelete(name) # end trace_buffer_delete def trace_buffer_enable(self, name): self._trace.TraceBufOn(name) # end trace_buffer_enable def trace_buffer_disable(self, name): self._trace.TraceBufOff(name) # end trace_buffer_disable def is_trace_buffer_enabled(self, name): return self._trace.IsTraceBufOn(name) # end is_trace_buffer_enabled def trace_buffer_list_get(self): return self._trace.TraceBufListGet() # end trace_buffer_list_get def trace_buffer_size_get(self, name): return self._trace.TraceBufSizeGet(name) # end trace_buffer_size_get def trace_buffer_read(self, name, read_context, count, read_cb): self._trace.TraceRead(name, read_context, count, read_cb) # end trace_buffer_read def trace_buffer_read_done(self, name, context): self._trace.TraceReadDone(name, context) # end trace_buffer_read_done # API to send the trace buffer to the Collector (see the usage sketch after this class). # If the trace count is not specified or is zero, the entire trace buffer # is sent to the Collector. # [Note] No duplicate trace messages are sent to the Collector, i.e., if no # trace message was added between two consecutive calls to this API, then # no trace message is sent to the Collector.
def send_sandesh_trace_buffer(self, trace_buf, count=0): trace_req_runner = SandeshTraceRequestRunner( sandesh=self, request_buffer_name=trace_buf, request_context='', read_context='Collector', request_count=count) trace_req_runner.Run() # end send_sandesh_trace_buffer # Private functions def _is_level_ut(self): return (self._level >= SandeshLevel.UT_START and self._level <= SandeshLevel.UT_END) # end _is_level_ut def _create_task(self): return gevent.spawn(self._runner.run_for_ever) # end _create_task def _process_rx_sandesh(self, rx_sandesh): handle_request_fn = getattr(rx_sandesh, "handle_request", None) if callable(handle_request_fn): handle_request_fn(rx_sandesh) else: self._logger.error('Sandesh Request "%s" not implemented' % (rx_sandesh.__class__.__name__)) # end _process_rx_sandesh def _create_sandesh_request_and_uve_lists(self, package): try: imp_pkg = __import__(package) except ImportError: self._logger.error('Failed to import package "%s"' % (package)) else: try: pkg_path = imp_pkg.__path__ except AttributeError: self._logger.error( 'Failed to get package [%s] path' % (package)) return for importer, mod, ispkg in ( pkgutil.walk_packages(path=pkg_path, prefix=imp_pkg.__name__ + '.')): if not ispkg: module = mod.rsplit('.', 1)[-1] if 'ttypes' == module: self._logger.debug( 'Add Sandesh requests in module "%s"' % (mod)) self._add_sandesh_request(mod) self._logger.debug( 'Add Sandesh UVEs in module "%s"' % (mod)) self._add_sandesh_uve(mod) self._logger.debug( 'Add Sandesh Alarms in module "%s"' % (mod)) self._add_sandesh_alarm(mod) # end _create_sandesh_request_and_uve_lists def _add_sandesh_request(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: try: sandesh_req_list = getattr(imp_module, '_SANDESH_REQUEST_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh request list' % (mod)) else: # Add sandesh requests to the dictionary. 
for req in sandesh_req_list: self._sandesh_request_map[req.__name__] = req # end _add_sandesh_request def _get_sandesh_uve_list(self, imp_module): try: sandesh_uve_list = getattr(imp_module, '_SANDESH_UVE_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh UVE list' % (imp_module.__name__)) return None else: return sandesh_uve_list # end _get_sandesh_uve_list def _add_sandesh_uve(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: sandesh_uve_list = self._get_sandesh_uve_list(imp_module) if not sandesh_uve_list: return # Register sandesh UVEs for uve_type, uve_data_type in sandesh_uve_list: SandeshUVEPerTypeMap(self, SandeshType.UVE, uve_type, uve_data_type) # end _add_sandesh_uve def _get_sandesh_alarm_list(self, imp_module): try: sandesh_alarm_list = getattr(imp_module, '_SANDESH_ALARM_LIST') except AttributeError: self._logger.error( '"%s" module does not have sandesh Alarm list' % (imp_module.__name__)) return None else: return sandesh_alarm_list # end _get_sandesh_alarm_list def _add_sandesh_alarm(self, mod): try: imp_module = importlib.import_module(mod) except ImportError: self._logger.error('Failed to import Module "%s"' % (mod)) else: sandesh_alarm_list = self._get_sandesh_alarm_list(imp_module) if not sandesh_alarm_list: return # Register sandesh Alarms for alarm_type, alarm_data_type in sandesh_alarm_list: SandeshUVEPerTypeMap(self, SandeshType.ALARM, alarm_type, alarm_data_type) # end _add_sandesh_alarm def _init_logger(self, module, logger_class=None, logger_config_file=None): if not module: module = 'sandesh' if logger_class: self._sandesh_logger = (sand_logger.create_logger( module, logger_class, logger_config_file=logger_config_file)) else: self._sandesh_logger = sand_logger.SandeshLogger( module, logger_config_file=logger_config_file) self._logger = self._sandesh_logger.logger()
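# --- Usage sketch (illustrative only; assumes an already-initialized Sandesh
# generator instance `sandesh`; the buffer name and size are arbitrary examples) ---
# The trace-buffer API above is driven in three steps: enable tracing, create
# a named buffer, and then push the buffer's (new) contents to the Collector.
#
# sandesh.trace_enable()
# sandesh.trace_buffer_create(name='InitTrace', size=1000)
# # ... generated trace messages accumulate in 'InitTrace' ...
# sandesh.send_sandesh_trace_buffer('InitTrace', count=0)  # count=0 => whole buffer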
class QMaster(threading.Thread): def __init__(self, project, port, log_freq=600): # 600 seconds """Initialize the QMaster Parameters ---------- project : port : int log_freq : int, optional frequency to print info about the status of the work queue. In units of seconds. Default is to print every 10 minutes. """ threading.Thread.__init__(self) self.project = project self.log_freq = log_freq # print time in seconds self.wake_freq = 1 # seconds self.wq = WorkQueue(port, name='MSMAccelerator', catalog=True, exclusive=False) logger.info('WORK QUEUE MASTER LISTENING ON PORT: %d', self.wq.port) logger.info('(Start a local worker with >> work_queue_worker -d all localhost %d & )', self.wq.port) # method controls whether or not we need to bring back solvated_xtc as well if self.project.method == 'explicit': self.return_wet_xtc = True elif self.project.method == 'implicit': self.return_wet_xtc = False else: raise Exception("project.method must be 'explicit' or 'implicit'") logger.info('Return wet xtc set to %s', self.return_wet_xtc) # what does this specify algorithm do? self.wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS) # fast abort kills jobs that appear to be straggling (taking more than 1.5x average) #self.wq.activate_fast_abort(1.5) # setting the stop event signals for the thread to die self._stop = threading.Event() # the thread sets the event every time a job returns or there are no waiting jobs # and it finished post processing. See the wait method self._mainloop_wake_event_cause = None self._mainloop_wake_event = threading.Event() # start the thread self.start() def run(self): """Main thread-loop for the QMaster thread""" last_print = time.time() while True: time.sleep(self.wake_freq) if not self.wq.empty(): t = self.wq.wait(self.wake_freq) if t: if t.return_status != 0: logger.error('Worker returned nonzero exit status for job: %d', t.return_status) else: self.on_return(t) self._mainloop_wake_event_cause = 'job returned' self._mainloop_wake_event.set() if self.wq.stats.tasks_waiting == 0 and not self._mainloop_wake_event.is_set(): self._mainloop_wake_event_cause = 'queue empty' self._mainloop_wake_event.set() # also set the event if there are no tasks in the queue if self._stop.is_set(): logger.info('Received stop signal. Shutting down all workers') self.wq.shutdown_workers(0) # 0 indicates to shut all of them down sys.exit(0) if time.time() - last_print > self.log_freq: logger.info('workers initialized: %d, ready: %d, busy: %d', self.wq.stats.workers_init, self.wq.stats.workers_ready, self.wq.stats.workers_busy) logger.info('tasks running: %d, waiting: %d, complete: %d', self.wq.stats.tasks_running, self.wq.stats.tasks_waiting, self.wq.stats.tasks_complete) last_print = time.time() def num_jobs_waiting(self): """Number of jobs waiting to be sent out This number should be kept at 1, and when it drops to zero a new job should be generated. Returns ------- n : int The number """ return self.wq.stats.tasks_waiting def num_jobs_in_queue(self): """Get the number of jobs currently in the work queue This includes both the jobs running remotely and the ones waiting here Returns ------- n : int The number """ return self.wq.stats.tasks_running + self.wq.stats.tasks_waiting def stop(self): """Signal the QMaster thread to stop""" self._stop.set() def wait(self): """Block until some sort of action happens in the main-thread loop.
This call will return either when a job has returned from the workers, or when the queue is empty (last job in the local queue has been sent out) Returns ------- wakeup_cause : str Either 'job returned' or 'queue empty', depending on the reason """ self._mainloop_wake_event.wait() self._mainloop_wake_event.clear() cause = self._mainloop_wake_event_cause if cause not in ['job returned', 'queue empty']: raise Exception('Bad wakeup cause') return cause @with_db_lock def submit(self, traj): """ Submit a job to the work-queue for further sampling. Parameters ---------- """ if traj.submit_time is not None: raise ValueError("This traj has already been submitted") Session.add(traj) Session.flush() traj.populate_default_filenames() if not hasattr(traj, 'init_pdb'): raise ValueError('Traj is supposed to have a pdb object tacked on') save_file(traj.init_pdb_fn, traj.init_pdb) remote_driver_fn = os.path.split(str(traj.forcefield.driver))[1] remote_pdb_fn = 'input.pdb' remote_output_fn = 'production_dry{}'.format(traj.forcefield.output_extension) if traj.mode is None or traj.forcefield is None: raise ValueError('malformed traj') task = Task('chmod +x ./{driver}; ./{driver} {pdb_fn} {ff} {water} {mode} {threads}'.format( pdb_fn=remote_pdb_fn, mode=traj.mode, driver=remote_driver_fn, ff=traj.forcefield.name, water=traj.forcefield.water, threads=traj.forcefield.threads)) #why does traj.forcefield.driver come out as unicode? task.specify_input_file(str(traj.forcefield.driver), remote_driver_fn) task.specify_output_file(traj.wqlog_fn, 'logs/driver.log') task.specify_input_file(traj.init_pdb_fn, remote_pdb_fn) task.specify_output_file(traj.dry_xtc_fn, remote_output_fn) if self.return_wet_xtc: # this is the XTC file with waters, generated by the driver # when you're doing implicit solvent only, this stuff is not used. remote_wet_output_fn = 'production_wet{}'.format(traj.forcefield.output_extension) task.specify_output_file(traj.wet_xtc_fn, remote_wet_output_fn) task.specify_output_file(traj.last_wet_snapshot_fn, 'last_wet_snapshot.pdb') else: logger.debug('Not requesting production_wet%s from driver (implicit)', traj.forcefield.output_extension) task.specify_tag(str(traj.id)) task.specify_algorithm(WORK_QUEUE_SCHEDULE_FILES) # what does this do? traj.submit_time = datetime.now() self.wq.submit(task) logger.info('Submitted to queue: %s', traj) @with_db_lock def on_return(self, task): """Called by main thread on the return of data from the workers. Post-processing""" logger.info('Retrieved task %s', task.tag) traj = Session.query(models.Trajectory).get(int(task.tag)) try: # save lh5 version of the trajectory conf = load_file(self.project.pdb_topology_file) coordinates = msmbuilder.Trajectory.load_trajectory_file(str(traj.dry_xtc_fn), Conf=conf) save_file(traj.lh5_fn, coordinates) except Exception as e: logger.error('When postprocessing %s, convert to lh5 failed!', traj) logger.exception(e) raise # convert last_wet_snapshot to lh5 pdb_to_lh5(traj, 'last_wet_snapshot_fn') pdb_to_lh5(traj, 'init_pdb_fn') traj.host = task.host traj.returned_time = datetime.now() traj.length = len(coordinates) logger.info('Finished converting new traj to lh5 successfully')
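# --- Usage sketch (illustrative only; `project` and `propose_new_traj` are
# hypothetical stand-ins for MSMAccelerator objects) ---
# The docstrings above describe the intended driving pattern: keep exactly one
# job waiting, and top the queue back up whenever wait() reports that the
# local queue has drained.
#
# qmaster = QMaster(project, port=5521)
# try:
#     while True:
#         cause = qmaster.wait()  # blocks: 'job returned' or 'queue empty'
#         if qmaster.num_jobs_waiting() == 0:
#             qmaster.submit(propose_new_traj())  # keep one job waiting
# except KeyboardInterrupt:
#     qmaster.stop()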
""" Python-WorkQueue test """ from work_queue import Task, WorkQueue, set_debug_flag from work_queue import WORK_QUEUE_SCHEDULE_FCFS, WORK_QUEUE_SCHEDULE_FILES from work_queue import WORK_QUEUE_RANDOM_PORT from work_queue import WORK_QUEUE_OUTPUT #from workqueue import WORK_QUEUE_MASTER_MODE_STANDALONE, WORK_QUEUE_WORKER_MODE_SHARED import os import sys import time set_debug_flag('debug') set_debug_flag('wq') wq = WorkQueue(WORK_QUEUE_RANDOM_PORT, name='workqueue_example', catalog=False, exclusive=False) os.system('work_queue_worker -d all localhost %d &' % wq.port) print wq.name wq.specify_algorithm(WORK_QUEUE_SCHEDULE_FCFS) #wq.specify_name('workqueue_example') #wq.specify_master_mode(WORK_QUEUE_MASTER_MODE_STANDALONE) #wq.specify_worker_mode(WORK_QUEUE_WORKER_MODE_SHARED) if wq.empty(): print 'work queue is empty' outputs = [] for i in range(5):
def WorkQueueSubmitThread(task_queue=multiprocessing.Queue(), queue_lock=threading.Lock(), launch_cmd=None, env=None, collector_queue=multiprocessing.Queue(), see_worker_output=False, data_dir=".", full=False, cancel_value=multiprocessing.Value('i', 1), port=WORK_QUEUE_DEFAULT_PORT, wq_log_dir=None, project_password=None, project_password_file=None, project_name=None): """Thread to handle Parsl app submissions to the Work Queue objects. Takes in Parsl functions submitted using submit(), and creates a Work Queue task with the appropriate specifications, which is then submitted to Work Queue. After tasks are completed, processes the exit status and exit code of the task, and sends results to the Work Queue collector thread. """ logger.debug("Starting WorkQueue Submit/Wait Process") # Enable debugging flags and create logging file if wq_log_dir is not None: logger.debug("Setting debugging flags and creating logging file") wq_debug_log = os.path.join(wq_log_dir, "debug_log") cctools_debug_flags_set("all") cctools_debug_config_file(wq_debug_log) # Create WorkQueue queue object logger.debug("Creating WorkQueue Object") try: logger.debug("Listening on port {}".format(port)) q = WorkQueue(port) except Exception as e: logger.error("Unable to create WorkQueue object: {}".format(e)) raise e # Specify WorkQueue queue attributes if project_name: q.specify_name(project_name) if project_password: q.specify_password(project_password) elif project_password_file: q.specify_password_file(project_password_file) # Only write logs when the wq_log_dir is specified, which it most likely will be if wq_log_dir is not None: wq_master_log = os.path.join(wq_log_dir, "master_log") wq_trans_log = os.path.join(wq_log_dir, "transaction_log") if full: wq_resource_log = os.path.join(wq_log_dir, "resource_logs") q.enable_monitoring_full(dirname=wq_resource_log) q.specify_log(wq_master_log) q.specify_transactions_log(wq_trans_log) wq_tasks = set() orig_ppid = os.getppid() continue_running = True while (continue_running): # Monitor the task queue ppid = os.getppid() if ppid != orig_ppid: logger.debug("new Process") continue_running = False continue # Submit tasks while task_queue.qsize() > 0: if cancel_value.value == 0: logger.debug("cancel value set to cancel") continue_running = False break # Obtain task from task_queue try: item = task_queue.get(timeout=1) logger.debug("Removing task from queue") except queue.Empty: continue parsl_id = item["task_id"] # Extract information about the task function_data_loc = item["data_loc"] function_data_loc_remote = function_data_loc.split("/")[-1] function_result_loc = item["result_loc"] function_result_loc_remote = function_result_loc.split("/")[-1] input_files = item["input_files"] output_files = item["output_files"] std_files = item["std_files"] full_script_name = workqueue_worker.__file__ script_name = full_script_name.split("/")[-1] remapping_string = "" std_string = "" # Parse input file information logger.debug("Looking at input") for item in input_files: if item[3] == "std": std_string += "mv " + item[1] + " " + item[0] + "; " else: remapping_string += item[0] + ":" + item[1] + "," logger.debug(remapping_string) # Parse output file information logger.debug("Looking at output") for item in output_files: remapping_string += item[0] + ":" + item[1] + "," logger.debug(remapping_string) if len(input_files) + len(output_files) > 0: remapping_string = "-r " + remapping_string remapping_string = remapping_string[:-1] # Create command string logger.debug(launch_cmd) command_str = 
launch_cmd.format( input_file=function_data_loc_remote, output_file=function_result_loc_remote, remapping_string=remapping_string) command_str = std_string + command_str logger.debug(command_str) # Create WorkQueue task for the command logger.debug("Sending task {} with command: {}".format( parsl_id, command_str)) try: t = Task(command_str) except Exception as e: logger.error("Unable to create task: {}".format(e)) continue # Specify environment variables for the task if env is not None: for var in env: t.specify_environment_variable(var, env[var]) # Specify script, and data/result files for task t.specify_file(full_script_name, script_name, WORK_QUEUE_INPUT, cache=True) t.specify_file(function_data_loc, function_data_loc_remote, WORK_QUEUE_INPUT, cache=False) t.specify_file(function_result_loc, function_result_loc_remote, WORK_QUEUE_OUTPUT, cache=False) t.specify_tag(str(parsl_id)) logger.debug("Parsl ID: {}".format(parsl_id)) # Specify all input/output files for task for item in input_files: t.specify_file(item[0], item[1], WORK_QUEUE_INPUT, cache=item[2]) for item in output_files: t.specify_file(item[0], item[1], WORK_QUEUE_OUTPUT, cache=item[2]) for item in std_files: t.specify_file(item[0], item[1], WORK_QUEUE_OUTPUT, cache=item[2]) # Submit the task to the WorkQueue object logger.debug("Submitting task {} to WorkQueue".format(parsl_id)) try: wq_id = q.submit(t) wq_tasks.add(wq_id) except Exception as e: logger.error("Unable to submit task: {}".format(e)) msg = { "tid": parsl_id, "result_received": False, "reason": "Workqueue Task Start Failure", "status": 1 } collector_queue.put_nowait(msg) continue logger.debug("Task {} submitted to WorkQueue with id {}".format( parsl_id, wq_id)) if cancel_value.value == 0: continue_running = False # If the queue is not empty wait on the WorkQueue queue for a task task_found = True if not q.empty() and continue_running: while task_found is True: if cancel_value.value == 0: continue_running = False task_found = False continue # Obtain the task from the queue t = q.wait(1) if t is None: task_found = False continue else: parsl_tid = t.tag logger.debug( "Completed WorkQueue task {}, parsl task {}".format( t.id, parsl_tid)) status = t.return_status task_result = t.result msg = None # Task failure if status != 0 or (task_result != WORK_QUEUE_RESULT_SUCCESS and task_result != WORK_QUEUE_RESULT_OUTPUT_MISSING): logger.debug( "Wrapper Script status: {}\nWorkQueue Status: {}". format(status, task_result)) # Wrapper script failure if status != 0: logger.debug( "WorkQueue task {} failed with status {}".
format(t.id, status)) reason = "Wrapper Script Failure: " if status == 1: reason += "problem parsing command line options" elif status == 2: reason += "problem loading function data" elif status == 3: reason += "problem remapping file names" elif status == 4: reason += "problem writing out function result" else: reason += "unable to process wrapper script failure with status = {}".format( status) reason += "\nTrace:\n" + str(t.output) logger.debug( "WorkQueue runner script failed for task {} because {}\n" .format(parsl_tid, reason)) # WorkQueue system failure else: reason = "WorkQueue System Failure: " if task_result == 1: reason += "missing input file" elif task_result == 2: reason += "unable to generate output file" elif task_result == 4: reason += "stdout has been truncated" elif task_result == 1 << 3: reason += "task terminated with a signal" elif task_result == 2 << 3: reason += "task used more resources than requested" elif task_result == 3 << 3: reason += "task ran past the specified end time" elif task_result == 4 << 3: reason += "result could not be classified" elif task_result == 5 << 3: reason += "task failed, but not a task error" elif task_result == 6 << 3: reason += "unable to complete after specified number of retries" elif task_result == 7 << 3: reason += "task ran for more than the specified time" elif task_result == 8 << 3: reason += "task needed more space to complete task" else: reason += "unable to process Work Queue system failure" msg = { "tid": parsl_tid, "result_received": False, "reason": reason, "status": status } collector_queue.put_nowait(msg) # Task Success else: # Print the output from the task if see_worker_output: print(t.output) # Load result into result file result_loc = os.path.join( data_dir, "task_" + str(parsl_tid) + "_function_result") logger.debug( "Looking for result in {}".format(result_loc)) f = open(result_loc, "rb") result = pickle.load(f) f.close() msg = { "tid": parsl_tid, "result_received": True, "result": result } wq_tasks.remove(t.id) collector_queue.put_nowait(msg) if continue_running is False: logger.debug("Exiting WorkQueue Master Thread event loop") break # Remove all WorkQueue tasks that remain in the queue object for wq_task in wq_tasks: logger.debug("Cancelling WorkQueue Task {}".format(wq_task)) q.cancel_by_taskid(wq_task) logger.debug("Exiting WorkQueue Monitoring Process") return 0
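# --- Illustrative producer sketch (shapes inferred from the reads above;
# paths and file tuples are hypothetical) ---
# The submit thread consumes dict items from `task_queue`. Matching the keys
# and tuple layouts it unpacks, a producer would enqueue roughly:
#
# task_queue.put({
#     "task_id": 42,
#     "data_loc": "/tmp/task_42_function_data",      # pickled function + args
#     "result_loc": "/tmp/task_42_function_result",  # where the result lands
#     "input_files": [("in.dat", "in.dat", True, "data")],  # (local, remote, cache, kind)
#     "output_files": [("out.dat", "out.dat", False)],      # (local, remote, cache)
#     "std_files": [],
# })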
class SandeshSession(TcpSession): _KEEPALIVE_IDLE_TIME = 45 # in secs _KEEPALIVE_INTERVAL = 3 # in secs _KEEPALIVE_PROBES = 5 def __init__(self, sandesh_instance, server, event_handler, sandesh_msg_handler): self._sandesh_instance = sandesh_instance self._logger = sandesh_instance._logger self._event_handler = event_handler self._reader = SandeshReader(self, sandesh_msg_handler) self._writer = SandeshWriter(self) self._send_queue = WorkQueue(self._send_sandesh, self._is_ready_to_send_sandesh) TcpSession.__init__(self, server) # end __init__ # Public functions def sandesh_instance(self): return self._sandesh_instance # end sandesh_instance def is_send_queue_empty(self): return self._send_queue.is_queue_empty() # end is_send_queue_empty def is_connected(self): return self._connected # end is_connected def enqueue_sandesh(self, sandesh): self._send_queue.enqueue(sandesh) # end enqueue_sandesh def send_queue(self): return self._send_queue # end send_queue # Overloaded functions from TcpSession def connect(self): TcpSession.connect(self, timeout=5) # end connect def _on_read(self, buf): if self._reader.read_msg(buf) < 0: self._logger.error("SandeshReader Error. Close Collector session") self.close() # end _on_read def _handle_event(self, event): self._event_handler(self, event) # end _handle_event def _set_socket_options(self): self._socket.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) if hasattr(socket, "TCP_KEEPIDLE"): self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, self._KEEPALIVE_IDLE_TIME) if hasattr(socket, "TCP_KEEPALIVE"): self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPALIVE, self._KEEPALIVE_IDLE_TIME) if hasattr(socket, "TCP_KEEPINTVL"): self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, self._KEEPALIVE_INTERVAL) if hasattr(socket, "TCP_KEEPCNT"): self._socket.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPCNT, self._KEEPALIVE_PROBES) # end _set_socket_options # Private functions def _send_sandesh(self, sandesh): if self._send_queue.is_queue_empty(): more = False else: more = True if not self._connected: self._logger.log(SandeshLogger.get_py_logger_level(sandesh.level()), sandesh.log()) return if sandesh.is_logging_allowed(self._sandesh_instance): self._logger.log(SandeshLogger.get_py_logger_level(sandesh.level()), sandesh.log()) self._writer.send_msg(sandesh, more) # end _send_sandesh def _is_ready_to_send_sandesh(self): return self._sandesh_instance.is_send_queue_enabled()
def work_queue_main(items, function, accumulator, **kwargs): """Execute using Work Queue For valid parameters, see :py:func:`work_queue_executor` in :py:mod:`executor`. For more information, see :ref:`intro-coffea-wq` """ global _wq_queue _check_dynamic_chunksize_targets(kwargs["dynamic_chunksize"]) clevel = kwargs["compression"] if clevel is not None: function = _compression_wrapper(clevel, function) accumulate_fn = _compression_wrapper(clevel, accumulate_result_files) else: accumulate_fn = accumulate_result_files _vprint.verbose_mode = kwargs["verbose"] or kwargs["print_stdout"] _vprint.status_mode = kwargs["status"] if not kwargs["port"]: kwargs["port"] = 0 if kwargs["master_name"] else 9123 if kwargs["environment_file"] and not kwargs["wrapper"]: raise ValueError( "Location of python_package_run could not be determined automatically.\nUse 'wrapper' argument to the work_queue_executor." ) if _wq_queue is None: _wq_queue = WorkQueue( port=kwargs["port"], name=kwargs["master_name"], debug_log=kwargs["debug_log"], stats_log=kwargs["stats_log"], transactions_log=kwargs["transactions_log"], ) # Make use of the stored password file, if enabled. if kwargs["password_file"] is not None: _wq_queue.specify_password_file(kwargs["password_file"]) print("Listening for work queue workers on port {}...".format(_wq_queue.port)) # perform a wait to print any warnings before progress bars _wq_queue.wait(0) _declare_resources(kwargs) # Working within a custom temporary directory: with tempfile.TemporaryDirectory( prefix="wq-executor-tmp-", dir=kwargs["filepath"] ) as tmpdir: fn_wrapper = _create_fn_wrapper(kwargs["x509_proxy"], tmpdir=tmpdir) infile_function = _function_to_file( function, prefix_name=kwargs["function_name"], tmpdir=tmpdir ) infile_accum_fn = _function_to_file( accumulate_fn, prefix_name="accum", tmpdir=tmpdir ) if kwargs["custom_init"]: kwargs["custom_init"](_wq_queue) if kwargs["desc"] == "Preprocessing": return _work_queue_preprocessing( items, accumulator, fn_wrapper, infile_function, tmpdir, kwargs ) else: return _work_queue_processing( items, accumulator, fn_wrapper, infile_function, infile_accum_fn, tmpdir, kwargs, )
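# --- Illustrative call sketch (the key set is taken from the function body
# above; the values are placeholders, not documented coffea defaults) ---
# work_queue_main() is normally invoked through coffea's work_queue_executor,
# which populates `kwargs`. Called by hand, it would need at least:
#
# result = work_queue_main(
#     items, process_fn, accumulator,
#     port=9123, master_name=None,
#     compression=None, dynamic_chunksize=None,
#     verbose=True, print_stdout=False, status=True,
#     debug_log=None, stats_log=None, transactions_log=None,
#     password_file=None, environment_file=None, wrapper=None,
#     x509_proxy=None, filepath=".", function_name="process_fn",
#     custom_init=None, desc="Processing",
# )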
class SandeshStateMachine(object): _IDLE_HOLD_TIME = 5 # in seconds _CONNECT_TIME = 30 # in seconds def __init__(self, connection, logger, primary_collector, secondary_collector): def _on_idle(e): if e.sm._connect_timer is not None: e.sm._cancel_connect_timer() # Reset active and backup collector self._active_collector = self._connection.primary_collector() self._backup_collector = self._connection.secondary_collector() # clean up existing connection e.sm._delete_session() e.sm._start_idle_hold_timer() #end _on_idle def _on_disconnect(e): pass #end _on_disconnect def _on_connect(e): if e.sm._idle_hold_timer is not None: e.sm._cancel_idle_hold_timer() e.sm._connection.reset_collector() # clean up existing connection e.sm._delete_session() if e.sm._active_collector is not None: e.sm._create_session() e.sm._start_connect_timer() e.sm._session.connect() else: e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN)) #end _on_connect def _on_connect_to_backup(e): if e.sm._connect_timer is not None: e.sm._cancel_connect_timer() # clean up existing connection e.sm._delete_session() # try to connect to the backup collector, if known if e.sm._backup_collector is not None: e.sm._active_collector, e.sm._backup_collector = \ e.sm._backup_collector, e.sm._active_collector e.sm._create_session() e.sm._start_connect_timer() e.sm._session.connect() else: e.sm.enqueue_event(Event(event = Event._EV_BACKUP_COLLECTOR_UNKNOWN)) #end _on_connect_to_backup def _on_client_init(e): e.sm._connects += 1 gevent.spawn(e.sm._session.read) e.sm._connection.handle_initialized(e.sm._connects) e.sm._connection.sandesh_instance().send_generator_info() #end _on_client_init def _on_established(e): e.sm._cancel_connect_timer() e.sm._connection.set_collector(e.sm_event.source) e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg) self._connection.sandesh_instance().send_generator_info() #end _on_established # FSM - Fysom self._fsm = Fysom({ 'initial': {'state' : State._IDLE, 'event' : Event._EV_START, 'defer' : True }, 'events': [ # _IDLE {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED, 'src' : State._IDLE, 'dst' : State._CONNECT }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._IDLE, 'dst' : State._CONNECT }, # _DISCONNECT {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._DISCONNECT, 'dst' : State._CONNECT }, # _CONNECT {'name' : Event._EV_COLLECTOR_UNKNOWN, 'src' : State._CONNECT, 'dst' : State._DISCONNECT }, {'name' : Event._EV_TCP_CONNECT_FAIL, 'src' : State._CONNECT, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CONNECT, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CONNECT, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECTED, 'src' : State._CONNECT, 'dst' : State._CLIENT_INIT }, # _CONNECT_TO_BACKUP {'name' : Event._EV_BACKUP_COLLECTOR_UNKNOWN, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECT_FAIL, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECTED, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._CLIENT_INIT }, # _CLIENT_INIT {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CLOSE, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : 
State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV, 'src' : State._CLIENT_INIT, 'dst' : State._ESTABLISHED }, # _ESTABLISHED {'name' : Event._EV_TCP_CLOSE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT } ], 'callbacks': { 'on' + State._IDLE : _on_idle, 'on' + State._CONNECT : _on_connect, 'on' + State._CONNECT_TO_BACKUP : _on_connect_to_backup, 'on' + State._CLIENT_INIT : _on_client_init, 'on' + State._ESTABLISHED : _on_established, } }) self._connection = connection self._session = None self._connects = 0 self._idle_hold_timer = None self._connect_timer = None self._active_collector = primary_collector self._backup_collector = secondary_collector self._logger = logger self._event_queue = WorkQueue(self._dequeue_event, self._is_ready_to_dequeue_event) #end __init__ # Public functions def initialize(self): self.enqueue_event(Event(event = Event._EV_START)) #end initialize def session(self): return self._session #end session def state(self): return self._fsm.current #end state def shutdown(self): self.enqueue_event(Event(event = Event._EV_STOP)) #end shutdown def set_admin_state(self, down): if down == True: self.enqueue_event(Event(event = Event._EV_STOP)) else: self.enqueue_event(Event(event = Event._EV_START)) #end set_admin_state def connect_count(self): return self._connects #end connect_count def active_collector(self): return self._active_collector #end active_collector def backup_collector(self): return self._backup_collector #end backup_collector def enqueue_event(self, event): self._event_queue.enqueue(event) #end enqueue_event def on_session_event(self, session, event): if session is not self._session: self._logger.error("Ignore session event [%d] received for old session" % (event)) return if SandeshSession.SESSION_ESTABLISHED == event: self._logger.info("Session Event: TCP Connected") self.enqueue_event(Event(event = Event._EV_TCP_CONNECTED, session = session)) elif SandeshSession.SESSION_ERROR == event: self._logger.error("Session Event: TCP Connect Fail") self.enqueue_event(Event(event = Event._EV_TCP_CONNECT_FAIL, session = session)) elif SandeshSession.SESSION_CLOSE == event: self._logger.error("Session Event: TCP Connection Closed") self.enqueue_event(Event(event = Event._EV_TCP_CLOSE, session = session)) else: self._logger.error("Received unknown session event [%d]" % (event)) #end on_session_event def on_sandesh_ctrl_msg_receive(self, session, sandesh_ctrl, collector): if sandesh_ctrl.success == True: self.enqueue_event(Event(event = Event._EV_SANDESH_CTRL_MESSAGE_RECV, session = session, msg = sandesh_ctrl, source = collector)) else: # Negotiation with the Collector failed, reset the # connection and retry after sometime. self._logger.error("Negotiation with the Collector %s failed." 
% (collector)) self._session.close() #end on_sandesh_ctrl_msg_receive def on_sandesh_uve_msg_send(self, sandesh_uve): self.enqueue_event(Event(event = Event._EV_SANDESH_UVE_SEND, msg = sandesh_uve)) #end on_sandesh_uve_msg_send # Private functions def _create_session(self): assert self._session is None self._session = SandeshSession(self._connection.sandesh_instance(), self._active_collector, self.on_session_event, self._connection._receive_sandesh_msg) #end _create_session def _delete_session(self): if self._session: self._session.close() self._session = None self._connection.reset_collector() #end _delete_session def _start_idle_hold_timer(self): if self._idle_hold_timer is None: if self._IDLE_HOLD_TIME: self._idle_hold_timer = gevent.spawn_later(self._IDLE_HOLD_TIME, self._idle_hold_timer_expiry_handler) else: self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED)) #end _start_idle_hold_timer def _cancel_idle_hold_timer(self): if self._idle_hold_timer is not None: gevent.kill(self._idle_hold_timer) self._idle_hold_timer = None #end _cancel_idle_hold_timer def _idle_hold_timer_expiry_handler(self): self._idle_hold_timer = None self.enqueue_event(Event(event = Event._EV_IDLE_HOLD_TIMER_EXPIRED)) #end _idle_hold_timer_expiry_handler def _start_connect_timer(self): if self._connect_timer is None: self._connect_timer = gevent.spawn_later(self._CONNECT_TIME, self._connect_timer_expiry_handler, self._session) #end _start_connect_timer def _cancel_connect_timer(self): if self._connect_timer is not None: gevent.kill(self._connect_timer) self._connect_timer = None #end _cancel_connect_timer def _connect_timer_expiry_handler(self, session): self._connect_timer = None self.enqueue_event(Event(event = Event._EV_CONNECT_TIMER_EXPIRED, session = session)) #end _connect_timer_expiry_handler def _is_ready_to_dequeue_event(self): return True #end _is_ready_to_dequeue_event def _dequeue_event(self, event): self._logger.info("Processing event[%s] in state[%s]" \ % (event.event, self._fsm.current)) if event.session is not None and event.session is not self._session: self._logger.info("Ignore event [%s] received for old session" \ % (event.event)) return if event.event == Event._EV_COLLECTOR_CHANGE: old_active_collector = self._active_collector self._active_collector = event.primary_collector self._backup_collector = event.secondary_collector if old_active_collector == self._active_collector: self._logger.info("No change in active collector. Ignore event [%s]" \ % (event.event)) return if event.event == Event._EV_SANDESH_UVE_SEND: if self._fsm.current == State._ESTABLISHED or self._fsm.current == State._CLIENT_INIT: self._connection.handle_sandesh_uve_msg(event.msg) else: self._logger.info("Discarding event[%s] in state[%s]" \ % (event.event, self._fsm.current)) elif event.event == Event._EV_SANDESH_CTRL_MESSAGE_RECV and \ self._fsm.current == State._ESTABLISHED: self._connection.handle_sandesh_ctrl_msg(event.msg) elif self._fsm.cannot(event.event) is True: self._logger.info("Unconsumed event[%s] in state[%s]" \ % (event.event, self._fsm.current)) else: prev_state = self.state() getattr(self._fsm, event.event)(sm = self, sm_event = event) # Log state transition self._logger.info("Sandesh Client: Event[%s] => State[%s] -> State[%s]" \ % (event.event, prev_state, self.state()))
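# --- Usage sketch (illustrative wiring; `connection` and `logger` are assumed
# to come from the enclosing SandeshClient) ---
# The state machine is entirely event driven: construct it, call initialize()
# to fire the deferred _EV_START transition into _IDLE (whose callback arms
# the idle-hold timer that drives it on to _CONNECT), then feed it session
# events; everything is serialized through its WorkQueue.
#
# sm = SandeshStateMachine(connection, logger,
#                          primary_collector='10.1.1.1:8086',
#                          secondary_collector='10.1.1.2:8086')
# sm.initialize()
# ...
# sm.on_session_event(sm.session(), SandeshSession.SESSION_ESTABLISHED)
# print(sm.state(), sm.active_collector(), sm.connect_count())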
def WorkQueueSubmitThread(task_queue=multiprocessing.Queue(), queue_lock=threading.Lock(), launch_cmd=None, env=None, collector_queue=multiprocessing.Queue(), see_worker_output=False, data_dir=".", full=False, cancel_value=multiprocessing.Value('i', 1), port=WORK_QUEUE_DEFAULT_PORT, wq_log_dir=None, project_password=None, project_password_file=None, project_name=None): logger.debug("Starting WorkQueue Submit/Wait Process") orig_ppid = os.getppid() wq_tasks = set() continue_running = True if wq_log_dir is not None: wq_debug_log = os.path.join(wq_log_dir, "debug") cctools_debug_flags_set("all") cctools_debug_config_file(wq_debug_log) logger.debug("Creating Workqueue Object") try: q = WorkQueue(port) except Exception as e: logger.error("Unable to create Workqueue object: {}".format(e)) raise e if project_name: q.specify_name(project_name) if project_password: q.specify_password(project_password) elif project_password_file: q.specify_password_file(project_password_file) # Only write logs when the log_dir is specified, which it most likely always will be if wq_log_dir is not None: wq_master_log = os.path.join(wq_log_dir, "master_log") wq_trans_log = os.path.join(wq_log_dir, "transaction_log") if full: wq_resource_log = os.path.join(wq_log_dir, "resource_logs") q.enable_monitoring_full(dirname=wq_resource_log) q.specify_log(wq_master_log) q.specify_transactions_log(wq_trans_log) while (continue_running): # Monitor the Task Queue ppid = os.getppid() if ppid != orig_ppid: continue_running = False continue # Submit Tasks while task_queue.qsize() > 0: if cancel_value.value == 0: continue_running = False break try: # item = task_queue.get_nowait() item = task_queue.get(timeout=1) logger.debug("Removing task from queue") except queue.Empty: continue parsl_id = item["task_id"] function_data_loc = item["data_loc"] function_result_loc = item["result_loc"] function_result_loc_remote = function_result_loc.split("/")[-1] function_data_loc_remote = function_data_loc.split("/")[-1] input_files = item["input_files"] output_files = item["output_files"] std_files = item["std_files"] full_script_name = workqueue_worker.__file__ script_name = full_script_name.split("/")[-1] remapping_string = "" std_string = "" logger.debug("looking at input") for item in input_files: if item[3] == "std": std_string += "mv " + item[1] + " " + item[0] + "; " else: remapping_string += item[0] + ":" + item[1] + "," logger.debug(remapping_string) logger.debug("looking at output") for item in output_files: remapping_string += item[0] + ":" + item[1] + "," logger.debug(remapping_string) if len(input_files) + len(output_files) > 0: remapping_string = "-r " + remapping_string remapping_string = remapping_string[:-1] logger.debug(launch_cmd) command_str = launch_cmd.format( input_file=function_data_loc_remote, output_file=function_result_loc_remote, remapping_string=remapping_string) logger.debug(command_str) command_str = std_string + command_str logger.debug(command_str) logger.debug("Sending task {} with command: {}".format( parsl_id, command_str)) try: t = Task(command_str) except Exception as e: logger.error("Unable to create task: {}".format(e)) continue if env is not None: for var in env: t.specify_environment_variable(var, env[var]) t.specify_file(full_script_name, script_name, WORK_QUEUE_INPUT, cache=True) t.specify_file(function_result_loc, function_result_loc_remote, WORK_QUEUE_OUTPUT, cache=False) t.specify_file(function_data_loc, function_data_loc_remote, WORK_QUEUE_INPUT, cache=False) t.specify_tag(str(parsl_id)) for item in
input_files: t.specify_file(item[0], item[1], WORK_QUEUE_INPUT, cache=item[2]) for item in output_files: t.specify_file(item[0], item[1], WORK_QUEUE_OUTPUT, cache=item[2]) for item in std_files: t.specify_file(item[0], item[1], WORK_QUEUE_OUTPUT, cache=item[2]) logger.debug("Submitting task {} to workqueue".format(parsl_id)) try: wq_id = q.submit(t) wq_tasks.add(wq_id) except Exception as e: logger.error("Unable to submit task: {}".format(e)) msg = { "tid": parsl_id, "result_received": False, "reason": "Workqueue Task Start Failure", "status": 1 } collector_queue.put_nowait(msg) continue logger.debug("Task {} submitted to workqueue with id {}".format( parsl_id, wq_id)) if cancel_value.value == 0: continue_running = False # Wait for Tasks task_found = True # If the queue is not empty wait on the workqueue queue for a task if not q.empty() and continue_running: while task_found is True: if cancel_value.value == 0: continue_running = False task_found = False continue t = q.wait(1) if t is None: task_found = False continue else: parsl_tid = t.tag logger.debug( "Completed workqueue task {}, parsl task {}".format( t.id, parsl_tid)) status = t.return_status task_result = t.result msg = None if status != 0 or (task_result != WORK_QUEUE_RESULT_SUCCESS and task_result != WORK_QUEUE_RESULT_OUTPUT_MISSING): if task_result == WORK_QUEUE_RESULT_SUCCESS: logger.debug( "Workqueue task {} failed with status {}". format(t.id, status)) reason = "Wrapper Script Failure: " if status == 1: reason += "command line parsing" if status == 2: reason += "problem loading function data" if status == 3: reason += "problem remapping file names" if status == 4: reason += "problem writing out function result" reason += "\nTrace:\n" + str(t.output) logger.debug( "Workqueue runner script failed for task {} because {}\n" .format(parsl_tid, reason)) else: reason = "Workqueue system failure\n" msg = { "tid": parsl_tid, "result_received": False, "reason": reason, "status": status } collector_queue.put_nowait(msg) else: if see_worker_output: print(t.output) result_loc = os.path.join( data_dir, "task_" + str(parsl_tid) + "_function_result") logger.debug( "Looking for result in {}".format(result_loc)) f = open(result_loc, "rb") result = pickle.load(f) f.close() msg = { "tid": parsl_tid, "result_received": True, "result": result } wq_tasks.remove(t.id) collector_queue.put_nowait(msg) if continue_running is False: logger.debug("Exiting WorkQueue Master Thread event loop") break for wq_task in wq_tasks: logger.debug("Cancelling Workqueue Task {}".format(wq_task)) q.cancel_by_taskid(wq_task) logger.debug("Exiting WorkQueue Monitoring Process") return 0
def _work_queue_submit_wait(task_queue=multiprocessing.Queue(), launch_cmd=None, env=None, collector_queue=multiprocessing.Queue(), data_dir=".", full=False, shared_fs=False, autolabel=False, autolabel_window=None, autocategory=False, should_stop=None, port=WORK_QUEUE_DEFAULT_PORT, wq_log_dir=None, project_password_file=None, project_name=None): """Thread to handle Parsl app submissions to the Work Queue objects. Takes in Parsl functions submitted using submit(), and creates a Work Queue task with the appropriate specifications, which is then submitted to Work Queue. After tasks are completed, processes the exit status and exit code of the task, and sends results to the Work Queue collector thread. To avoid python's global interpreter lock with work queue's wait, this function should be launched as a process, not as a lightweight thread. This means that any communication should be done using the multiprocessing module capabilities, rather than shared memory. """ logger.debug("Starting WorkQueue Submit/Wait Process") # Enable debugging flags and create logging file wq_debug_log = None if wq_log_dir is not None: logger.debug("Setting debugging flags and creating logging file") wq_debug_log = os.path.join(wq_log_dir, "debug_log") # Create WorkQueue queue object logger.debug("Creating WorkQueue Object") try: logger.debug("Listening on port {}".format(port)) q = WorkQueue(port, debug_log=wq_debug_log) except Exception as e: logger.error("Unable to create WorkQueue object: {}".format(e)) raise e # Specify WorkQueue queue attributes if project_name: q.specify_name(project_name) if project_password_file: q.specify_password_file(project_password_file) if autolabel: q.enable_monitoring() if autolabel_window is not None: q.tune('category-steady-n-tasks', autolabel_window) # Only write logs when the wq_log_dir is specified, which it most likely will be if wq_log_dir is not None: wq_master_log = os.path.join(wq_log_dir, "master_log") wq_trans_log = os.path.join(wq_log_dir, "transaction_log") if full: wq_resource_log = os.path.join(wq_log_dir, "resource_logs") q.enable_monitoring_full(dirname=wq_resource_log) q.specify_log(wq_master_log) q.specify_transactions_log(wq_trans_log) orig_ppid = os.getppid() result_file_of_task_id = { } # Mapping taskid -> result file for active tasks. 
while not should_stop.value: # Monitor the task queue ppid = os.getppid() if ppid != orig_ppid: logger.debug("new Process") break # Submit tasks while task_queue.qsize() > 0 and not should_stop.value: # Obtain task from task_queue try: task = task_queue.get(timeout=1) logger.debug("Removing task from queue") except queue.Empty: continue pkg_pfx = "" if task.env_pkg is not None: pkg_pfx = "./{} -e {} ".format( os.path.basename(package_run_script), os.path.basename(task.env_pkg)) # Create command string logger.debug(launch_cmd) command_str = launch_cmd.format( package_prefix=pkg_pfx, mapping=os.path.basename(task.map_file), function=os.path.basename(task.function_file), result=os.path.basename(task.result_file)) logger.debug(command_str) # Create WorkQueue task for the command logger.debug("Sending task {} with command: {}".format( task.id, command_str)) try: t = Task(command_str) except Exception as e: logger.error("Unable to create task: {}".format(e)) collector_queue.put_nowait( WqTaskToParsl( id=task.id, result_received=False, result=None, reason="task could not be created by work queue", status=-1)) continue t.specify_category(task.category) if autolabel: q.specify_category_mode( task.category, WORK_QUEUE_ALLOCATION_MODE_MAX_THROUGHPUT) # Specify environment variables for the task if env is not None: for var in env: t.specify_environment_variable(var, env[var]) if task.env_pkg is not None: t.specify_input_file(package_run_script, cache=True) t.specify_input_file(task.env_pkg, cache=True) # Specify script, and data/result files for task t.specify_input_file(exec_parsl_function.__file__, cache=True) t.specify_input_file(task.function_file, cache=False) t.specify_input_file(task.map_file, cache=False) t.specify_output_file(task.result_file, cache=False) t.specify_tag(str(task.id)) result_file_of_task_id[str(task.id)] = task.result_file logger.debug("Parsl ID: {}".format(task.id)) # Specify input/output files that need to be staged. # Absolute paths are assumed to be in shared filesystem, and thus # not staged by work queue. if not shared_fs: for spec in task.input_files: if spec.stage: t.specify_input_file(spec.parsl_name, spec.parsl_name, cache=spec.cache) for spec in task.output_files: if spec.stage: t.specify_output_file(spec.parsl_name, spec.parsl_name, cache=spec.cache) # Submit the task to the WorkQueue object logger.debug("Submitting task {} to WorkQueue".format(task.id)) try: wq_id = q.submit(t) except Exception as e: logger.error( "Unable to submit task to work queue: {}".format(e)) collector_queue.put_nowait( WqTaskToParsl( id=task.id, result_received=False, result=None, reason="task could not be submitted to work queue", status=-1)) continue logger.debug("Task {} submitted to WorkQueue with id {}".format( task.id, wq_id)) # If the queue is not empty wait on the WorkQueue queue for a task task_found = True if not q.empty(): while task_found and not should_stop.value: # Obtain the task from the queue t = q.wait(1) if t is None: task_found = False continue # When a task is found: parsl_id = t.tag logger.debug( "Completed WorkQueue task {}, parsl task {}".format( t.id, t.tag)) result_file = result_file_of_task_id.pop(t.tag) # A task completes 'successfully' if it has a result file, # and it can be loaded. This may mean that the 'success' is # an exception.
logger.debug("Looking for result in {}".format(result_file)) try: with open(result_file, "rb") as f_in: result = pickle.load(f_in) logger.debug("Found result in {}".format(result_file)) collector_queue.put_nowait( WqTaskToParsl(id=parsl_id, result_received=True, result=result, reason=None, status=t.return_status)) # If a result file could not be generated, explain the # failure according to work queue error codes. We generate # an exception and wrap it with RemoteExceptionWrapper, to # match the positive case. except Exception as e: reason = _explain_work_queue_result(t) logger.debug( "Did not find result in {}".format(result_file)) logger.debug( "Wrapper Script status: {}\nWorkQueue Status: {}". format(t.return_status, t.result)) logger.debug( "Task with id parsl {} / wq {} failed because:\n{}". format(parsl_id, t.id, reason)) collector_queue.put_nowait( WqTaskToParsl(id=parsl_id, result_received=False, result=e, reason=reason, status=t.return_status)) logger.debug("Exiting WorkQueue Monitoring Process") return 0
def __init__(self, connection, logger, primary_collector, secondary_collector): def _update_connection_state(e, status): from connection_info import ConnectionState from gen_py.process_info.ttypes import ConnectionType collector_addr = e.sm._active_collector if collector_addr is None: collector_addr = '' ConnectionState.update(conn_type = ConnectionType.COLLECTOR, name = '', status = status, server_addrs = [collector_addr], message = '%s to %s on %s' % (e.src, e.dst, e.event)) #end _update_connection_state def _connection_state_up(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.UP) #end _connection_state_up def _connection_state_down(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.DOWN) #end _connection_state_down def _connection_state_init(e): from gen_py.process_info.ttypes import ConnectionStatus _update_connection_state(e, ConnectionStatus.INIT) #end _connection_state_init def _on_idle(e): if e.sm._connect_timer is not None: e.sm._cancel_connect_timer() # Reset active and backup collector self._active_collector = self._connection.primary_collector() self._backup_collector = self._connection.secondary_collector() # clean up existing connection e.sm._delete_session() if e.sm._disable != True: e.sm._start_idle_hold_timer() # update connection state _connection_state_down(e) #end _on_idle def _on_disconnect(e): # update connection state _connection_state_down(e) #end _on_disconnect def _on_connect(e): if e.sm._idle_hold_timer is not None: e.sm._cancel_idle_hold_timer() e.sm._connection.reset_collector() # clean up existing connection e.sm._delete_session() if e.sm._active_collector is not None: # update connection state _connection_state_init(e) e.sm._create_session() e.sm._start_connect_timer() e.sm._session.connect() else: e.sm.enqueue_event(Event(event = Event._EV_COLLECTOR_UNKNOWN)) #end _on_connect def _on_connect_to_backup(e): if e.sm._connect_timer is not None: e.sm._cancel_connect_timer() # clean up existing connection e.sm._delete_session() # try to connect to the backup collector, if known if e.sm._backup_collector is not None: e.sm._active_collector, e.sm._backup_collector = \ e.sm._backup_collector, e.sm._active_collector # update connection state _connection_state_init(e) e.sm._create_session() e.sm._start_connect_timer() e.sm._session.connect() else: e.sm.enqueue_event(Event(event = Event._EV_BACKUP_COLLECTOR_UNKNOWN)) #end _on_connect_to_backup def _on_client_init(e): e.sm._connects += 1 gevent.spawn(e.sm._session.read) e.sm._connection.handle_initialized(e.sm._connects) e.sm._connection.sandesh_instance().send_generator_info() # update connection state _connection_state_init(e) #end _on_client_init def _on_established(e): e.sm._cancel_connect_timer() e.sm._connection.set_collector(e.sm_event.source) e.sm._connection.handle_sandesh_ctrl_msg(e.sm_event.msg) e.sm._connection.sandesh_instance().send_generator_info() # update connection state _connection_state_up(e) #end _on_established # FSM - Fysom self._fsm = Fysom({ 'initial': {'state' : State._IDLE, 'event' : Event._EV_START, 'defer' : True }, 'events': [ # _IDLE {'name' : Event._EV_IDLE_HOLD_TIMER_EXPIRED, 'src' : State._IDLE, 'dst' : State._CONNECT }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._IDLE, 'dst' : State._CONNECT }, {'name' : Event._EV_START, 'src' : State._IDLE, 'dst' : State._CONNECT }, # _DISCONNECT {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._DISCONNECT, 'dst' : State._CONNECT }, 
# _CONNECT {'name' : Event._EV_COLLECTOR_UNKNOWN, 'src' : State._CONNECT, 'dst' : State._DISCONNECT }, {'name' : Event._EV_TCP_CONNECT_FAIL, 'src' : State._CONNECT, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CONNECT, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CONNECT, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECTED, 'src' : State._CONNECT, 'dst' : State._CLIENT_INIT }, # _CONNECT_TO_BACKUP {'name' : Event._EV_BACKUP_COLLECTOR_UNKNOWN, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECT_FAIL, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CONNECTED, 'src' : State._CONNECT_TO_BACKUP, 'dst' : State._CLIENT_INIT }, # _CLIENT_INIT {'name' : Event._EV_CONNECT_TIMER_EXPIRED, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_TCP_CLOSE, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._CLIENT_INIT, 'dst' : State._IDLE }, {'name' : Event._EV_SANDESH_CTRL_MESSAGE_RECV, 'src' : State._CLIENT_INIT, 'dst' : State._ESTABLISHED }, # _ESTABLISHED {'name' : Event._EV_TCP_CLOSE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT_TO_BACKUP }, {'name' : Event._EV_STOP, 'src' : State._ESTABLISHED, 'dst' : State._IDLE }, {'name' : Event._EV_COLLECTOR_CHANGE, 'src' : State._ESTABLISHED, 'dst' : State._CONNECT } ], 'callbacks': { 'on' + State._IDLE : _on_idle, 'on' + State._CONNECT : _on_connect, 'on' + State._CONNECT_TO_BACKUP : _on_connect_to_backup, 'on' + State._CLIENT_INIT : _on_client_init, 'on' + State._ESTABLISHED : _on_established, } }) self._connection = connection self._session = None self._connects = 0 self._disable = False self._idle_hold_timer = None self._connect_timer = None self._active_collector = primary_collector self._backup_collector = secondary_collector self._logger = logger self._event_queue = WorkQueue(self._dequeue_event, self._is_ready_to_dequeue_event)