def test_exit_callback():
    """Check that the exit callback fires after a normal exit and after terminate().

    The body is a generator: every ``yield`` hands a future to whatever
    coroutine runner drives this test (the runner/decorator is not part of
    this block).
    """
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    exited = Event()
    wait_limit = timedelta(seconds=3)

    @gen.coroutine
    def on_stop(_proc):
        # ``proc`` is resolved when the callback runs, so the identity check
        # is always made against the process currently under test.
        assert _proc is proc
        yield gen.moment
        exited.set()

    # --- Scenario 1: normal process exit ---------------------------------
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    exited.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    # Still running, so the callback must not have fired yet.
    assert proc.is_alive()
    assert not exited.is_set()

    # Feeding None is what the test relies on to make the child finish.
    to_child.put(None)
    yield exited.wait(wait_limit)
    assert exited.is_set()
    assert not proc.is_alive()

    # --- Scenario 2: process terminated ----------------------------------
    proc = AsyncProcess(target=wait)
    exited.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not exited.is_set()

    yield proc.terminate()
    yield exited.wait(wait_limit)
    assert exited.is_set()
class connBase():
    """Multiplex several stream connections over one pair of byte buffers.

    Outgoing traffic is wrapped in ``TOUMsg`` objects, queued through
    :meth:`addTask` and packed into ``writeBuffer`` by :meth:`toStream`.
    Incoming bytes are taken from ``readBuffer``, unpacked by
    :meth:`stream_to_map` and routed to the per-connection state held in
    ``connMap`` (keyed by ``conn_seq``).

    Whatever fills ``readBuffer``, drains ``writeBuffer`` and sets
    ``rEvent`` / ``wEvent`` is not part of this class (presumably a
    subclass or the transport layer -- confirm against callers).
    """

    def __init__(self, ioloop, isServer):
        """Initialise shared state, then schedule the background work.

        :param ioloop: loop object kept on ``self.ioloop``.  The scheduling
            below goes through ``IOLoop.instance()``, not this argument.
        :param isServer: when true, messages for connections that are not
            registered yet are buffered in ``writeBeforeConnMap``.
        """
        self.ioloop = ioloop
        self.rEvent = Event()
        self.wEvent = Event()
        self.writeLock = threading.Lock()
        self.readLock = threading.Lock()
        self.writeBuffer = b''
        self.readBuffer = b''
        self.outputMap_byId = {}
        self.outputSize = 0
        self.outputSizeDownEvent = Event()
        self.connMap = {}
        self.eachConnWriteLimit = eachConnWriteLimit
        self.addTaskSeq = 0
        self.addTaskMap = {}
        self.waitIdMap = {}
        self.waitIdEvent = Event()
        self.streamCloseSign = {}
        self.isServer = isServer
        if isServer:
            self.writeBeforeConnMap = {}

        # Schedule only once every attribute the callbacks read exists, so a
        # loop that is already running can never observe a half-built object.
        PeriodicCallback(self.calOutputSize, 10000).start()
        IOLoop.instance().add_callback(self.toStream)
        IOLoop.instance().add_callback(self.stream_to_map)
        if isServer:
            PeriodicCallback(self.deal_writeBeforeConnMap, 1000).start()

    def checkStreamClose(self, conn_seq, stream):
        """Release one hold on ``stream``; close it when none remain.

        :meth:`addConnMap` registers two holds per connection and
        :meth:`doRead` / :meth:`doWrite` each release one on exit.
        """
        self.streamCloseSign[conn_seq] -= 1
        if self.streamCloseSign[conn_seq] == 0:
            del self.streamCloseSign[conn_seq]
            try:
                stream.close()
            except Exception:
                # Best-effort close: the stream may already be gone.
                pass

    def deal_writeBeforeConnMap(self):
        """Drop early-arrival buffers whose ``createTime`` is over 100 old.

        The unit is whatever ``getRunningTime()`` returns (presumably
        seconds -- confirm).
        """
        # Iterate over a snapshot of the keys because entries are deleted.
        for conn_seq in list(self.writeBeforeConnMap.keys()):
            entry = self.writeBeforeConnMap[conn_seq]
            if entry['createTime'] < getRunningTime() - 100:
                del self.writeBeforeConnMap[conn_seq]

    def calOutputSize(self):
        """Recompute ``outputSize`` and signal when it has shrunk.

        The size is the sum of every connection's pending ``readBuffer``
        plus the ``length`` of every message parked in ``outputMap_byId``.
        """
        total = 0
        for conn in self.connMap.values():
            total += len(conn['readBuffer'])
        for entry in self.outputMap_byId.values():
            total += entry['msg'].length
        if total < self.outputSize:
            # Wakes stream_to_map if it is throttled on the cache size.
            self.outputSizeDownEvent.set()
        self.outputSize = total

    def addConnMap(self, conn_seq):
        """Register per-connection state for ``conn_seq``."""
        m = {}
        m['readError'] = False
        m['writeError'] = False
        # Bytes sent in 'write' messages and not yet acknowledged by a
        # 'writeBack' message.
        m['writeNotBack'] = 0
        m['readBuffer'] = b''
        m['rEvent'] = Event()
        self.connMap[conn_seq] = m
        # One hold for doRead and one for doWrite (see checkStreamClose).
        self.streamCloseSign[conn_seq] = 2

    @gen.coroutine
    def toStream(self):
        """Pack queued tasks into ``writeBuffer`` each time ``wEvent`` fires.

        Tasks are packed in sequence order.  Packing stops early once the
        buffer exceeds ``con_streamBufferSize``; the remaining tasks stay in
        ``addTaskMap`` until the next wake-up.
        """
        while True:
            yield self.wEvent.wait()
            self.wEvent.clear()
            # ``with`` guarantees the lock is released even if pack() raises;
            # a bare acquire()/release() pair would leave it held forever.
            with self.writeLock:
                for seq in sorted(self.addTaskMap.keys()):
                    task = self.addTaskMap[seq]
                    self.writeBuffer += task['con'].pack()
                    # Unblock the addTask() call waiting on this message.
                    task['event'].set()
                    del self.addTaskMap[seq]
                    if len(self.writeBuffer) > con_streamBufferSize:
                        break

    @gen.coroutine
    def addTask(self, msg):
        """Queue ``msg`` and wait until :meth:`toStream` has packed it.

        NOTE(review): nothing here sets ``wEvent``; the wake-up for
        ``toStream`` presumably comes from elsewhere -- confirm.
        """
        self.addTaskSeq += 1
        e = Event()
        self.addTaskMap[self.addTaskSeq] = {'con': msg, 'event': e}
        yield e.wait()

    def checkDelConn(self, conn_seq):
        """Forget ``conn_seq`` once both its error flags are set."""
        conn = self.connMap[conn_seq]
        if conn['writeError'] and conn['readError']:
            # Wake any coroutine parked on this connection so it notices
            # that the entry has disappeared.
            conn['rEvent'].set()
            del self.connMap[conn_seq]

    @gen.coroutine
    def doWrite(self, stream, conn_seq):
        """Read from ``stream`` and queue the bytes as 'write' messages.

        The coroutine ends (releasing its hold on the stream) when the
        connection disappears from ``connMap``, when its ``writeError``
        flag is set, or when the stream is closed; in the last case a
        'readError' message is queued first.
        """
        while True:
            if conn_seq not in self.connMap:
                self.checkStreamClose(conn_seq, stream)
                return
            closeSign = False
            try:
                data = yield gen.with_timeout(
                    timedelta(seconds=connCheckTime),
                    stream.read_bytes(eachConnWriteLimit, partial=True),
                    quiet_exceptions=(StreamClosedError))
            except StreamClosedError:
                closeSign = True
            except gen.TimeoutError:
                # Nothing arrived within connCheckTime: cancel the pending
                # read by hand so the loop can re-check the connection.
                # NOTE(review): this pokes at private IOStream attributes
                # and depends on the installed Tornado version.
                try:
                    IOLoop.current().remove_handler(stream.socket)
                    state = (stream._state & ~IOLoop.current().READ)
                    stream._state = None
                    stream._read_callback = None
                    stream._read_future = None
                    stream._add_io_state(state)
                    data = b''
                except:
                    closeSign = True
            except:
                # NOTE(review): every other error is collapsed into a bare
                # Exception; kept as-is because callers may depend on it.
                raise Exception
            if closeSign:
                pack = {'type': 'readError', 'conn_seq': conn_seq}
                msg = TOUMsg(pack, b'')
                yield self.addTask(msg)
                # The connection may have been removed while we waited.
                if conn_seq not in self.connMap:
                    self.checkStreamClose(conn_seq, stream)
                    return
                self.connMap[conn_seq]['writeError'] = True
                self.checkDelConn(conn_seq)
                self.checkStreamClose(conn_seq, stream)
                return
            if conn_seq not in self.connMap:
                self.checkStreamClose(conn_seq, stream)
                return
            # Flow control: stall while the unacknowledged byte count is at
            # or above the per-connection limit.
            while True:
                if conn_seq not in self.connMap:
                    self.checkStreamClose(conn_seq, stream)
                    return
                if self.connMap[conn_seq]['writeError']:
                    self.checkStreamClose(conn_seq, stream)
                    return
                if self.connMap[conn_seq][
                        'writeNotBack'] < self.eachConnWriteLimit:
                    break
                yield self.connMap[conn_seq]['rEvent'].wait()
                if conn_seq not in self.connMap:
                    self.checkStreamClose(conn_seq, stream)
                    return
                self.connMap[conn_seq]['rEvent'].clear()
            if data == b'':
                # Timed-out read: nothing to forward this round.
                continue
            con = {'type': 'write', 'conn_seq': conn_seq}
            msg = TOUMsg(con, data)
            self.connMap[conn_seq]['writeNotBack'] = self.connMap[conn_seq][
                'writeNotBack'] + len(data)
            yield self.addTask(msg)

    @gen.coroutine
    def doRead(self, stream, conn_seq):
        """Write bytes buffered for ``conn_seq`` to ``stream``.

        After each successful write a 'writeBack' message carrying the
        number of bytes written is queued.  If the stream is closed, a
        'writeError' message is queued and the coroutine ends.
        """
        while True:
            if conn_seq not in self.connMap:
                self.checkStreamClose(conn_seq, stream)
                return
            if self.connMap[conn_seq]['readBuffer'] == b'' and self.connMap[
                    conn_seq]['readError']:
                # Buffer fully flushed and no more data will arrive.
                self.checkStreamClose(conn_seq, stream)
                return
            if self.connMap[conn_seq]['readBuffer'] == b'':
                yield self.connMap[conn_seq]['rEvent'].wait()
                if conn_seq not in self.connMap:
                    self.checkStreamClose(conn_seq, stream)
                    return
                self.connMap[conn_seq]['rEvent'].clear()
                continue
            # Take the whole buffer before yielding so bytes appended while
            # the write is in flight are not lost.
            s = self.connMap[conn_seq]['readBuffer']
            self.connMap[conn_seq]['readBuffer'] = b''
            try:
                yield stream.write(s)
                con = {
                    'type': 'writeBack',
                    'conn_seq': conn_seq,
                    'length': len(s)
                }
                msg = TOUMsg(con, b'')
                yield self.addTask(msg)
            except StreamClosedError:
                pack = {'type': 'writeError', 'conn_seq': conn_seq}
                msg = TOUMsg(pack, b'')
                yield self.addTask(msg)
                if conn_seq not in self.connMap:
                    self.checkStreamClose(conn_seq, stream)
                    return
                self.connMap[conn_seq]['readError'] = True
                self.checkDelConn(conn_seq)
                self.checkStreamClose(conn_seq, stream)
                return
            except:
                # NOTE(review): see the matching handler in doWrite.
                raise Exception

    @gen.coroutine
    def stream_to_map(self):
        """Unpack messages from ``readBuffer`` and dispatch them.

        Messages without a ``conn_seq`` are stored in ``outputMap_byId``
        under their ``id``.  Messages with a ``conn_seq`` update the
        matching ``connMap`` entry; on the server side, payloads for an
        unknown connection are buffered in ``writeBeforeConnMap``.
        """
        while True:
            yield self.rEvent.wait()
            self.rEvent.clear()
            while True:
                # Back-pressure: stop unpacking while too much is cached.
                while self.outputSize > tcpManagerCacheSize:
                    yield self.outputSizeDownEvent.wait()
                    self.outputSizeDownEvent.clear()
                msg = TOUMsg()
                # ``with`` releases the lock even if unpack() raises.
                with self.readLock:
                    r, self.readBuffer = msg.unpack(self.readBuffer)
                if not r:
                    # No complete message left in the buffer.
                    break
                header = msg.m_json
                if 'conn_seq' not in header:
                    if not self.isServer and header['id'] in self.waitIdMap:
                        self.outputMap_byId[header['id']] = {
                            'msg': msg,
                            'createTime': getRunningTime()
                        }
                        self.waitIdMap[header['id']]['event'].set()
                    elif self.isServer:
                        self.outputMap_byId[header['id']] = {
                            'msg': msg,
                            'createTime': getRunningTime()
                        }
                        self.waitIdEvent.set()
                    continue
                ty = header['type']
                conn_seq = header['conn_seq']
                if conn_seq not in self.connMap:
                    if not self.isServer:
                        continue
                    if conn_seq not in self.writeBeforeConnMap:
                        m = {'createTime': getRunningTime(), 'buffer': b''}
                        self.writeBeforeConnMap[conn_seq] = m
                    self.writeBeforeConnMap[conn_seq][
                        'buffer'] = self.writeBeforeConnMap[conn_seq][
                            'buffer'] + msg.strContetn
                    continue
                # Wake doRead/doWrite for this connection before mutating
                # its state; they only run once this coroutine yields.
                e = self.connMap[conn_seq]['rEvent']
                e.set()
                if ty == 'write':
                    self.connMap[conn_seq]['readBuffer'] = self.connMap[
                        conn_seq]['readBuffer'] + msg.strContetn
                elif ty == 'readError':
                    self.connMap[conn_seq]['readError'] = True
                elif ty == 'writeBack':
                    self.connMap[conn_seq]['writeNotBack'] = self.connMap[
                        conn_seq]['writeNotBack'] - header['length']
                elif ty == 'writeError':
                    self.connMap[conn_seq]['writeError'] = True
                self.checkDelConn(conn_seq)
class Connection:
    """ Client-side websocket connection.

        Instances are meant to be obtained through the ``connect`` helper rather than
        by calling the constructor directly.

        .. code-block:: python

            >>> from diplomacy.client.connection import connect
            >>> connection = await connect(hostname, port)

        Properties:

        - **hostname**: :class:`str` host to connect to (e.g. 'localhost')
        - **port**: :class:`int` port to connect to (e.g. 8888)
        - **use_ssl**: :class:`bool` whether the connection is secured (True) or not (False).
        - **url**: (property) :class:`str` websocket url built from use_ssl, hostname and port
        - **connection**: :class:`tornado.websocket.WebSocketClientConnection` the underlying
          tornado websocket connection (None while not connected)
        - **connection_count**: :class:`int` number of successful connections made by this object.
          The message-reading loop is started only when this count is still 0.
        - **is_connecting**: :class:`tornado.locks.Event` cleared while a connection attempt is in
          progress and set once connected. Synchronize requests only wait for this event.
        - **is_reconnecting**: :class:`tornado.locks.Event` cleared while a re-connection is in
          progress. Every non-Synchronize request waits for this event before being written.
        - **channels**: a :class:`weakref.WeakValueDictionary` mapping channel token to
          :class:`.Channel` object.
        - **requests_to_send**: a :class:`Dict` mapping a request ID to the context of a request
          **not sent**. A request that could not be written because the socket was closed is
          stored here so that it can be sent later.
        - **requests_waiting_responses**: a :class:`Dict` mapping a request ID to the context of a
          request **sent** and still waiting for a server response.
        - **unknown_tokens**: :class:`set` tokens already reported to the server as unknown.
    """
    __slots__ = [
        'hostname', 'port', 'use_ssl', 'connection', 'is_connecting',
        'is_reconnecting', 'connection_count', 'channels', 'requests_to_send',
        'requests_waiting_responses', 'unknown_tokens'
    ]

    def __init__(self, hostname, port, use_ssl=False):
        """ Build an unconnected Connection object.

            Prefer the ``connect`` helper to calling this directly.

            .. code-block:: python

                >>> from diplomacy.client.connection import connect
                >>> connection = await connect(hostname, port)

            :param hostname: host to connect to (e.g. 'localhost')
            :param port: port to connect to (e.g. 8888)
            :param use_ssl: whether the connection is secured (True) or not (False).
            :type hostname: str
            :type port: int
            :type use_ssl: bool
        """
        # Where to connect.
        self.hostname = hostname
        self.port = port
        self.use_ssl = bool(use_ssl)

        # Live socket and connection bookkeeping.
        self.connection = None
        self.connection_count = 0
        self.is_connecting = Event()
        self.is_reconnecting = Event()

        # Request / channel registries.
        self.channels = weakref.WeakValueDictionary()  # {token => Channel}
        self.requests_to_send = {}  # type: Dict[str, RequestFutureContext]
        self.requests_waiting_responses = {}  # type: Dict[str, RequestFutureContext]
        self.unknown_tokens = set()

        # A brand-new object has nothing to re-synchronise, so it starts in the
        # "reconnected" state even though it is not connected yet.
        self.is_reconnecting.set()

    @property
    def url(self):
        """ Websocket url ('ws' or 'wss' scheme) built from hostname and port. """
        scheme = 'wss' if self.use_ssl else 'ws'
        return '%s://%s:%d' % (scheme, self.hostname, self.port)

    # ===================
    # Public Methods.
    # ===================

    @gen.coroutine
    def authenticate(self, username, password):
        """ Send a :class:`.SignIn` request and return what the server answers.

            :param username: username
            :param password: password
            :return: a :class:`.Channel` object representing the authentication.
            :type username: str
            :type password: str
            :rtype: diplomacy.client.channel.Channel
        """
        sign_in = requests.SignIn(username=username, password=password)
        channel = yield self.send(sign_in)
        return channel

    @gen.coroutine
    def get_daide_port(self, game_id):
        """ Send a :class:`.GetDaidePort` request.

            :param game_id: game id for which to retrieve the DAIDE port.
            :return: the game DAIDE port
            :type game_id: str
            :rtype: int
        """
        port_request = requests.GetDaidePort(game_id=game_id)
        daide_port = yield self.send(port_request)
        return daide_port

    # ===================
    # Private Methods
    # ===================

    @gen.coroutine
    def _connect(self, message=None):
        """ Open (or re-open) the tornado websocket connection.

            Up to NB_CONNECTION_ATTEMPTS attempts are made, each limited to
            ATTEMPT_DELAY_SECONDS, with a pause of ATTEMPT_DELAY_SECONDS between two attempts.
            The error of the last attempt is raised if none succeeds.

            :param message: if provided, logged at info level before connecting.
            :type message: str, optional
        """
        if message:
            LOGGER.info(message)

        # Mark the object as "connecting" and forget any previous socket.
        self.is_connecting.clear()
        self.connection = None

        retryable_errors = (gen.TimeoutError, ConnectionAbortedError, ConnectionError,
                            ConnectionRefusedError, ConnectionResetError)
        for attempt_index in range(constants.NB_CONNECTION_ATTEMPTS):
            attempt_number = attempt_index + 1
            try:
                future_connection = websocket_connect(self.url)
                self.connection = yield gen.with_timeout(
                    timedelta(seconds=constants.ATTEMPT_DELAY_SECONDS),
                    future_connection)
                break
            except retryable_errors as ex:
                # Out of attempts: give the last error to the caller.
                if attempt_number == constants.NB_CONNECTION_ATTEMPTS:
                    raise ex
                LOGGER.warning('Connection failing (attempt %d), retrying.',
                               attempt_number)
                yield gen.sleep(constants.ATTEMPT_DELAY_SECONDS)

        # The reading loop is launched once, on the very first success.
        if not self.connection_count:
            ioloop.IOLoop.current().add_callback(self._handle_socket_messages)

        self.connection_count += 1
        self.is_connecting.set()
        LOGGER.info('Connection succeeds.')

    @gen.coroutine
    def _reconnect(self):
        """ Re-open the socket, then start the re-synchronisation procedure. """
        # Block non-Synchronize requests until reconnection is over.
        self.is_reconnecting.clear()
        yield self._connect('Trying to reconnect.')
        # is_reconnecting is not set here; per the original comment, it is
        # the _Reconnection procedure that completes the reconnection.
        _Reconnection(self).reconnect()

    @gen.coroutine
    def _on_socket_message(self, socket_message):
        """ Decode a raw socket message (string) and dispatch it.

            A message carrying a request id is treated as the response to a pending request;
            a message carrying a notification id is treated as a notification.
        """
        try:
            json_message = json.loads(socket_message)
        except ValueError:
            LOGGER.exception('Unable to parse JSON from a socket message.')
            return
        if not isinstance(json_message, dict):
            LOGGER.error("Unable to convert a JSON string to a dictionary.")
            return

        request_id = json_message.get(strings.REQUEST_ID)
        notification_id = json_message.get(strings.NOTIFICATION_ID)

        if request_id:
            # The response may arrive before the request has been marked as
            # 'waiting responses'. Poll up to 10 times, 0.5 s apart (5 secs),
            # to rule out that race before giving up.
            polls_left = 10
            while request_id not in self.requests_waiting_responses:
                if not polls_left:
                    LOGGER.error('Unknown request.')
                    return
                polls_left -= 1
                yield gen.sleep(0.5)

            request_context = self.requests_waiting_responses.pop(
                request_id)  # type: RequestFutureContext
            try:
                response = responses.parse_dict(json_message)
                managed_data = handle_response(request_context, response)
                request_context.future.set_result(managed_data)
            except exceptions.ResponseException as ex:
                LOGGER.error('Error received for request %s: %s',
                             request_context.request.name, ex)
                LOGGER.debug('Full request was: %s',
                             request_context.request.to_dict())
                request_context.future.set_exception(ex)
            return

        if not notification_id:
            LOGGER.error('Unknown socket message.')
            return

        notification = notifications.parse_dict(json_message)
        token = notification.token
        if token in self.channels:
            notification_managers.handle_notification(self, notification)
            return
        # No live channel for this token: log it the first time only, and
        # tell the server about it every time.
        if token not in self.unknown_tokens:
            LOGGER.error('Unknown notification: %s', notification.name)
        self._handle_unknown_token(token)

    @gen.coroutine
    def _handle_socket_messages(self):
        """ Endless loop reading messages from the connection. """
        while True:
            msg = yield self.connection.read_message()
            if msg is not None:
                # Regular message: decode and dispatch it.
                yield self._on_socket_message(msg)
                continue
            # None means the socket was closed.
            LOGGER.error('Disconnected.')
            yield self._reconnect()

    def _handle_unknown_token(self, token):
        """ Tell the server that a channel token is unknown on this side.
            This is likely because the channel has gone out of scope.

            :param token: token to notify server with.
        """
        self.unknown_tokens.add(token)
        unknown_token_request = requests.UnknownToken(token=token)
        # Fire and forget: no response is awaited and a closed socket is ignored.
        try:
            self.connection.write_message(unknown_token_request.json())
        except (WebSocketClosedError, StreamClosedError):
            pass

    def _register_to_send(self, request_context):
        """ Remember a request context so that the request can be sent later.

            :param request_context: context of request to send.
            :type request_context: RequestFutureContext
        """
        request_id = request_context.request_id
        self.requests_to_send[request_id] = request_context

    def send(self, request, for_game=None):
        """ Send a request.

            :param request: request object.
            :param for_game: (optional) NetworkGame object (required for game requests).
            :return: a Future resolving to the response handler result of this request;
                it is bounded by REQUEST_TIMEOUT_SECONDS.
            :type request: diplomacy.communication.requests._AbstractRequest
            :type for_game: diplomacy.client.network_game.NetworkGame, optional
            :rtype: Future
        """
        request_future = Future()
        request_context = RequestFutureContext(request=request,
                                               future=request_future,
                                               connection=self,
                                               game=for_game)
        written = self.write_request(request_context)
        on_written = _MessageWrittenCallback(request_context).callback
        written.add_done_callback(on_written)
        timeout = timedelta(seconds=constants.REQUEST_TIMEOUT_SECONDS)
        return gen.with_timeout(timeout, request_future)

    def write_request(self, request_context):
        """ Write a request into the internal connection object.

            The write happens in three chained steps: wait until the connection is usable,
            write the message, then report the outcome through the returned future.

            :param request_context: context of request to send.
            :type request_context: RequestFutureContext
            :return: a Future completed with the write result, or failed with the error.
        """
        future = Future()
        request = request_context.request

        def relay_write_outcome(write_future):
            """ 3) Copy the outcome of the write (result or exception) to the returned future. """
            error = write_future.exception()
            if error is None:
                future.set_result(write_future.result())
            else:
                future.set_exception(error)

        def send_when_ready(ready_future):
            """ 2) The connection is ready (or failed): write the request. """
            error = ready_future.exception()
            if error is not None:
                LOGGER.error(
                    'Fatal (re)connection error occurred while sending a request.'
                )
                future.set_exception(error)
                return
            try:
                if self.connection is None:
                    raise WebSocketClosedError()
                write_future = self.connection.write_message(request.json())
            except (WebSocketClosedError, StreamClosedError) as exc:
                # Socket closed under us: keep the context so that the
                # request can be re-sent after reconnection, and let the
                # caller see the error.
                self._register_to_send(request_context)
                future.set_exception(exc)
            else:
                write_future.add_done_callback(relay_write_outcome)

        # 1) Synchronize requests only need the socket to be connected; every
        #    other request waits for the reconnection to be over.
        if isinstance(request, requests.Synchronize):
            gate = self.is_connecting
        else:
            gate = self.is_reconnecting
        gate.wait().add_done_callback(send_when_ready)

        return future
class MonitoringLoop(object): def __init__(self, check_time, fx_correlator_object): self.instrument = fx_correlator_object self.hosts = self.instrument.fhosts + self.instrument.xhosts self.selected_host = None self.host_index = 0 self.num_hosts = len(self.hosts) self.num_fhosts = len(self.instrument.fhosts) self.num_xhosts = len(self.instrument.xhosts) # self.num_bhosts = len(self.instrument.bhosts) # check config file if bhosts or xhosts if check_time == -1: self.check_time = float(self.instrument.configd['FxCorrelator']['monitor_loop_time']) else: self.check_time = check_time # set up periodic engine monitoring self.instrument_monitoring_loop_enabled = IOLoopEvent() self.instrument_monitoring_loop_enabled.clear() self.instrument_monitoring_loop_cb = None self.f_eng_board_monitoring_dict_prev = {} self.x_eng_board_monitoring_dict_prev = {} self.b_eng_board_monitoring_dict_prev = {} self.disabled_fhosts = [] self.disabled_xhosts = [] self.disabled_bhosts = [] # some other useful bits of info self.n_chans = self.instrument.n_chans self.chans_per_xhost = self.n_chans / self.num_xhosts def start(self): """ Start the monitoring loop :return: none """ self._instrument_monitoring_loop_timer_start(check_time=self.check_time) def stop(self): """ Stop the monitoring loop :return: none """ self._instrument_monitoring_loop_timer_stop() def _instrument_monitoring_loop_timer_start(self, check_time=None): """ Set up periodic check of various instrument elements :param check_time: the interval, in seconds, at which to check :return: """ if not IOLoop.current()._running: raise RuntimeError('IOLoop not running, this will not work') self.instrument.logger.info('instrument_monitoring_loop for instrument %s ' 'set up with a period ' 'of %.2f seconds' % (self.instrument.descriptor, self.check_time)) if self.instrument_monitoring_loop_cb is not None: self.instrument_monitoring_loop_cb.stop() self.instrument_monitoring_loop_cb = PeriodicCallback( self._instrument_monitoring_loop, 
check_time * 1000) self.instrument_monitoring_loop_enabled.set() self.instrument_monitoring_loop_cb.start() self.instrument.logger.info('Instrument Monitoring Loop Timer ' 'Started @ ' '%s' % time.ctime()) def _instrument_monitoring_loop_timer_stop(self): """ Disable the periodic instrument monitoring loop :return: """ if self.instrument_monitoring_loop_cb is not None: self.instrument_monitoring_loop_cb.stop() self.instrument_monitoring_loop_cb = None self.instrument_monitoring_loop_enabled.clear() self.instrument.logger.info('Instrument Monitoring Loop Timer Halted @ ' '%s' % time.ctime()) # TODO: use functools to pass this callback function with parameters def _instrument_monitoring_loop(self, check_fhosts=True, check_xhosts=True, check_bhosts=False): """ Perform various checks periodically. :param corner_turner_check: enable periodic checking of the corner- turner; will disable F-engine output on overflow :param coarse_delay_check: enable periodic checking of the coarse delay :param vacc_check: enable periodic checking of the vacc turner :return: """ # TODO: refactor this to handle checking of all host types # TODO: run all tests on everything? 
# TODO: figure out how to selectively test pieces # select a new host host = self.hosts[self.host_index] board_monitoring_dict_current = {} # check host type if host.host_type == 'fhost': if check_fhosts: board_monitoring_dict_current[host] = self._get_fhost_status(host=host) # check error counters if all fhosts have status if len(self.f_eng_board_monitoring_dict_prev) == self.num_fhosts: self._check_fhost_errors(board_monitoring_dict_current, host) self.f_eng_board_monitoring_dict_prev[host] = \ board_monitoring_dict_current[host] elif host.host_type == 'xhost' or host.host_type == 'bhost': if check_xhosts: board_monitoring_dict_current[host] = self._get_xhost_status(host=host) # check errs if all xhosts have status if len(self.x_eng_board_monitoring_dict_prev) == self.num_xhosts: self._check_xhost_errors(board_monitoring_dict_current, host) self.x_eng_board_monitoring_dict_prev[host] = \ board_monitoring_dict_current[host] # TODO: how to handle bhosts and xhosts? elif host.host_type == 'bhost': if check_bhosts: pass # increment board counter, move to the next board the next time # loop runs if self.host_index == self.num_hosts - 1: if not self.disabled_fhosts and not self.disabled_xhosts and not self.disabled_bhosts: self.instrument.logger.info('Monitoring loop run ok. All hosts checked - no hosts disabled') else: self.instrument.logger.warning( 'Monitoring loop run ok. 
All hosts checked - some hosts disabled') # list the disabled fhosts if self.disabled_fhosts: self.instrument.logger.warning( 'corr2 monitor loop: disabled f-hosts: %s' % [ 'fhost%d:%s:%s' % ( disabled_fhost.fhost_index, disabled_fhost.host, [feng.input.name for feng in disabled_fhost.fengines]) for disabled_fhost in self.disabled_fhosts]) # list the disabled xhosts if self.disabled_xhosts: self.instrument.logger.warning( 'corr2 monitor loop: disabled x-hosts: %s' % ['xhost%d:%s:%d-%d' % ( disabled_xhost.index, disabled_xhost.host, self.instrument.xops.board_ids[ disabled_xhost.host] * self.chans_per_xhost, (self.instrument.xops.board_ids[ disabled_xhost.host] + 1) * self.chans_per_xhost - 1) for disabled_xhost in self.disabled_xhosts]) # reset the host counter to start checking again self.host_index = 0 else: self.host_index += 1 #if self.disabled_bhosts: # self.instrument.logger.warning('corr2 monitor loop: disabled b-hosts: %s' % [ # '%d:%s' % (disabled_bhost.index, disabled_bhost.host) for disabled_bhost in self.disabled_bhosts]) return True def _get_fhost_status(self, host, corner_turner_check=True, coarse_delay_check=True, rx_reorder_check=True): """ Checks the f-hosts for errors :return: """ status = {} # check ct & cd if corner_turner_check: # perform corner-turner check ct_status = host.get_ct_status() status['corner_turner'] = ct_status if coarse_delay_check: # perform coarse delay check cd_status = host.get_cd_status() status['coarse_delay'] = cd_status # check feng rx reorder if rx_reorder_check: feng_rx_reorder_status = host.get_rx_reorder_status() status['feng_rx_reorder'] = feng_rx_reorder_status return status def _check_fhost_errors(self, board_monitoring_dict_current, host): """ :param board_monitoring_dict_current: :param host: :return: """ action = {'disable_output': 0, 'reenable_output': 0} coarse_delay_action = self._check_feng_coarse_delay_errs( board_monitoring_dict_current[host], host) corner_turner_action = 
self._check_feng_corner_turn_errs( board_monitoring_dict_current[host], host) rx_reorder_action = self._check_feng_rx_reorder_errs( board_monitoring_dict_current[host], host) # consolidate the action dictionary - only reenable if all # errors are cleared if coarse_delay_action['disable_output'] \ or corner_turner_action['disable_output'] \ or rx_reorder_action['disable_output']: action['disable_output'] = True elif coarse_delay_action['reenable_output'] \ or corner_turner_action['reenable_output'] \ or rx_reorder_action['reenable_output']: action['reenable_output'] = True else: # no action required pass # take appropriate action on board if action['disable_output']: # keep track of which boards have been disabled if host not in self.disabled_fhosts: self.disabled_fhosts.append(host) self._disable_feng_ouput(fhost=host) elif action['reenable_output']: # after checking, we already know that this board was # disabled prior self._renable_feng_output(fhost=host) # remove the board from the list of disabled boards self.disabled_fhosts.remove(host) else: # no action taken pass def _get_xhost_status(self, host, xeng_rx_reorder_check=False, xeng_hmc_reorder_check=True, xeng_vacc_check=True): """ :param host: :param xeng_rx_reorder_check: :param xeng_hmc_reorder_check: :param xeng_vacc_check: :return: """ status = {} # check xeng rx reorder if xeng_rx_reorder_check: xeng_rx_reorder_status = host.get_rx_reorder_status() status['xeng_rx_reorder'] = xeng_rx_reorder_status # check xeng hmc reorder if xeng_hmc_reorder_check: xeng_hmc_reorder_status = host.get_hmc_reorder_status() status['xeng_hmc_reorder'] = xeng_hmc_reorder_status # check xeng vacc if xeng_vacc_check: xeng_vacc_status = host.get_vacc_status() status['xeng_vacc'] = xeng_vacc_status return status def _check_xhost_errors(self, board_monitoring_dict_current, host): """ :param board_monitoring_dict_current: :param host: :return: """ action = {'disable_output': 0, 'reenable_output': 0} hmc_reorder_action = 
self._check_xeng_hmc_reorder_errs( board_monitoring_dict_current[host], host) vacc_action = self._check_xeng_vacc_errs( board_monitoring_dict_current[host], host) # consolidate the action dictionary - only reenable if all # errors are cleared if hmc_reorder_action['disable_output'] \ or vacc_action['disable_output']: action['disable_output'] = True elif hmc_reorder_action['reenable_output'] \ or vacc_action['reenable_output']: action['reenable_output'] = True else: # no action required pass # take appropriate action on board if action['disable_output']: # keep track of which boards have been disabled if host not in self.disabled_xhosts: self.disabled_xhosts.append(host) self._disable_xeng_ouput(xhost=host) elif action['reenable_output']: # after checking, we already know that this board was # disabled prior self._renable_xeng_output(xhost=host) # remove the board from the list of disabled boards self.disabled_xhosts.remove(host) else: # no action taken pass self.x_eng_board_monitoring_dict_prev[host] = \ board_monitoring_dict_current[host] def _check_xeng_rx_reorder_errs(self, x_eng_status_dict, xhost): """ :param x_eng_status_dict: :param xhost: :return: """ raise NotImplementedError def _check_xeng_hmc_reorder_errs(self, x_eng_status_dict, xhost): """ :param x_eng_status_dict: :param xhost: :return: """ action_dict = {'disable_output': False, 'reenable_output': False} if x_eng_status_dict.has_key('xeng_hmc_reorder'): hmc_reorder_dict = x_eng_status_dict['xeng_hmc_reorder'] # counters # check error counters, first check if a previous status # dict exists # flags if not hmc_reorder_dict['init_done']: self.instrument.logger.warning( 'xhost %s hmc reorder has init errors' % xhost.host) action_dict['disable_output'] = True if not hmc_reorder_dict['post_ok']: self.instrument.logger.warning('xhost %s hmc reorder ' 'has post errors' % xhost.host) action_dict['disable_output'] = True # check error counters if self.x_eng_board_monitoring_dict_prev: hmc_reorder_dict_prev = 
\ self.x_eng_board_monitoring_dict_prev[xhost][ 'xeng_hmc_reorder'] #TODO Ignore CRC errors on the HMCs for now... # if hmc_reorder_dict['err_cnt_link2'] != hmc_reorder_dict_prev[ # 'err_cnt_link2']: # self.instrument.logger.warning( # 'xhost %s hmc reorder has errors on link 2' % # xhost.host) # action_dict['disable_output'] = True # if hmc_reorder_dict['err_cnt_link3'] != hmc_reorder_dict_prev[ # 'err_cnt_link3']: # self.instrument.logger.warning( # 'xhost %s hmc reorder has errors on link 3' % # xhost.host) # action_dict['disable_output'] = True if hmc_reorder_dict['lnk2_nrdy_err_cnt'] != hmc_reorder_dict_prev[ 'lnk2_nrdy_err_cnt']: self.instrument.logger.warning( 'xhost %s hmc reorder has link 2 nrdy errors' % xhost.host) action_dict['disable_output'] = True if hmc_reorder_dict['lnk3_nrdy_err_cnt'] != hmc_reorder_dict_prev[ 'lnk3_nrdy_err_cnt']: self.instrument.logger.warning( 'xhost %s hmc reorder has link 3 nrdy errors' % xhost.host) action_dict['disable_output'] = True if hmc_reorder_dict['mcnt_timeout_cnt'] != hmc_reorder_dict_prev[ 'mcnt_timeout_cnt']: self.instrument.logger.warning( 'xhost %s hmc reorder has mcnt timeout errors' % xhost.host) action_dict['disable_output'] = True if hmc_reorder_dict['ts_err_cnt'] != hmc_reorder_dict_prev[ 'ts_err_cnt']: self.instrument.logger.warning( 'xhost %s hmc reorder has timestamp errors' % xhost.host) action_dict['disable_output'] = True # if no errors, check if board was disabled, then flag for reenable if not action_dict['disable_output']: # no errors detected, no need to disable any boards # but was the board previously disabled? 
if xhost in self.disabled_xhosts: # the errors on the board have been cleared action_dict['reenable_output'] = True return action_dict def _check_xeng_vacc_errs(self, x_eng_status_dict, xhost): """ :param x_eng_status_dict: :param xhost: :return: """ action_dict = {'disable_output': False, 'reenable_output': False} if x_eng_status_dict.has_key('xeng_vacc'): vacc_dict = x_eng_status_dict['xeng_vacc'] # counters # check error counters, first check if a previous status # dict exists # check error counters if self.x_eng_board_monitoring_dict_prev: vacc_dict_prev = \ self.x_eng_board_monitoring_dict_prev[xhost][ 'xeng_vacc'] for vacc in range(len(vacc_dict)): # there are four vaccs per xhost if vacc_dict[vacc]['err_cnt'] != vacc_dict_prev[vacc]['err_cnt']: self.instrument.logger.warning( 'xhost %s vacc has errors' % xhost.host) action_dict['disable_output'] = True # if no errors, check if board was disabled, then flag for reenable if not action_dict['disable_output']: # no errors detected, no need to disable any boards # but was the board previously disabled? 
if xhost in self.disabled_xhosts: # the errors on the board have been cleared action_dict['reenable_output'] = True return action_dict def _check_feng_rx_reorder_errs(self, f_eng_status_dict, fhost): """ :param f_eng_status_dict: :param fhost: :return: """ action_dict = {'disable_output': False, 'reenable_output': False} if f_eng_status_dict.has_key('feng_rx_reorder'): rx_reorder_dict = f_eng_status_dict['feng_rx_reorder'] # counters # check error counters, first check if a previous status # dict exists if self.f_eng_board_monitoring_dict_prev: rx_reorder_dict_prev = \ self.f_eng_board_monitoring_dict_prev[fhost][ 'feng_rx_reorder'] # check error counters if rx_reorder_dict['overflow_err_cnt'] != rx_reorder_dict_prev[ 'overflow_err_cnt']: self.instrument.logger.warning( 'fhost %s rx reorder has overflow errors' % fhost.host) action_dict['disable_output'] = True if rx_reorder_dict['receive_err_cnt'] != rx_reorder_dict_prev[ 'receive_err_cnt']: self.instrument.logger.warning( 'fhost %s rx reorder has receive errors' % fhost.host) action_dict['disable_output'] = True if rx_reorder_dict['relock_err_cnt'] != rx_reorder_dict_prev[ 'relock_err_cnt']: self.instrument.logger.warning( 'fhost %s rx reorder has relock errors' % fhost.host) action_dict['disable_output'] = True if rx_reorder_dict['timestep_err_cnt'] != rx_reorder_dict_prev[ 'timestep_err_cnt']: self.instrument.logger.warning( 'fhost %s rx reorder has timestep errors' % fhost.host) action_dict['disable_output'] = True # if no errors, check if board was disabled, then flag for reenable if not action_dict['disable_output']: # no errors detected, no need to disable any boards # but was the board previously disabled? 
if fhost in self.disabled_fhosts: # the errors on the board have been cleared action_dict['reenable_output'] = True return action_dict def _check_feng_coarse_delay_errs(self, f_eng_status_dict, fhost): """ Check f-engines for any coarse delay errors :return: """ action_dict = {'disable_output': False, 'reenable_output': False} if f_eng_status_dict.has_key('coarse_delay'): cd_dict = f_eng_status_dict['coarse_delay'] # flags if not cd_dict['hmc_init']: self.instrument.logger.warning( 'fhost %s coarse delay has hmc init errors' % fhost.host) action_dict['disable_output'] = True if not cd_dict['hmc_post']: self.instrument.logger.warning('fhost %s coarse delay ' 'has hmc post ' 'errors' % fhost.host) action_dict['disable_output'] = True # check error counters, first check if a previous status # dict exists if self.f_eng_board_monitoring_dict_prev: cd_dict_prev = \ self.f_eng_board_monitoring_dict_prev[fhost][ 'coarse_delay'] # check error counters if cd_dict['reord_jitter_err_cnt_pol0'] != cd_dict_prev[ 'reord_jitter_err_cnt_pol0'] or cd_dict[ 'reord_jitter_err_cnt_pol1'] != cd_dict_prev[ 'reord_jitter_err_cnt_pol1']: self.instrument.logger.warning( 'fhost %s coarse delay has reorder jitter errors' % fhost.host) action_dict['disable_output'] = True if cd_dict['hmc_overflow_err_cnt_pol0'] != cd_dict_prev[ 'hmc_overflow_err_cnt_pol0'] or cd_dict[ 'hmc_overflow_err_cnt_pol1'] != cd_dict_prev[ 'hmc_overflow_err_cnt_pol1']: self.instrument.logger.warning('fhost %s coarse ' 'delay has ' 'overflow errors' % fhost.host) action_dict['disable_output'] = True # if no errors, check if board was disabled, then flag for reenable if not action_dict['disable_output']: # no errors detected, no need to disable any boards # but was the board previously disabled? 
if fhost in self.disabled_fhosts: # the errors on the board have been cleared action_dict['reenable_output'] = True return action_dict def _check_feng_corner_turn_errs(self, f_eng_status_dict, fhost): """ Check f-engines for any corner-turner errors :return: """ action_dict = {'disable_output': False, 'reenable_output': False} if f_eng_status_dict.has_key('corner_turner'): ct_dict = f_eng_status_dict['corner_turner'] # flags if not ct_dict['hmc_init_pol0'] or not ct_dict['hmc_init_pol1']: self.instrument.logger.warning('fhost %s corner-turner ' 'has hmc ' 'init errors' % fhost.host) action_dict['disable_output'] = True if not ct_dict['hmc_post_pol0'] or not ct_dict['hmc_post_pol1']: self.instrument.logger.warning('fhost %s corner-turner ' 'has hmc post ' 'errors' % fhost.host) action_dict['disable_output'] = True # check error counters, first check if a previous status # dict exists if self.f_eng_board_monitoring_dict_prev: ct_dict_prev = self.f_eng_board_monitoring_dict_prev[fhost][ 'corner_turner'] # check error counters if ct_dict['bank_err_cnt_pol0'] != ct_dict_prev[ 'bank_err_cnt_pol0'] or ct_dict[ \ 'bank_err_cnt_pol1'] != ct_dict_prev[ 'bank_err_cnt_pol1']: self.instrument.logger.warning('fhost %s ' 'corner-turner has bank errors' % fhost.host) action_dict['disable_output'] = True if ct_dict['fifo_full_err_cnt'] != ct_dict_prev[ 'fifo_full_err_cnt']: self.instrument.logger.warning('fhost %s ' 'corner-turner has fifo full ' 'errors' % fhost.host) action_dict['disable_output'] = True if ct_dict['rd_go_err_cnt'] != ct_dict_prev['rd_go_err_cnt']: self.instrument.logger.warning('fhost %s ' 'corner-turner has read go ' 'errors' % fhost.host) action_dict['disable_output'] = True if ct_dict['obuff_bank_err_cnt'] != ct_dict_prev[ 'obuff_bank_err_cnt']: self.instrument.logger.warning('fhost %s ' 'corner-turner has ' 'obuff errors' % fhost.host) action_dict['disable_output'] = True if ct_dict['hmc_overflow_err_cnt_pol0'] != ct_dict_prev[ 'hmc_overflow_err_cnt_pol0'] or 
ct_dict[ 'hmc_overflow_err_cnt_pol1'] != ct_dict_prev[ 'hmc_overflow_err_cnt_pol1']: self.instrument.logger.warning('fhost %s ' 'corner-turner has ' 'overflow errors' % fhost.host) action_dict['disable_output'] = True # if no errors, check if board was disabled, then flag for reenable if not action_dict['disable_output']: # no errors detected, no need to disable any boards # but was the board previously disabled? if fhost in self.disabled_fhosts: # the errors on the board have been cleared action_dict['reenable_output'] = True return action_dict def _disable_feng_ouput(self, fhost): """ Disables the output from an f-engine :param fhost: the host board with f-engines to disable :return: """ fhost.tx_disable() self.instrument.logger.warning('fhost%d %s %s output disabled!' % (fhost.fhost_index, fhost.host, [feng.input.name for feng in fhost.fengines] )) def _renable_feng_output(self, fhost): """ Reenables the output from an f-engine :param fhost: the host board with f-engines to reenable :return: """ fhost.tx_enable() self.instrument.logger.info('fhost%d %s %s output reenabled!' % (fhost.fhost_index, fhost.host, [feng.input.name for feng in fhost.fengines] )) def _disable_xeng_ouput(self, xhost): """ Disables the output from an f-engine :param xhost: the host board with f-engines to disable :return: """ xhost.registers.control.write(gbe_txen=False) self.instrument.logger.warning('xhost%d %s %s output disabled!' % (xhost.index, xhost.host, (self.instrument.xops.board_ids[xhost.host] * self.chans_per_xhost, (self.instrument.xops.board_ids[xhost.host] + 1) * self.chans_per_xhost - 1) )) def _renable_xeng_output(self, xhost): """ Reenables the output from an f-engine :param xhost: the host board with f-engines to reenable :return: """ xhost.registers.control.write(gbe_txen=True) self.instrument.logger.info('xhost%d %s %s output reenabled!' 
% (xhost.index, xhost.host, (self.instrument.xops.board_ids[ xhost.host] * self.chans_per_xhost, (self.instrument.xops.board_ids[ xhost.host] + 1) * self.chans_per_xhost - 1) )) def get_bad_fhosts(self): """ Returns a list of bad known fhosts that are currently disables :return: list of bad fhosts (hostnames) """ return self.disabled_fhosts def get_bad_xhosts(self): """ Returns a list of bad known xhosts that are currently disables :return: list of bad xhosts (hostnames) """ return self.disabled_xhosts def get_bad_bhosts(self): """ Returns a list of bad known bhosts that are currently disables :return: list of bad bhosts (hostnames) """ return self.disabled_bhosts
class Api:
    """Asynchronous client for the Telegram Bot HTTP API.

    Every public request method is a Tornado coroutine resolving to the
    ``result`` member of Telegram's JSON reply.  Incoming updates are pulled
    by :meth:`wait_commands`, which passes each update to ``processor``.
    """

    STATE_WORKING = 0
    STATE_STOP_PENDING = 1
    STATE_STOPPED = 2

    CHAT_ACTION_TYPING = 'typing'
    CHAT_ACTION_UPLOAD_PHOTO = 'upload_photo'
    CHAT_ACTION_RECORD_VIDEO = 'record_video'
    CHAT_ACTION_UPLOAD_VIDEO = 'upload_video'
    CHAT_ACTION_RECORD_AUDIO = 'record_audio'
    CHAT_ACTION_UPLOAD_AUDIO = 'upload_audio'
    CHAT_ACTION_UPLOAD_DOC = 'upload_document'
    CHAT_ACTION_FIND_LOCATION = 'find_location'

    PARSE_MODE_NONE = None
    PARSE_MODE_MD = 'Markdown'
    PARSE_MODE_HTML = 'HTML'

    def __init__(self, token, processor):
        """Create a client.

        :param token: bot token of the form ``<numeric id>:<secret>``
        :param processor: callable invoked with every received update; it may
            return a plain value or a future
        :raises ValueError: if ``token`` is not exactly two colon-separated
            parts with a numeric first part
        """
        parts = token.split(':')
        # Checking the part count up front makes a token with several colons
        # fail with the intended message rather than an unpacking error.
        if len(parts) != 2 or not parts[0].isdigit():
            raise ValueError('Non well-formatted token given')
        self.bot_id = int(parts[0])

        self.token = token
        self.consumption_state = self.STATE_STOPPED
        self.processor = processor
        self.__me = None
        # Set while no polling loop is running; cleared by wait_commands().
        self._finished = Event()
        self._finished.set()

    @coroutine
    def get_me(self):
        """Return the ``getMe`` result, requesting it only until one is cached."""
        if not self.__me:
            self.__me = yield self.__request_api('getMe')
        return self.__me

    def stop(self):
        """Ask the polling loop started by :meth:`wait_commands` to exit."""
        assert not self._finished.is_set()
        self._finished.set()

    @property
    def is_alive(self):
        """``True`` while the polling loop has not been flagged as finished."""
        return not self._finished.is_set()

    @staticmethod
    def _encode_multipart(boundary, fields, file_names):
        """Serialise ``fields`` into a ``multipart/form-data`` body.

        :param boundary: boundary string announced in the Content-type header
        :param fields: mapping of field name to value (``bytes``, ``str``,
            number, ``dict`` or ``list``)
        :param file_names: mapping of field name to file name for the fields
            that were read from file-like objects
        :return: the encoded body as ``bytes``
        """
        delimiter = b'--' + boundary.encode('utf-8')
        parts = []
        for key, value in fields.items():
            parts.append(delimiter)
            if key in file_names:
                disposition = (
                    'Content-Disposition: form-data; name="%s"; filename="%s"'
                    % (key, file_names[key]))
            else:
                disposition = 'Content-Disposition: form-data; name="%s"' % key
            parts.append(disposition.encode('utf-8'))
            parts.append(b'')
            if isinstance(value, (dict, list)):
                # Structured values (e.g. reply_markup) travel as JSON text;
                # left as-is they would make bytes.join() raise TypeError.
                value = ujson.dumps(value)
            elif isinstance(value, (int, float)):
                value = str(value)
            if isinstance(value, str):
                value = value.encode('utf-8')
            parts.append(value)
        parts.append(delimiter + b'--')
        return b"\r\n" + b"\r\n".join(parts) + b"\r\n"

    @coroutine
    def __request_api(self, method, body=None, request_timeout=10,
                      retry_on_nonuser_error=False):
        """Call one Bot API method and return its ``result``.

        :param method: API method name appended to the bot URL
        :param body: mapping of request parameters; values exposing ``read``
            are treated as files and switch the request to multipart encoding.
            A falsy ``body`` produces a GET request without a body.
        :param request_timeout: value forwarded to the HTTP client
        :param retry_on_nonuser_error: when true, HTTP errors outside the
            400-499 range are retried every 5 seconds instead of raised
        :return: the ``result`` member of the reply, or ``None`` when the
            reply had no body or the request failed with HTTP code 599
        :raises ApiError: when Telegram answers ``ok == false`` or any other
            HTTP error occurs
        """
        def guess_filename(obj):
            """Tries to guess the filename of the given object."""
            name = getattr(obj, 'name', None)
            if (name and isinstance(name, str) and name[0] != '<'
                    and name[-1] != '>'):
                return basename(name)

        url = 'https://api.telegram.org/bot{token}/{method}'.format(
            token=self.token, method=method)
        try:
            request = {
                'request_timeout': request_timeout,
                'headers': {},
            }

            if body:
                request['method'] = 'POST'
                request_content = {}
                file_names = {}
                for key, value in body.items():
                    if hasattr(value, 'read'):
                        request_content[key] = value.read()
                        file_names[key] = guess_filename(value)
                    else:
                        request_content[key] = value

                if file_names:
                    boundary = md5(str(time()).encode('utf-8')).hexdigest()
                    request['headers']['Content-type'] = \
                        'multipart/form-data; boundary=' + boundary
                    body = self._encode_multipart(boundary, request_content,
                                                  file_names)
                else:
                    request['headers']['Content-type'] = 'application/json'
                    body = ujson.dumps(request_content)
            else:
                request['method'] = 'GET'
                # An empty mapping must not be forwarded as a GET body.
                body = None

            while True:
                try:
                    response = yield AsyncHTTPClient().fetch(url, body=body,
                                                             **request)
                    break
                except HTTPError as e:
                    if not retry_on_nonuser_error or 400 <= e.code < 500:
                        raise
                    yield sleep(5)

            if response and response.body:
                response = ujson.loads(response.body.decode('utf-8'))
                if response['ok']:
                    return response['result']
                raise ApiError(response['error_code'],
                               response['description'],
                               response.get('parameters'),
                               request_body=body)
        except HTTPError as e:
            if e.code == 599:
                # Do nothing on timeout, just return None
                logging.exception('%s request timed out', method)
            elif e.response and e.response.body:
                response = ujson.loads(e.response.body.decode('utf-8'))
                raise ApiError(response['error_code'],
                               response['description'],
                               response.get('parameters'),
                               request_body=body)
            else:
                raise ApiError(e.code, None, request_body=body)

        return None

    @coroutine
    def get_updates(self, offset: int = None, limit: int = 100,
                    timeout: int = 2, retry_on_nonuser_error: bool = False):
        """Fetch pending updates via ``getUpdates``.

        :param offset: first update id to return; omitted from the request
            when ``None``
        :param limit: maximum number of updates, between 1 and 100
        :param timeout: long-poll timeout sent to Telegram; the HTTP request
            itself is given 1.5 times this value
        :param retry_on_nonuser_error: forwarded to the request helper
        :return: list of updates (empty when the request produced no data)
        """
        assert 1 <= limit <= 100
        assert 0 <= timeout

        request = {'limit': limit, 'timeout': timeout}
        if offset is not None:
            request['offset'] = offset

        data = yield self.__request_api(
            'getUpdates', request, request_timeout=timeout * 1.5,
            retry_on_nonuser_error=retry_on_nonuser_error)

        if data is None:
            return []
        return data

    @coroutine
    def wait_commands(self, last_update_id=None):
        """Poll for updates and feed them to ``processor`` until stopped.

        :param last_update_id: id of the last update already handled; polling
            starts from the following id.  ``None`` sends no offset.
        """
        assert self._finished.is_set()
        self._finished.clear()
        self.consumption_state = self.STATE_WORKING

        if last_update_id is not None:
            last_update_id += 1

        yield self.get_me()

        while not self._finished.is_set():
            try:
                updates = yield self.get_updates(last_update_id,
                                                 retry_on_nonuser_error=True)
            except BaseException:
                # Flag the loop as dead before propagating the failure.
                self._finished.set()
                raise

            for update in updates:
                yield maybe_future(self.processor(update))
                if 'update_id' in update:
                    last_update_id = update['update_id']

            # Advance past the last handled update.  The None guard avoids a
            # TypeError when no update in the batch carried an id and no
            # starting offset was supplied.
            if updates and last_update_id is not None:
                last_update_id += 1

    @coroutine
    def send_chat_action(self, chat_id, action: str):
        """Call ``sendChatAction`` with one of the ``CHAT_ACTION_*`` values."""
        return (yield self.__request_api('sendChatAction', {
            'chat_id': chat_id,
            'action': action
        }))

    @coroutine
    def send_message(self, text: str, chat_id=None,
                     reply_to_message: dict = None, parse_mode: str = None,
                     disable_web_page_preview: bool = False,
                     disable_notification: bool = False,
                     reply_to_message_id: int = None, reply_markup=None):
        """Call ``sendMessage``.

        Either ``chat_id`` or ``reply_to_message`` must be given.  When only
        ``reply_to_message`` is given, its chat becomes the target and the
        reply reference is attached only if the chat id differs from the
        sender id.  When both are given, the reply reference is always
        attached.

        :raises ApiError: propagated from the request; a 400 "Can't parse"
            error additionally logs the offending text
        """
        request = {
            'chat_id': chat_id,
            'text': text,
            'disable_web_page_preview': disable_web_page_preview,
            'disable_notification': disable_notification,
        }

        if parse_mode is not None:
            request['parse_mode'] = parse_mode

        if reply_to_message_id is not None:
            request['reply_to_message_id'] = reply_to_message_id

        if reply_to_message:
            if chat_id is None:
                request['chat_id'] = reply_to_message['chat']['id']
                if (reply_to_message['chat']['id']
                        != reply_to_message['from']['id']):
                    request['reply_to_message_id'] = \
                        reply_to_message['message_id']
            else:
                request['reply_to_message_id'] = reply_to_message['message_id']
        else:
            assert chat_id is not None

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        try:
            return (yield self.__request_api('sendMessage', request))
        except ApiError as e:
            if e.code == 400 and e.description.startswith(
                    "Bad Request: Can\'t parse"):
                logging.exception('Got exception while sending text: %s', text)
            raise

    @coroutine
    def send_photo(self, chat_id, photo, caption: str = None,
                   disable_notification: bool = False,
                   reply_to_message_id: int = None, reply_markup=None):
        """Call ``sendPhoto``; a ``photo`` exposing ``read`` is uploaded."""
        request = {
            'chat_id': chat_id,
            'photo': photo,
            'disable_notification': disable_notification,
        }

        if caption is not None:
            request['caption'] = caption

        if reply_to_message_id is not None:
            request['reply_to_message_id'] = reply_to_message_id

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('sendPhoto', request))

    @coroutine
    def forward_message(self, chat_id, from_chat_id, message_id: int,
                        disable_notification: bool = False):
        """Call ``forwardMessage``."""
        return (yield self.__request_api(
            'forwardMessage', {
                'chat_id': chat_id,
                'from_chat_id': from_chat_id,
                'disable_notification': disable_notification,
                'message_id': message_id,
            }))

    @staticmethod
    def _prepare_inline_message(message=None, chat_id=None, message_id=None,
                                inline_message_id=None):
        """Build the identifying part of an edit request.

        Precedence: a ``message`` dict, then a ``chat_id``/``message_id``
        pair, otherwise ``inline_message_id``.
        """
        request = {}

        if message:
            request['chat_id'] = message['chat']['id']
            request['message_id'] = message['message_id']
        elif chat_id and message_id:
            request['chat_id'] = chat_id
            request['message_id'] = message_id
        else:
            request['inline_message_id'] = inline_message_id

        return request

    @coroutine
    def edit_message_reply_markup(self, message=None, chat_id=None,
                                  message_id=None, inline_message_id=None,
                                  reply_markup=None):
        """Call ``editMessageReplyMarkup`` for the identified message."""
        assert (chat_id and message_id) or message or inline_message_id

        request = self._prepare_inline_message(
            message=message, chat_id=chat_id, message_id=message_id,
            inline_message_id=inline_message_id)

        if reply_markup:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('editMessageReplyMarkup', request))

    @coroutine
    def edit_message_text(self, text, message=None, chat_id=None,
                          message_id=None, inline_message_id=None,
                          parse_mode=None, disable_web_page_preview=False,
                          reply_markup=None):
        """Call ``editMessageText`` for the identified message."""
        request = self._prepare_inline_message(
            message=message, chat_id=chat_id, message_id=message_id,
            inline_message_id=inline_message_id)

        if parse_mode is not None:
            request['parse_mode'] = parse_mode

        request['disable_web_page_preview'] = disable_web_page_preview
        request['text'] = text

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('editMessageText', request))

    @coroutine
    def answer_callback_query(self, callback_query_id, text=None,
                              show_alert=False):
        """Call ``answerCallbackQuery``; ``text`` is sent only when truthy."""
        request = {
            'callback_query_id': callback_query_id,
            'show_alert': show_alert
        }

        if text:
            request['text'] = text

        return (yield self.__request_api('answerCallbackQuery', request))

    @coroutine
    def get_chat_administrators(self, chat_id):
        """Call ``getChatAdministrators``."""
        return (yield self.__request_api('getChatAdministrators',
                                         {'chat_id': chat_id}))

    @coroutine
    def get_chat(self, chat_id):
        """Call ``getChat``."""
        return (yield self.__request_api('getChat', {'chat_id': chat_id}))
class ConnectionPool(object):
    """ A maximum sized pool of Tornado IOStreams

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(ip=ip, port=port) for ip, port in ...]

        >>> info = yield scheduler.identity()

    It creates enough streams to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing streams so that we don't have to continuously reconnect.

    It also maintains a stream limit to avoid "too many open file handle"
    issues.  Whenever this maximum is reached we clear out all idling streams.
    If that doesn't do the trick then we wait until one of the occupied streams
    closes.

    Parameters
    ----------
    limit: int
        The number of open streams to maintain at once
    deserialize: bool
        Whether or not to deserialize data by default or pass it through
    """
    def __init__(self, limit=512, deserialize=True):
        self.open = 0      # streams opened (or being opened) by this pool
        self.active = 0    # streams currently handed out to callers
        self.limit = limit
        self.available = defaultdict(set)   # (ip, port) -> idle streams
        self.occupied = defaultdict(set)    # (ip, port) -> streams in use
        self.deserialize = deserialize
        # connect() waits on this event while the pool is at its limit.
        self.event = Event()

    def __str__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open,
                                                         self.active)

    __repr__ = __str__

    def __call__(self, arg=None, ip=None, port=None, addr=None):
        """ Cached rpc objects """
        ip, port = ip_port_from_args(arg=arg, addr=addr, ip=ip, port=port)
        return RPCCall(ip, port, self)

    @gen.coroutine
    def connect(self, ip, port, timeout=3):
        """ Get a stream to ``(ip, port)``, reusing an idle one if possible

        When the pool is at its limit, idle streams are collected and the
        coroutine waits until a slot is released.

        Returns (via ``gen.Return``) a stream registered as occupied.  Any
        exception raised while dialing is re-raised after the reserved slot
        has been released.
        """
        if self.available.get((ip, port)):
            stream = self.available[ip, port].pop()
            self.active += 1
            self.occupied[ip, port].add(stream)
            raise gen.Return(stream)

        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        # Reserve the slot before yielding so concurrent callers see it.
        self.open += 1
        try:
            stream = yield connect(ip=ip, port=port, timeout=timeout)
        except Exception:
            # No stream exists, so on_close() will never run for it: give the
            # slot back here, otherwise every failed dial shrinks the pool
            # permanently.
            self.open -= 1
            self.event.set()
            raise
        stream.set_close_callback(lambda: self.on_close(ip, port, stream))
        self.active += 1
        self.occupied[ip, port].add(stream)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(stream)

    def on_close(self, ip, port, stream):
        """ Close callback: drop ``stream`` from the books, wake waiters """
        self.open -= 1

        if stream in self.available[ip, port]:
            self.available[ip, port].remove(stream)
        if stream in self.occupied[ip, port]:
            self.occupied[ip, port].remove(stream)
            self.active -= 1

        if self.open <= self.limit:
            self.event.set()

    def collect(self):
        """ Close every idle stream """
        logger.info("Collecting unused streams.  open: %d, active: %d",
                    self.open, self.active)
        for streams in list(self.available.values()):
            # Iterate over a snapshot: on_close() removes from these sets.
            for stream in list(streams):
                close(stream)

    def close(self):
        """ Close every stream, idle or occupied """
        for streams in list(self.available.values()):
            for stream in list(streams):
                close(stream)
        for streams in list(self.occupied.values()):
            for stream in list(streams):
                close(stream)
class ZMQDrain(object):
    """Implementation of IDrain that pushes to a zmq.Socket asynchronously.

    This implementation overrides the high-water mark behavior from
    cs.eyrie.vassal.Vassal to instead use a zmq.Poller.
    """

    def __init__(self, logger, loop, zmq_socket, metric_prefix='emitter'):
        """Store the collaborators and create the internal events.

        :param logger: logger used for debug and error output
        :param loop: IO loop providing add_handler/remove_handler and the
            WRITE/ERROR event flags
        :param zmq_socket: socket that messages are sent on
        :param metric_prefix: stored on the instance as ``metric_prefix``
        """
        klass = self.__class__
        self.emitter = zmq_socket
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self._writable = Event()
        self.sender_tag = 'sender:%s.%s' % (klass.__module__, klass.__name__)

    def _handle_events(self, fd, events):
        """IO loop callback: signal writability, or log a polling error."""
        loop = self.loop
        if events & loop.ERROR:
            self.logger.error('Error polling socket for writability')
            return
        if events & loop.WRITE:
            # One-shot registration: detach before waking the waiter.
            loop.remove_handler(self.emitter)
            self._writable.set()

    @gen.coroutine
    def _poll(self):
        """Wait until the IO loop reports the socket as writable."""
        loop = self.loop
        loop.add_handler(self.emitter, self._handle_events, loop.WRITE)
        yield self._writable.wait()
        self._writable.clear()

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the drain as closing and close the socket.

        ``timeout`` is accepted but not used.
        """
        self.state = CLOSING
        self.logger.debug("Flushing send queue")
        self.emitter.close()

    def emit_nowait(self, msg):
        """Send ``msg`` without blocking.

        A single string is wrapped into a one-frame message.

        :raises QueueFull: if the socket reports ``zmq.Again``
        """
        self.logger.debug("Drain emitting")
        frames = [msg] if isinstance(msg, basestring) else msg
        try:
            self.emitter.send_multipart(frames, zmq.NOBLOCK)
        except zmq.Again:
            raise QueueFull()

    @gen.coroutine
    def emit(self, msg, retry_timeout=INITIAL_TIMEOUT):
        """Send ``msg``, waiting for writability and retrying with back-off.

        After each ``zmq.Again`` the delay is doubled, capped at
        ``MAX_TIMEOUT``, and slept before polling again.
        """
        frames = [msg] if isinstance(msg, basestring) else msg
        delay = retry_timeout
        delivered = False
        while not delivered:
            # This should ensure the ZMQ socket can accept more data
            yield self._poll()
            try:
                self.emitter.send_multipart(frames, zmq.NOBLOCK)
                delivered = True
            except zmq.Again:
                # But sometimes it's not enough
                self.logger.debug('Error polling for socket writability')
                delay = min(delay*2, MAX_TIMEOUT)
                yield gen.sleep(delay.total_seconds())
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.
    """

    def __init__(self, maxsize=0):
        if maxsize is None:
            raise TypeError("maxsize can't be None")
        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])  # Pairs of (item, Future).
        self._unfinished_tasks = 0
        # Set whenever every item that was put has been marked done.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True when a bounded queue has reached ``maxsize``."""
        return self.maxsize != 0 and self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            waiter = Future()
            self._putters.append((item, waiter))
            _set_timeout(waiter, timeout)
            return waiter
        return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if not self._getters:
            if self.full():
                raise QueueFull
            self.__put_internal(item)
            return

        # A consumer is already waiting: pass the item straight through.
        assert self.empty(), "queue non-empty, why are getters waiting?"
        waiter = self._getters.popleft()
        self.__put_internal(item)
        waiter.set_result(self._get())

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.gen.TimeoutError` after a timeout.
        """
        result = Future()
        try:
            item = self.get_nowait()
        except QueueEmpty:
            self._getters.append(result)
            _set_timeout(result, timeout)
        else:
            result.set_result(item)
        return result

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            # Admit the oldest blocked producer's item, then hand out the
            # item at the head of the queue.
            assert self.full(), "queue not full, why are putters waiting?"
            pending_item, producer = self._putters.popleft()
            self.__put_internal(pending_item)
            producer.set_result(None)
            return self._get()
        if self.qsize():
            return self._get()
        raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        remaining = self._unfinished_tasks - 1
        self._unfinished_tasks = remaining
        if not remaining:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)
    # End of the overridable methods.

    def __put_internal(self, item):
        # Account for the new task before storing the item.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        putters = self._putters
        getters = self._getters
        while putters and putters[0][1].done():
            putters.popleft()
        while getters and getters[0].done():
            getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        pieces = ['maxsize=%r' % (self.maxsize, )]
        if getattr(self, '_queue', None):
            pieces.append('queue=%r' % self._queue)
        if self._getters:
            pieces.append('getters[%s]' % len(self._getters))
        if self._putters:
            pieces.append('putters[%s]' % len(self._putters))
        if self._unfinished_tasks:
            pieces.append('tasks=%s' % self._unfinished_tasks)
        return ' '.join(pieces)
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

        @gen.coroutine
        def producer():
            for item in range(5):
                yield q.put(item)
                print('Put %s' % item)

        @gen.coroutine
        def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            yield producer()     # Wait for producer to put all tasks.
            yield q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    In Python 3.5, `Queue` implements the async iterator protocol, so
    ``consumer()`` could be rewritten as::

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """
    def __init__(self, maxsize=0):
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])  # Pairs of (item, Future).
        self._unfinished_tasks = 0
        # Set whenever every item that was put has been marked done.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True when a bounded queue has reached ``maxsize``."""
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            future = Future()
            self._putters.append((item, future))
            _set_timeout(future, timeout)
            return future
        else:
            return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            self.__put_internal(item)
            getter.set_result(self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.gen.TimeoutError` after a timeout.
        """
        future = Future()
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            self.__put_internal(item)
            putter.set_result(None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self):
        """Return the asynchronous iterator used by ``async for``.

        This is deliberately a plain method: ``async for`` needs the object
        returned here to implement ``__anext__``, and wrapping the method in
        a coroutine decorator would hand back a Future instead.
        """
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)
    # End of the overridable methods.

    def __put_internal(self, item):
        # Account for the new task before storing the item.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, '_queue', None):
            result += ' queue=%r' % self._queue
        if self._getters:
            result += ' getters[%s]' % len(self._getters)
        if self._putters:
            result += ' putters[%s]' % len(self._putters)
        if self._unfinished_tasks:
            result += ' tasks=%s' % self._unfinished_tasks
        return result
class Application(object):
    """Gossip/ping application for a single local node.

    Holds the table of known nodes (keyed by ``node_id``), one handler
    instance per registered message type, and two queues (gossip inbox and
    outbox) that the ``ingest_gossip_inbox`` and ``send_buffered_gossip``
    loops drain.
    """

    def __init__(self, routes, node, pipe):
        """
        Application instantiates and registers handlers for each message type,
        and routes messages to the pre-instantiated instances of each message
        handler

        :param routes: list of tuples in the form of
            (<message type str>, <MessageHandler class>)
        :param node: Node instance of the local node
        :param pipe: Instance of multiprocessing.Pipe for communicating with
            the parent process
        """
        # We don't really have to worry about synchronization
        # so long as we're careful about explicit context switching
        self.nodes = {node.node_id: node}
        self.local_node = node
        self.handlers = {}
        self.tcpclient = TCPClient()
        self.gossip_inbox = Queue()
        self.gossip_outbox = Queue()
        self.sequence_number = 0
        if routes:
            self.add_handlers(routes)
        self.pipe = pipe
        self.ioloop = IOLoop.current()
        # Set while at least one node has been added through add_node();
        # get_random_node() waits on it.
        self.add_node_event = Event()

    def next_sequence_number(self):
        """Increment the local sequence counter and return the new value."""
        self.sequence_number += 1
        return self.sequence_number

    @coroutine
    def ping_random_node(self):
        """Ping one randomly chosen node; mark it SUSPECT on timeout."""
        node = yield self.get_random_node()
        LOGGER.debug('{} pinging random node: {}'.format(
            self.local_node.node_id, node.node_id))
        try:
            yield self.ping(node)
        except TimeoutError:
            self.mark_suspect(node)

    @coroutine
    def add_node(self, node):
        """Register ``node`` if its id is not known yet and set the event."""
        if node.node_id not in self.nodes:
            LOGGER.debug('Adding node {} to {}'.format(node, self.nodes))
            self.add_node_event.set()
            self.nodes[node.node_id] = node
            LOGGER.debug('Added node {} to {}'.format(node, self.nodes))

    @coroutine
    def remove_node(self, node):
        """Forget ``node``; clear the event when no other node remains."""
        if node.node_id in self.nodes:
            del self.nodes[node.node_id]

        # get_other_nodes is a coroutine and has to be *called*; yielding the
        # bound method itself is not a yieldable object for Tornado.
        other_nodes = yield self.get_other_nodes()
        if not other_nodes:
            self.add_node_event.clear()

    def add_handlers(self, handlers):
        """Instantiate one handler per ``(message_type, handler_cls)`` pair.

        :param handlers: iterable of (<message type str>, <handler class>);
            each class is called with this application as its only argument
        """
        for message_type, handler_cls in handlers:
            assert message_type in MESSAGE_TYPES, (
                'Message type {!r} not found in MESSAGE TYPES {}'.format(
                    message_type, MESSAGE_TYPES.keys()))
            self.handlers[message_type] = handler_cls(self)

    def route_stream_message(self, stream, message_type, message):
        """Build the message object and pass it to the registered handler.

        :param stream: stream the message arrived on (handed to the handler)
        :param message_type: key into MESSAGE_TYPES and ``self.handlers``
        :param message: dict of keyword arguments for the message class
        """
        LOGGER.debug('{!r} received {} message from {!r}'.format(
            self, message_type, stream))
        message_cls = MESSAGE_TYPES[message_type]
        message_obj = message_cls(**message)
        handler = self.handlers[message_type]
        LOGGER.debug('Routing {} to {}'.format(message_type, handler))
        handler(stream, message_obj)

    @coroutine
    def send_message(self, stream, message):
        """Write ``message.to_msgpack`` to ``stream``.

        A closed stream is logged as a warning and otherwise ignored.
        """
        LOGGER.debug('Sending message {!r} to {}'.format(
            message.MESSAGE_TYPE, stream))
        try:
            yield stream.write(message.to_msgpack)
        except StreamClosedError:
            LOGGER.warning('Unable to send {} to {} - stream closed'.format(
                message.MESSAGE_TYPE, stream))

    @coroutine
    def _get_next_message(self, stream):
        """Read and unpack the next message from ``stream``.

        :returns: the unpacked message with its ``'type'`` key removed, or
            None when the stream was closed before anything could be read
        """
        # get the next message from the stream
        unpacker = msgpack.Unpacker()
        try:
            wire_bytes = yield with_timeout(
                datetime.timedelta(seconds=PING_TIMEOUT),
                stream.read_bytes(4096, partial=True))
        except StreamClosedError:
            LOGGER.warning(
                'Unable to get next message from {} - stream closed'.format(
                    stream))
        else:
            unpacker.feed(wire_bytes)
            LOGGER.debug('Deserializing object from stream {}'.format(stream))
            # Builtin next() works with both the Python 2 and Python 3
            # iterator protocols.
            message = next(unpacker)
            message.pop('type')
            raise Return(message)

    @coroutine
    def ping(self, node):
        """
        Ping a node

        :param node: Instance of Node to ping
        :returns: Boolean, True if successful/False if fail
        """
        host = node.addr
        port = node.port
        LOGGER.debug('pinging {}:{}'.format(host, port))
        ping = Ping(seqno=self.next_sequence_number(),
                    node=node,
                    sender=self.local_node)

        # Connect to the node
        try:
            stream = yield self.tcpclient.connect(host, port)
        except StreamClosedError:
            LOGGER.error(
                'Unable to connect from {} to {} (pinging host)'.format(
                    self.local_node.node_id, node.node_id))
            raise Return(False)

        try:
            # Send the ping
            LOGGER.debug('Sending {!r} to {!r}'.format(
                ping.MESSAGE_TYPE, node))
            yield self.send_message(stream, ping)

            # Wait for an ACK message in response
            LOGGER.debug('Getting next message from {}:{}'.format(host, port))
            message = yield self._get_next_message(stream)
            if message is None:
                raise Return(False)

            ack = Ack(**message)
            LOGGER.debug('Received {!r} from {!r} (response to {!r})'.format(
                ack.MESSAGE_TYPE, node.node_id, ping.MESSAGE_TYPE))

            # Check that the ACK sequence number matches the PING sequence
            # number
            if ack.seqno != ping.seqno:
                raise Return(False)

            LOGGER.debug(
                'Sequence number matches. Node {} looks good to {}!'.format(
                    node.node_id, self.local_node.node_id))

            # Process the gossip messages tacked onto the ACK message's
            # payload
            for gossip in ack.payload:
                try:
                    self.gossip_inbox.put_nowait(gossip)
                except QueueFull:
                    LOGGER.error(
                        'Unable to add {} message from {} to gossip inbox'.
                        format(gossip.MESSAGE_TYPE, node.node_id))

            # mark the node as ALIVE in self.nodes
            self.mark_alive(node)

            # Send gossip that this node is alive
            self.queue_gossip_send(Alive(node=node, sender=self.local_node))
            raise Return(True)
        finally:
            stream.close()

    @coroutine
    def ack(self, stream, seqno):
        """Answer a PING with an ACK carrying buffered gossip.

        Up to ACK_PAYLOAD_SIZE messages are taken from the gossip outbox and
        attached as the ACK payload.

        :param stream: stream to write the ACK to
        :param seqno: sequence number of the PING being acknowledged
        """
        payload = []
        for _ in xrange(ACK_PAYLOAD_SIZE):
            try:
                gossip = self.gossip_outbox.get_nowait()
                payload.append(gossip)
            except QueueEmpty:
                break

        ack = Ack(seqno=seqno, payload=payload)
        LOGGER.debug('Trying to send ack: {}'.format(ack))
        try:
            yield stream.write(ack.to_msgpack)
        except StreamClosedError:
            LOGGER.error(
                'Unable to connect from {} to stream (acking PING)'.format(
                    self.local_node.node_id))
        else:
            # Only claim success when the write did not fail.
            LOGGER.debug('Sent ack to {}'.format(stream))

    @coroutine
    def _change_node_state(self, node, state):
        """
        Because Tornado has explicit context switching, we don't need to worry
        much about synchronization here
        """
        LOGGER.debug('{} knows about {}: {}'.format(
            self.local_node.node_id, node.node_id, state))
        # add_node() contains no yield, so calling it without yielding runs
        # it to completion before the state assignment below.
        self.add_node(node)
        self.nodes[node.node_id].state = state

    @coroutine
    def mark_alive(self, node):
        """Set ``node`` to State.ALIVE; the local node itself is ignored."""
        if node.node_id != self.local_node.node_id:
            LOGGER.debug('Marking {} ALIVE'.format(node.node_id))
            self._change_node_state(node, State.ALIVE)

    @coroutine
    def mark_dead(self, node):
        """Set ``node`` to State.DEAD."""
        self._change_node_state(node, State.DEAD)

    @coroutine
    def mark_suspect(self, node):
        """Set ``node`` to State.SUSPECT."""
        self._change_node_state(node, State.SUSPECT)

    @coroutine
    def ingest_gossip_inbox(self):
        """Forever: take a message off the inbox, apply it, re-gossip it.

        The sender of every recognised message is marked ALIVE; the subject
        node is marked according to the message type. Messages of any other
        type are dropped after the debug log line.
        """
        while True:
            LOGGER.debug('checking inbox')
            message = yield self.gossip_inbox.get()
            LOGGER.debug('Received message {} from gossip inbox'.format(
                message.MESSAGE_TYPE))
            if message.MESSAGE_TYPE == Alive.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_alive(message.node)
                self.queue_gossip_send(message)
            elif message.MESSAGE_TYPE == Suspect.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_suspect(message.node)
                self.queue_gossip_send(message)
            elif message.MESSAGE_TYPE == Dead.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_dead(message.node)
                self.queue_gossip_send(message)

    @coroutine
    def queue_gossip_send(self, message):
        """
        If the message is gossipable, add it to the outbox
        """
        try:
            next_incarnation = message.next_incarnation
            next_incarnation.sender = self.local_node
        except message.MaxIncarnationsReached:
            LOGGER.debug(
                'Max incarnations reached for {}! No gossip 4 u'.format(
                    message.MESSAGE_TYPE))
        else:
            LOGGER.debug('Enqueuing {} gossips for {}'.format(
                GOSSIP_PEERS, message))
            for _ in xrange(GOSSIP_PEERS):
                yield self.gossip_outbox.put(next_incarnation)

    @coroutine
    def send_buffered_gossip(self):
        """Forever: send each outbox message to one randomly chosen node.

        When the connection fails with StreamClosedError the message is put
        back on the outbox (or dropped if the outbox is full).
        """
        while True:
            random_node = yield self.get_random_node()
            message = yield self.gossip_outbox.get()
            LOGGER.debug('{} connecting to {} for gossip'.format(
                self.local_node, random_node))
            try:
                stream = yield self.tcpclient.connect(random_node.addr,
                                                      random_node.port)
            except StreamClosedError:
                LOGGER.error(
                    'Unable to connect from {} to {} (sending gossip)'.format(
                        self.local_node.node_id, random_node.node_id))
                LOGGER.warning('Putting the gossip back on our queue')
                try:
                    self.gossip_outbox.put_nowait(message)
                except QueueFull:
                    LOGGER.error(
                        'Unable to put gossip back onto the queue. Giving up!')
            else:
                try:
                    LOGGER.debug('{} gossipping with {}'.format(
                        self.local_node.node_id, random_node.node_id))
                    yield self.send_message(stream, message)
                finally:
                    stream.close()

    @coroutine
    def get_other_nodes(self, exclude=None):
        """Return the ids of all known nodes except the excluded ones.

        :param exclude: iterable of Node instances to leave out; defaults to
            just the local node
        :returns: list of node ids (keys of ``self.nodes``)
        """
        if exclude is None:
            exclude = (self.local_node, )
        exclude_node_ids = [n.node_id for n in exclude]
        raise Return([n for n in self.nodes if n not in exclude_node_ids])

    @coroutine
    def get_random_node(self, exclude=None):
        """Wait until a node has been added, then return a random other node.

        :param exclude: passed through to get_other_nodes()
        :returns: the Node instance stored under the chosen id
        """
        LOGGER.debug('Waiting for more nodes')
        yield self.add_node_event.wait()
        LOGGER.debug('Getting non-self random node')
        other_nodes = yield self.get_other_nodes(exclude=exclude)
        LOGGER.debug('{} got something! choices: {}'.format(
            self.local_node.node_id, other_nodes))
        assert other_nodes
        node_id = random.choice(other_nodes)
        raise Return(self.nodes[node_id])
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

        @gen.coroutine
        def producer():
            for item in range(5):
                yield q.put(item)
                print('Put %s' % item)

        @gen.coroutine
        def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            yield producer()     # Wait for producer to put all tasks.
            yield q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    In Python 3.5, `Queue` implements the async iterator protocol, so
    ``consumer()`` could be rewritten as::

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """

    def __init__(self, maxsize=0):
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])  # Pairs of (item, Future).
        self._unfinished_tasks = 0
        # Set whenever there are no unfinished tasks; join() waits on it.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items currently in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True if the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True if the queue is bounded and has reached maxsize."""
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            future = Future()
            self._putters.append((item, future))
            _set_timeout(future, timeout)
            return future
        else:
            return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            # Route the item through the queue so subclasses' _put/_get
            # ordering is honoured, then hand the result to the waiter.
            self.__put_internal(item)
            getter.set_result(self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.gen.TimeoutError` after a timeout.
        """
        future = Future()
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            # Admit the waiting putter's item, then pop the next item out.
            self.__put_internal(item)
            putter.set_result(None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self):
        # Must return the asynchronous iterator itself, not an awaitable:
        # wrapping this in a coroutine makes ``async for`` fail on
        # Python 3.7+ (and was deprecated from 3.5.2).
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)
    # End of the overridable methods.

    def __put_internal(self, item):
        # Every stored item counts as an unfinished task until task_done().
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        # Build the state summary shared by __repr__ and __str__; empty
        # parts are left out.
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, '_queue', None):
            result += ' queue=%r' % self._queue
        if self._getters:
            result += ' getters[%s]' % len(self._getters)
        if self._putters:
            result += ' putters[%s]' % len(self._putters)
        if self._unfinished_tasks:
            result += ' tasks=%s' % self._unfinished_tasks
        return result
class ApsProductController(object):
    """
    Wrapper class for an APSUSE product.
    """
    STATES = ["idle", "preparing", "ready", "starting", "capturing",
              "stopping", "error"]
    IDLE, PREPARING, READY, STARTING, CAPTURING, STOPPING, ERROR = STATES

    def __init__(self, parent, product_id, katportal_client, proxy_name):
        """
        @brief      Construct new instance

        @param      parent            The parent ApsMasterController instance
        @param      product_id        The name of the product
        @param      katportal_client  An katportal client wrapper instance
        @param      proxy_name        The name of the proxy associated with
                                      this subarray

        #NEED FENG CONFIG

        @note       Worker servers are not passed in here; they are taken
                    from the parent's server pool in capture_start.
        """
        self.log = logging.getLogger(
            "mpikat.apsuse_product_controller.{}".format(product_id))
        self.log.debug(
            "Creating new ApsProductController with args: {}".format(
                ", ".join([str(i) for i in (
                    parent, product_id, katportal_client, proxy_name)])))
        self._parent = parent
        self._product_id = product_id
        self._katportal_client = katportal_client
        self._proxy_name = proxy_name
        self._managed_sensors = []
        self._worker_config_map = {}
        self._servers = []
        self._fbf_sb_config = None
        # Set by capture_stop to break the data-suspect wait loops started
        # in capture_start.
        self._state_interrupt = Event()
        self._base_output_dir = "/output/"
        self._coherent_beam_tracker = None
        self._incoherent_beam_tracker = None
        self.setup_sensors()

    def __del__(self):
        self.teardown_sensors()

    def info(self):
        """
        @brief    Return a metadata dictionary describing this product
                  controller
        """
        out = {
            "state": self.state,
            "proxy_name": self._proxy_name
        }
        return out

    def add_sensor(self, sensor):
        """
        @brief    Add a sensor to the parent object

        @note     This method is used to wrap calls to the add_sensor method
                  on the parent ApsMasterController instance. In order to
                  disambiguate between sensors describing different products
                  the product ID is used as a sensor name prefix: a sensor
                  named "servers" is registered on the parent as
                  "<product_id>.servers". A sensor whose name already starts
                  with that prefix is registered unchanged.
        """
        prefix = "{}.".format(self._product_id)
        if not sensor.name.startswith(prefix):
            sensor.name = "{}{}".format(prefix, sensor.name)
        self._parent.add_sensor(sensor)
        self._managed_sensors.append(sensor)

    def setup_sensors(self):
        """
        @brief    Setup the default KATCP sensors.

        @note     As this call is made only upon an APSUSE configure call a
                  mass inform is required to let connected clients know that
                  the proxy interface has changed.
        """
        self._state_sensor = LoggingSensor.discrete(
            "state",
            description="Denotes the state of this APS instance",
            params=self.STATES,
            default=self.IDLE,
            initial_status=Sensor.NOMINAL)
        self._state_sensor.set_logger(self.log)
        self.add_sensor(self._state_sensor)

        self._fbf_sb_config_sensor = Sensor.string(
            "fbfuse-sb-config",
            description="The full FBFUSE schedule block configuration",
            default="",
            initial_status=Sensor.UNKNOWN)
        self.add_sensor(self._fbf_sb_config_sensor)

        self._worker_configs_sensor = Sensor.string(
            "worker-configs",
            description="The configurations for each worker server",
            default="",
            initial_status=Sensor.UNKNOWN)
        self.add_sensor(self._worker_configs_sensor)

        self._servers_sensor = Sensor.string(
            "servers",
            description="The worker server instances currently allocated to this product",
            default=",".join(["{s.hostname}:{s.port}".format(s=server)
                              for server in self._servers]),
            initial_status=Sensor.UNKNOWN)
        self.add_sensor(self._servers_sensor)

        self._data_rate_per_worker_sensor = Sensor.float(
            "data-rate-per-worker",
            description="The maximum ingest rate per APSUSE worker server",
            default=6e9,
            unit="bits/s",
            initial_status=Sensor.NOMINAL)
        self.add_sensor(self._data_rate_per_worker_sensor)

        self._current_recording_directory_sensor = Sensor.string(
            "current-recording-directory",
            description="The current directory for recording from this subarray",
            default="",
            initial_status=Sensor.UNKNOWN
            )
        self.add_sensor(self._current_recording_directory_sensor)

        self._current_recording_sensor = Sensor.string(
            "recording-params",
            description="The parameters of the current APSUSE recording",
            default="",
            initial_status=Sensor.UNKNOWN)
        self.add_sensor(self._current_recording_sensor)

        self._parent.mass_inform(Message.inform('interface-changed'))
        self._state_sensor.set_value(self.READY)

    def teardown_sensors(self):
        """
        @brief    Remove all sensors created by this product from the parent
                  server.

        @note     This method is required for cleanup to stop the APS sensor
                  pool becoming swamped with unused sensors.
        """
        for sensor in self._managed_sensors:
            self._parent.remove_sensor(sensor)
        self._managed_sensors = []
        self._parent.mass_inform(Message.inform('interface-changed'))

    @property
    def servers(self):
        return self._servers

    @property
    def capturing(self):
        return self.state == self.CAPTURING

    @property
    def idle(self):
        return self.state == self.IDLE

    @property
    def starting(self):
        return self.state == self.STARTING

    @property
    def stopping(self):
        return self.state == self.STOPPING

    @property
    def ready(self):
        return self.state == self.READY

    @property
    def preparing(self):
        return self.state == self.PREPARING

    @property
    def error(self):
        return self.state == self.ERROR

    @property
    def state(self):
        return self._state_sensor.value()

    def set_error_state(self, message):
        """
        @brief    Move the product into the error state

        @param    message   Currently unused; only the state sensor is updated
        """
        self._state_sensor.set_value(self.ERROR)

    @coroutine
    def configure(self):
        pass

    @coroutine
    def deconfigure(self):
        pass

    @coroutine
    def disable_all_writers(self):
        """
        @brief    Call disable_writers on every allocated server in turn

        @note     A failure on one server is logged and does not stop the
                  remaining servers from being processed.
        """
        self.log.debug("Disabling all writers")
        for server in self._servers:
            try:
                yield server.disable_writers()
            except Exception as error:
                self.log.exception("Failed to disable writers on {}: {}".format(
                    server, str(error)))

    def set_data_rate_per_worker(self, value):
        """
        @brief    Set the data-rate-per-worker sensor

        @param    value   The rate in bits/s. Values outside 1e9..25e9 are
                          still applied but logged as suspect.
        """
        if (value < 1e9) or (value > 25e9):
            self.log.warning("Suspect data rate set for workers: {} bits/s".format(
                value))
        self._data_rate_per_worker_sensor.set_value(value)

    @coroutine
    def enable_writers(self):
        """
        @brief    Start a new recording on all allocated servers

        @detail   Fetches the current beam positions and target configuration
                  from katportal, creates a timestamped output directory under
                  the base output directory, writes an apsuse.meta JSON file
                  into it, updates the recording sensors and then asks every
                  server to enable its writers for the beams it handles.
        """
        self.log.info("Enabling writers")
        self.log.debug("Getting beam positions")
        beam_map = yield self._katportal_client.get_fbfuse_coherent_beam_positions(self._product_id)
        target_config = yield self._katportal_client.get_fbfuse_target_config(self._product_id)
        # The incoherent beam is recorded at the phase reference position
        beam_map.update({"ifbf00000": target_config["phase-reference"]})
        self.log.debug("Beam map: {}".format(beam_map))
        coherent_tsamp = self._fbf_sb_config["coherent-beam-tscrunch"] * self._fbf_sb_config["nchannels"] / self._fbf_sb_config["bandwidth"]
        incoherent_tsamp = self._fbf_sb_config["incoherent-beam-tscrunch"] * self._fbf_sb_config["nchannels"] / self._fbf_sb_config["bandwidth"]

        # Now get all information required for APSMETA file
        output_dir = "{}/{}".format(
            self._base_output_dir, time.strftime("%Y%m%d_%H%M%S"))
        os.makedirs(output_dir)
        proposal_id = yield self._katportal_client.get_proposal_id()
        sb_id = yield self._katportal_client.get_sb_id()
        apsuse_meta = {
            "centre_frequency": self._fbf_sb_config["centre-frequency"],
            "bandwidth": self._fbf_sb_config["bandwidth"],
            "coherent_nchans": self._fbf_sb_config["nchannels"] / self._fbf_sb_config["coherent-beam-fscrunch"],
            "coherent_tsamp": coherent_tsamp,
            "incoherent_nchans": self._fbf_sb_config["nchannels"] / self._fbf_sb_config["incoherent-beam-fscrunch"],
            "incoherent_tsamp": incoherent_tsamp,
            "project_name": proposal_id,
            "sb_id": sb_id,
            "utc_start": time.strftime("%Y/%m/%d %H:%M:%S"),
            "output_dir": output_dir.replace("/DATA/", "/beegfs/DATA/TRAPUM/"),
            "beamshape": target_config["coherent-beam-shape"],
            "boresight": target_config["phase-reference"],
            "beams": beam_map
        }

        # Generate user friendly formatting for the current recording:
        format_mapping = (
            ("Centre frequency:", "centre_frequency", "Hz"),
            ("Bandwidth:", "bandwidth", "Hz"),
            ("CB Nchannels:", "coherent_nchans", ""),
            ("CB sampling:", "coherent_tsamp", "s"),
            ("IB Nchannels:", "incoherent_nchans", ""),
            ("IB sampling:", "incoherent_tsamp", "s"),
            ("Project ID:", "project_name", ""),
            ("SB ID:", "sb_id", ""),
            ("UTC start:", "utc_start", ""),
            ("Directory:", "output_dir", "")
        )
        formatted_apsuse_meta = "<br />".join((
            "<font color='lightblue'><b>{}</b></font> {} {}".format(
                name, apsuse_meta[key], unit)
            for name, key, unit in format_mapping))
        formatted_apsuse_meta = "<p>{}</p>".format(formatted_apsuse_meta)
        self._current_recording_sensor.set_value(formatted_apsuse_meta)
        self._current_recording_directory_sensor.set_value(output_dir)

        # Failing to write the metadata file must not stop the recording
        try:
            with open("{}/apsuse.meta".format(output_dir), "w") as f:
                f.write(json.dumps(apsuse_meta))
        except Exception:
            self.log.exception("Could not write apsuse.meta file")

        # Fire all requests first, then wait, so servers start in parallel
        enable_futures = []
        for server in self._servers:
            worker_config = self._worker_config_map[server]
            sub_beam_list = {}
            for beam in worker_config.incoherent_beams():
                if beam in beam_map:
                    sub_beam_list[beam] = beam_map[beam]
            for beam in worker_config.coherent_beams():
                if beam in beam_map:
                    sub_beam_list[beam] = beam_map[beam]
            enable_futures.append(server.enable_writers(sub_beam_list, output_dir))

        for ii, future in enumerate(enable_futures):
            try:
                yield future
            except Exception as error:
                self.log.exception("Failed to enable writers on server {}: {}".format(
                    self._servers[ii], str(error)))

    @coroutine
    def capture_start(self):
        """
        @brief    Allocate and configure worker servers and begin capturing

        @detail   Requires the product to be in the ready state. Allocates
                  every available server from the parent's pool, generates a
                  configuration per server from the FBFUSE schedule block
                  configuration, configures the servers and starts the
                  data-suspect trackers that toggle the writers on and off.

        @throws   ApsProductStateError if the product is not ready
        @throws   Exception if every server fails to configure
        """
        if not self.ready:
            raise ApsProductStateError([self.READY], self.state)
        self._state_sensor.set_value(self.STARTING)
        self.log.debug("Product moved to 'starting' state")
        # At this point assume we do not know about the SB config and get everything fresh
        proposal_id = yield self._katportal_client.get_proposal_id()
        sb_id = yield self._katportal_client.get_sb_id()
        # determine base output path
        # /output/{proposal_id}/{sb_id}/
        # scan number will be added to the path later
        # The /DATA/ path is usually a mount of /beegfs/DATA/TRAPUM
        self._base_output_dir = "/DATA/{}/{}/".format(proposal_id, sb_id)
        self._fbf_sb_config = yield self._katportal_client.get_fbfuse_sb_config(self._product_id)
        self._fbf_sb_config_sensor.set_value(self._fbf_sb_config)
        self.log.debug("Determined FBFUSE config: {}".format(self._fbf_sb_config))

        # New multicast setup
        # First we allocate all servers
        self._servers = []
        self._worker_config_map = {}
        while True:
            try:
                server = self._parent._server_pool.allocate(1)[0]
            except WorkerAllocationError:
                # Pool exhausted: every available server is now ours
                break
            else:
                self._servers.append(server)

        config_generator = ApsConfigGenerator(
            self._fbf_sb_config, self._data_rate_per_worker_sensor.value())
        self._worker_config_map = config_generator.allocate_groups(self._servers)
        # NOTE(review): this warning is emitted unconditionally, even when
        # both remaining-group lists are empty -- confirm that is intended.
        message = "\n".join((
            "Could not allocate resources for capture of the following groups",
            "incoherent groups: {}".format(",".join(
                map(str, config_generator.remaining_incoherent_groups()))),
            "coherent groups: {}".format(",".join(
                map(str, config_generator.remaining_coherent_groups())))))
        self.log.warning(message)

        cb_data_rate = (self._fbf_sb_config["coherent-beam-multicast-groups-data-rate"]
                        / self._fbf_sb_config["coherent-beam-count-per-group"])
        ib_data_rate = self._fbf_sb_config["incoherent-beam-multicast-group-data-rate"]
        # Rates are in bits/s, file sizes in bytes
        cb_file_size = (cb_data_rate * DEFAULT_FILE_LENGTH) / 8
        ib_file_size = (ib_data_rate * DEFAULT_FILE_LENGTH) / 8
        self.log.info("CB filesize: {} bytes".format(cb_file_size))
        self.log.info("IB filesize: {} bytes".format(ib_file_size))

        # Get all common configuration parameters
        common_config = {
            "bandwidth": self._fbf_sb_config["bandwidth"],
            "centre-frequency": self._fbf_sb_config["centre-frequency"],
            "sample-clock": self._fbf_sb_config["bandwidth"] * 2,
        }
        common_config["sync-epoch"] = yield self._katportal_client.get_sync_epoch()

        common_coherent_config = {
            "heap-size": self._fbf_sb_config["coherent-beam-heap-size"],
            "idx1-step": self._fbf_sb_config["coherent-beam-idx1-step"],
            "nchans": self._fbf_sb_config["nchannels"] / self._fbf_sb_config["coherent-beam-fscrunch"],
            "nchans-per-heap": self._fbf_sb_config["coherent-beam-subband-nchans"],
            "sampling-interval": self._fbf_sb_config["coherent-beam-time-resolution"],
            "base-output-dir": "{}".format(self._base_output_dir),
            "filesize": cb_file_size
        }

        common_incoherent_config = {
            "heap-size": self._fbf_sb_config["incoherent-beam-heap-size"],
            "idx1-step": self._fbf_sb_config["incoherent-beam-idx1-step"],
            "nchans": self._fbf_sb_config["nchannels"] / self._fbf_sb_config["incoherent-beam-fscrunch"],
            "nchans-per-heap": self._fbf_sb_config["incoherent-beam-subband-nchans"],
            "sampling-interval": self._fbf_sb_config["incoherent-beam-time-resolution"],
            "base-output-dir": "{}".format(self._base_output_dir),
            "filesize": ib_file_size
        }

        configure_futures = []
        all_server_configs = {}
        for server, config in self._worker_config_map.items():
            server_config = {}
            if config.incoherent_groups():
                incoherent_config = deepcopy(common_config)
                incoherent_config.update(common_incoherent_config)
                incoherent_config["beam-ids"] = []
                incoherent_config["stream-indices"] = []
                incoherent_config["mcast-groups"] = []
                incoherent_config["mcast-port"] = 7147  # Where should this info come from?
                for beam in config.incoherent_beams():
                    incoherent_config["beam-ids"].append(beam)
                    incoherent_config["stream-indices"].append(int(beam.lstrip("ifbf")))
                incoherent_config["mcast-groups"].extend(map(str, config.incoherent_groups()))
                server_config["incoherent-beams"] = incoherent_config
            if config.coherent_groups():
                coherent_config = deepcopy(common_config)
                coherent_config.update(common_coherent_config)
                coherent_config["beam-ids"] = []
                coherent_config["stream-indices"] = []
                coherent_config["mcast-groups"] = []
                coherent_config["mcast-port"] = 7147  # Where should this info come from?
                for beam in config.coherent_beams():
                    coherent_config["beam-ids"].append(beam)
                    coherent_config["stream-indices"].append(int(beam.lstrip("cfbf")))
                coherent_config["mcast-groups"].extend(map(str, config.coherent_groups()))
                server_config["coherent-beams"] = coherent_config
            configure_futures.append(server.configure(server_config))
            all_server_configs[server] = server_config
            self.log.info("Configuration for server {}: {}".format(
                server, server_config))
        self._worker_configs_sensor.set_value(all_server_configs)

        failure_count = 0
        for future in configure_futures:
            try:
                yield future
            except Exception as error:
                self.log.error(
                    "Failed to configure server with error: {}".format(
                        str(error)))
                failure_count += 1

        if (failure_count == len(self._servers)) and not (len(self._servers) == 0):
            self._state_sensor.set_value(self.ERROR)
            self.log.info("Failed to prepare FBFUSE product")
            raise Exception("No APSUSE servers configured successfully")
        elif failure_count > 0:
            self.log.warning("{} APSUSE servers failed to configure".format(
                failure_count))

        # At this point we do the data-suspect tracking start
        self._coherent_beam_tracker = self._katportal_client.get_sensor_tracker(
            "fbfuse", "fbfmc_{}_coherent_beam_data_suspect".format(
                self._product_id))
        self._incoherent_beam_tracker = self._katportal_client.get_sensor_tracker(
            "fbfuse", "fbfmc_{}_incoherent_beam_data_suspect".format(
                self._product_id))
        self.log.info("Starting FBFUSE data-suspect tracking")
        yield self._coherent_beam_tracker.start()
        yield self._incoherent_beam_tracker.start()

        # The two coroutines below reschedule each other on the ioloop, so
        # the writers follow the data-suspect flags until capture_stop sets
        # the interrupt event.
        @coroutine
        def wait_for_on_target():
            self.log.info("Waiting for data-suspect flags to become False")
            self._state_interrupt.clear()
            try:
                yield self._coherent_beam_tracker.wait_until(
                    False, self._state_interrupt)
                yield self._incoherent_beam_tracker.wait_until(
                    False, self._state_interrupt)
            except Interrupt:
                self.log.debug("data-suspect tracker interrupted")
            else:
                self.log.info("data-suspect flags now False (on target)")
                try:
                    yield self.disable_all_writers()
                    yield self.enable_writers()
                except Exception:
                    self.log.exception("error")
                self._parent.ioloop.add_callback(wait_for_off_target)

        @coroutine
        def wait_for_off_target():
            self.log.info("Waiting for data-suspect flags to become True")
            self._state_interrupt.clear()
            try:
                yield self._coherent_beam_tracker.wait_until(
                    True, self._state_interrupt)
                yield self._incoherent_beam_tracker.wait_until(
                    True, self._state_interrupt)
            except Interrupt:
                self.log.debug("data-suspect tracker interrupted")
            else:
                self.log.info("data-suspect flags now True (off-target/retiling)")
                yield self.disable_all_writers()
                self._parent.ioloop.add_callback(wait_for_on_target)

        self._parent.ioloop.add_callback(wait_for_on_target)
        server_str = ",".join(["{s.hostname}:{s.port}".format(
            s=server) for server in self._servers])
        self._servers_sensor.set_value(server_str)
        self._state_sensor.set_value(self.CAPTURING)
        self.log.debug("Product moved to 'capturing' state")

    @coroutine
    def capture_stop(self):
        """
        @brief      Stops the beamformer servers streaming.

        @detail     This should only be called on a schedule block
                    reconfiguration if the same configuration persists between
                    schedule blocks then it is preferable to continue
                    streaming rather than stopping and starting again.
        """
        if not self.capturing and not self.error:
            return
        self._state_sensor.set_value(self.STOPPING)
        # Break the on/off-target wait loops before stopping the trackers
        self._state_interrupt.set()
        if self._coherent_beam_tracker:
            yield self._coherent_beam_tracker.stop()
            self._coherent_beam_tracker = None
        if self._incoherent_beam_tracker:
            yield self._incoherent_beam_tracker.stop()
            self._incoherent_beam_tracker = None
        yield self.disable_all_writers()

        # Take one snapshot of the configured servers so that every future
        # can be paired with its server; indexing dict.keys() directly only
        # works on Python 2.
        configured_servers = list(self._worker_config_map.keys())
        deconfigure_futures = []
        for server in configured_servers:
            self.log.info("Sending deconfigure to server {}".format(server))
            deconfigure_futures.append(server.deconfigure())
        for server, future in zip(configured_servers, deconfigure_futures):
            try:
                yield future
            except Exception as error:
                self.log.exception("Failed to deconfigure worker {}: {}".format(
                    server, str(error)))
        # NOTE(review): only servers present in the worker config map are
        # returned to the pool; confirm allocate_groups() keeps every server
        # that capture_start allocated, otherwise the rest stay allocated.
        self._parent._server_pool.deallocate(configured_servers)
        self.log.info("Deallocated all servers")
        self._worker_config_map = {}
        self._servers_sensor.set_value("")
        self._state_sensor.set_value(self.READY)
class Application(object):
    """Membership and gossip application for the local node.

    Keeps a table of known nodes, pings peers over TCP, records their state
    (alive / suspect / dead) and relays gossip messages through an inbox and
    an outbox queue.
    """

    def __init__(self, routes, node, pipe):
        """
        Application instantiates and registers handlers for each message type, and routes messages to the
        pre-instantiated instances of each message handler

        :param routes: list of tuples in the form of (<message type str>, <MessageHandler class>)
        :param node: Node instance of the local node
        :param pipe: Instance of multiprocessing.Pipe for communicating with the parent process
        """
        # We don't really have to worry about synchronization
        # so long as we're careful about explicit context switching
        self.nodes = {node.node_id: node}
        self.local_node = node
        self.handlers = {}
        self.tcpclient = TCPClient()
        self.gossip_inbox = Queue()
        self.gossip_outbox = Queue()
        self.sequence_number = 0
        if routes:
            self.add_handlers(routes)
        self.pipe = pipe
        self.ioloop = IOLoop.current()
        # Set while at least one node has been added; get_random_node waits on it.
        self.add_node_event = Event()

    def next_sequence_number(self):
        """Increment and return the local ping sequence number."""
        self.sequence_number += 1
        return self.sequence_number

    @coroutine
    def ping_random_node(self):
        """Ping a randomly chosen peer and mark it suspect if the ping times out."""
        node = yield self.get_random_node()
        LOGGER.debug('{} pinging random node: {}'.format(self.local_node.node_id, node.node_id))
        try:
            yield self.ping(node)
        except TimeoutError:
            self.mark_suspect(node)

    @coroutine
    def add_node(self, node):
        """Register ``node`` if its id is not known yet and signal that peers exist."""
        if node.node_id not in self.nodes:
            LOGGER.debug('Adding node {} to {}'.format(node, self.nodes))
            self.add_node_event.set()
            self.nodes[node.node_id] = node
            LOGGER.debug('Added node {} to {}'.format(node, self.nodes))

    @coroutine
    def remove_node(self, node):
        """Forget ``node`` and clear the "peers exist" event when no peer is left."""
        if node.node_id in self.nodes:
            del self.nodes[node.node_id]
        # get_other_nodes is a coroutine: it has to be *called* to obtain a
        # yieldable future (the bare bound method is not yieldable).
        other_nodes = yield self.get_other_nodes()
        if not other_nodes:
            self.add_node_event.clear()

    def add_handlers(self, handlers):
        """Instantiate one handler per ``(message_type, handler_cls)`` pair.

        Each handler class is constructed with this application as its only
        argument. Unknown message types trip the assertion.
        """
        for message_type, handler_cls in handlers:
            assert message_type in MESSAGE_TYPES, (
                'Message type {!r} not found in MESSAGE TYPES {}'.format(
                    message_type,
                    MESSAGE_TYPES.keys()
                )
            )
            self.handlers[message_type] = handler_cls(self)

    def route_stream_message(self, stream, message_type, message):
        """Build the message object for ``message_type`` and hand it to its handler.

        :param stream: stream the message arrived on (passed through to the handler)
        :param message_type: key into MESSAGE_TYPES and ``self.handlers``
        :param message: dict of keyword arguments for the message class
        """
        LOGGER.debug('{!r} received {} message from {!r}'.format(self, message_type, stream))
        message_cls = MESSAGE_TYPES[message_type]
        message_obj = message_cls(**message)
        handler = self.handlers[message_type]
        LOGGER.debug('Routing {} to {}'.format(message_type, handler))
        handler(stream, message_obj)

    @coroutine
    def send_message(self, stream, message):
        """Write ``message.to_msgpack`` to ``stream``; a closed stream is only logged."""
        LOGGER.debug('Sending message {!r} to {}'.format(message.MESSAGE_TYPE, stream))
        try:
            yield stream.write(message.to_msgpack)
        except StreamClosedError:
            LOGGER.warning('Unable to send {} to {} - stream closed'.format(message.MESSAGE_TYPE, stream))

    @coroutine
    def _get_next_message(self, stream):
        """Read and decode the next message from ``stream``.

        Reads up to 4096 bytes with a PING_TIMEOUT-second timeout. Returns the
        decoded message with its 'type' key removed, or None when the stream
        is closed. The timeout error raised by ``with_timeout`` propagates.
        """
        # get the next message from the stream
        unpacker = msgpack.Unpacker()
        try:
            wire_bytes = yield with_timeout(
                datetime.timedelta(seconds=PING_TIMEOUT),
                stream.read_bytes(4096, partial=True)
            )
        except StreamClosedError:
            LOGGER.warning('Unable to get next message from {} - stream closed'.format(stream))
        else:
            unpacker.feed(wire_bytes)
            LOGGER.debug('Deserializing object from stream {}'.format(stream))
            message = unpacker.next()
            message.pop('type')
            raise Return(message)

    @coroutine
    def ping(self, node):
        """
        Ping a node

        :param node: Instance of Node to ping
        :returns: Boolean, True if successful/False if fail
        """
        host = node.addr
        port = node.port
        LOGGER.debug('pinging {}:{}'.format(host, port))
        ping = Ping(seqno=self.next_sequence_number(), node=node, sender=self.local_node)

        # Connect to the node
        try:
            stream = yield self.tcpclient.connect(host, port)
        except StreamClosedError:
            LOGGER.error('Unable to connect from {} to {} (pinging host)'.format(self.local_node.node_id, node.node_id))
            raise Return(False)

        try:
            # Send the ping
            LOGGER.debug('Sending {!r} to {!r}'.format(ping.MESSAGE_TYPE, node))
            yield self.send_message(stream, ping)

            # Wait for an ACK message in response
            LOGGER.debug('Getting next message from {}:{}'.format(host, port))
            message = yield self._get_next_message(stream)
            if message is None:
                raise Return(False)

            ack = Ack(**message)
            LOGGER.debug('Received {!r} from {!r} (response to {!r})'.format(ack.MESSAGE_TYPE, node.node_id, ping.MESSAGE_TYPE))

            # Check that the ACK sequence number matches the PING sequence number
            if ack.seqno == ping.seqno:
                LOGGER.debug('Sequence number matches. Node {} looks good to {}!'.format(node.node_id, self.local_node.node_id))
                # Process the gossip messages tacked onto the ACK message's payload
                for message in ack.payload:
                    try:
                        self.gossip_inbox.put_nowait(message)
                    except QueueFull:
                        LOGGER.error('Unable to add {} message from {} to gossip inbox'.format(message.MESSAGE_TYPE, node.node_id))
                # mark the node as ALIVE in self.nodes
                self.mark_alive(node)

                # Send gossip that this node is alive
                self.queue_gossip_send(
                    Alive(node=node, sender=self.local_node)
                )

                raise Return(True)
            else:
                raise Return(False)
        finally:
            # The stream is single-use: always close it, whatever the outcome.
            stream.close()

    @coroutine
    def ack(self, stream, seqno):
        """Answer a ping with an Ack carrying up to ACK_PAYLOAD_SIZE queued gossips."""
        payload = []
        for _ in xrange(ACK_PAYLOAD_SIZE):
            try:
                gossip = self.gossip_outbox.get_nowait()
                payload.append(gossip)
            except QueueEmpty:
                break

        ack = Ack(seqno=seqno, payload=payload)
        LOGGER.debug('Trying to send ack: {}'.format(ack))
        try:
            yield stream.write(ack.to_msgpack)
        except StreamClosedError:
            LOGGER.error('Unable to connect from {} to stream (acking PING)'.format(self.local_node.node_id))
        else:
            # Only report success when the write actually went through.
            LOGGER.debug('Sent ack to {}'.format(stream))

    @coroutine
    def _change_node_state(self, node, state):
        """
        Because Tornado has explicit context switching, we don't need to worry much about synchronization here
        """
        LOGGER.debug('{} knows about {}: {}'.format(self.local_node.node_id, node.node_id, state))
        self.add_node(node)
        self.nodes[node.node_id].state = state

    @coroutine
    def mark_alive(self, node):
        """Record ``node`` as ALIVE; the local node itself is ignored."""
        if node.node_id != self.local_node.node_id:
            LOGGER.debug('Marking {} ALIVE'.format(node.node_id))
            self._change_node_state(node, State.ALIVE)

    @coroutine
    def mark_dead(self, node):
        """Record ``node`` as DEAD."""
        self._change_node_state(node, State.DEAD)

    @coroutine
    def mark_suspect(self, node):
        """Record ``node`` as SUSPECT."""
        self._change_node_state(node, State.SUSPECT)

    @coroutine
    def ingest_gossip_inbox(self):
        """Consume the gossip inbox forever, updating node states and re-queuing gossip."""
        while True:
            LOGGER.debug('checking inbox')
            message = yield self.gossip_inbox.get()
            LOGGER.debug('Received message {} from gossip inbox'.format(message.MESSAGE_TYPE))
            if message.MESSAGE_TYPE == Alive.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_alive(message.node)
                self.queue_gossip_send(message)

            elif message.MESSAGE_TYPE == Suspect.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_suspect(message.node)
                self.queue_gossip_send(message)

            elif message.MESSAGE_TYPE == Dead.MESSAGE_TYPE:
                self.mark_alive(message.sender)
                self.mark_dead(message.node)
                self.queue_gossip_send(message)

    @coroutine
    def queue_gossip_send(self, message):
        """
        If the message is gossipable, add it to the outbox
        """
        try:
            next_incarnation = message.next_incarnation
            next_incarnation.sender = self.local_node
        except message.MaxIncarnationsReached:
            LOGGER.debug('Max incarnations reached for {}! No gossip 4 u'.format(message.MESSAGE_TYPE))
        else:
            LOGGER.debug('Enqueuing {} gossips for {}'.format(GOSSIP_PEERS, message))
            for _ in xrange(GOSSIP_PEERS):
                yield self.gossip_outbox.put(next_incarnation)

    @coroutine
    def send_buffered_gossip(self):
        """Forever take gossip from the outbox and send it to a random peer.

        When the connection fails the gossip is put back on the outbox (or
        dropped with an error log when the outbox is full).
        """
        while True:
            random_node = yield self.get_random_node()
            message = yield self.gossip_outbox.get()
            LOGGER.debug('{} connecting to {} for gossip'.format(self.local_node, random_node))
            try:
                stream = yield self.tcpclient.connect(random_node.addr, random_node.port)
            except StreamClosedError:
                LOGGER.error('Unable to connect from {} to {} (sending gossip)'.format(self.local_node.node_id, random_node.node_id))
                LOGGER.warning('Putting the gossip back on our queue')
                try:
                    self.gossip_outbox.put_nowait(message)
                except QueueFull:
                    LOGGER.error('Unable to put gossip back onto the queue. Giving up!')
            else:
                try:
                    LOGGER.debug('{} gossipping with {}'.format(self.local_node.node_id, random_node.node_id))
                    yield self.send_message(stream, message)
                finally:
                    stream.close()

    @coroutine
    def get_other_nodes(self, exclude=None):
        """Return the ids of all known nodes except those in ``exclude``.

        :param exclude: iterable of Node instances to leave out; defaults to
            the local node only
        :returns: list of node ids (keys of ``self.nodes``)
        """
        if exclude is None:
            exclude = (self.local_node,)

        exclude_node_ids = [n.node_id for n in exclude]

        raise Return([n for n in self.nodes if n not in exclude_node_ids])

    @coroutine
    def get_random_node(self, exclude=None):
        """Wait until a peer has been added, then return a randomly chosen Node."""
        LOGGER.debug('Waiting for more nodes')
        yield self.add_node_event.wait()
        LOGGER.debug('Getting non-self random node')
        other_nodes = yield self.get_other_nodes(exclude=exclude)
        LOGGER.debug('{} got something! choices: {}'.format(self.local_node.node_id, other_nodes))
        assert other_nodes
        node_id = random.choice(other_nodes)
        raise Return(self.nodes[node_id])
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

        @gen.coroutine
        def producer():
            for item in range(5):
                yield q.put(item)
                print('Put %s' % item)

        @gen.coroutine
        def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            yield producer()     # Wait for producer to put all tasks.
            yield q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    In Python 3.5, `Queue` implements the async iterator protocol, so
    ``consumer()`` could be rewritten as::

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """
    def __init__(self, maxsize=0):
        """Create a queue holding at most ``maxsize`` items (0 means unbounded).

        Raises `TypeError` when ``maxsize`` is None and `ValueError` when it
        is negative.
        """
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._getters = collections.deque([])  # Futures.
        self._putters = collections.deque([])  # Pairs of (item, Future).
        self._unfinished_tasks = 0
        # Set whenever every put item has been matched by a task_done().
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True when the queue is bounded and has reached ``maxsize``."""
        if self.maxsize == 0:
            return False
        else:
            return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            future = Future()
            self._putters.append((item, future))
            _set_timeout(future, timeout)
            return future
        else:
            return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            getter = self._getters.popleft()
            # Route the item through the queue so subclasses' ordering and the
            # unfinished-task count stay consistent, then hand it to the getter.
            self.__put_internal(item)
            getter.set_result(self._get())
        elif self.full():
            raise QueueFull
        else:
            self.__put_internal(item)

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        future = Future()
        try:
            future.set_result(self.get_nowait())
        except QueueEmpty:
            self._getters.append(future)
            _set_timeout(future, timeout)
        return future

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            # Admit the waiting putter's item, release the putter, then pop.
            self.__put_internal(item)
            putter.set_result(None)
            return self._get()
        elif self.qsize():
            return self._get()
        else:
            raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self._unfinished_tasks -= 1
        if self._unfinished_tasks == 0:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self):
        """Return an asynchronous iterator over the queue's items.

        The iterator is returned directly rather than wrapped in a Future:
        Python 3.7+ requires ``__aiter__`` to return the asynchronous iterator
        itself and raises `TypeError` for an awaitable.
        """
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)
    # End of the overridable methods.

    def __put_internal(self, item):
        # Every admitted item counts as an unfinished task until task_done().
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        while self._putters and self._putters[0][1].done():
            self._putters.popleft()

        while self._getters and self._getters[0].done():
            self._getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        # Only non-empty / non-zero parts are included in the description.
        result = 'maxsize=%r' % (self.maxsize, )
        if getattr(self, '_queue', None):
            result += ' queue=%r' % self._queue
        if self._getters:
            result += ' getters[%s]' % len(self._getters)
        if self._putters:
            result += ' putters[%s]' % len(self._putters)
        if self._unfinished_tasks:
            result += ' tasks=%s' % self._unfinished_tasks
        return result
class Api:
    """Asynchronous client for the Telegram Bot HTTP API.

    Requests go to ``https://api.telegram.org/bot<token>/<method>``.
    ``wait_commands`` polls ``getUpdates`` and feeds every update to
    ``processor``.
    """

    STATE_WORKING = 0
    STATE_STOP_PENDING = 1
    STATE_STOPPED = 2

    CHAT_ACTION_TYPING = 'typing'
    CHAT_ACTION_UPLOAD_PHOTO = 'upload_photo'
    CHAT_ACTION_RECORD_VIDEO = 'record_video'
    CHAT_ACTION_UPLOAD_VIDEO = 'upload_video'
    CHAT_ACTION_RECORD_AUDIO = 'record_audio'
    CHAT_ACTION_UPLOAD_AUDIO = 'upload_audio'
    CHAT_ACTION_UPLOAD_DOC = 'upload_document'
    CHAT_ACTION_FIND_LOCATION = 'find_location'

    PARSE_MODE_NONE = None
    PARSE_MODE_MD = 'Markdown'
    PARSE_MODE_HTML = 'HTML'

    def __init__(self, token, processor):
        """
        :param token: bot token of the form ``<numeric bot id>:<secret>``
        :param processor: callable invoked with every received update; it may
            return a future
        :raises ValueError: when the token is not of the expected form
        """
        parts = token.split(':')
        # Exactly one colon and a numeric id in front of it.
        if len(parts) != 2 or not parts[0].isdigit():
            raise ValueError('Non well-formatted token given')
        self.bot_id = int(parts[0])

        self.token = token
        self.consumption_state = self.STATE_STOPPED
        self.processor = processor
        self.__me = None
        # Set while no polling loop is running.
        self._finished = Event()
        self._finished.set()

    @coroutine
    def get_me(self):
        """Return the result of ``getMe``, fetched once and then cached."""
        if not self.__me:
            self.__me = yield self.__request_api('getMe')

        return self.__me

    def stop(self):
        """Ask the polling loop started by ``wait_commands`` to stop."""
        assert not self._finished.is_set()
        self._finished.set()

    @property
    def is_alive(self):
        """True while the polling loop has been started and not stopped."""
        return not self._finished.is_set()

    @coroutine
    def __request_api(self, method, body=None, request_timeout=10, retry_on_nonuser_error=False):
        """Call a Bot API method and return the ``result`` field of the reply.

        :param method: API method name, appended to the bot URL
        :param body: dict of parameters; values with a ``read`` attribute are
            uploaded as files (multipart), otherwise the dict is sent as JSON.
            Without parameters a GET request is made.
        :param request_timeout: HTTP request timeout passed to the client
        :param retry_on_nonuser_error: retry every 5 seconds on HTTP errors
            whose code is outside the 4xx range
        :returns: the decoded ``result``, or None on a 599 error or an empty
            response
        :raises ApiError: when the API reports a failure
        """
        def guess_filename(obj):
            """Tries to guess the filename of the given object."""
            name = getattr(obj, 'name', None)
            if name and isinstance(name, str) and name[0] != '<' and name[-1] != '>':
                return basename(name)

        url = 'https://api.telegram.org/bot{token}/{method}'.format(token=self.token, method=method)
        try:
            request = {
                'request_timeout': request_timeout,
                'headers': {},
            }

            if body:
                request['method'] = 'POST'
                request_content = {}
                has_files = False
                file_names = {}
                for key, value in body.items():
                    if hasattr(value, 'read'):
                        request_content[key] = value.read()
                        file_names[key] = guess_filename(value)
                        has_files = True
                    else:
                        request_content[key] = value

                if has_files:
                    boundary = md5(str(time()).encode('utf-8')).hexdigest()
                    request['headers']['Content-type'] = 'multipart/form-data; boundary=' + boundary

                    body = []
                    for key, value in request_content.items():
                        body.append(b'--' + boundary.encode('utf-8'))
                        if key in file_names:
                            body.append(('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, file_names[key])).encode('utf-8'))
                        else:
                            body.append(('Content-Disposition: form-data; name="%s"' % key).encode('utf-8'))

                        body.append(b'')
                        if isinstance(value, int):
                            value = str(value)
                        if isinstance(value, str):
                            value = value.encode('utf-8')
                        body.append(value)
                    body.append(b'--' + boundary.encode('utf-8') + b'--')
                    body = b"\r\n" + b"\r\n".join(body) + b"\r\n"
                else:
                    request['headers']['Content-type'] = 'application/json'
                    body = ujson.dumps(request_content)
            else:
                request['method'] = 'GET'
                # An empty parameter dict is falsy too; a GET must not carry it.
                body = None

            while True:
                try:
                    response = yield AsyncHTTPClient().fetch(url, body=body, **request)
                    break
                except HTTPError as e:
                    # 4xx means the request itself is wrong: retrying cannot help.
                    if not retry_on_nonuser_error or 400 <= e.code < 500:
                        raise
                    else:
                        yield sleep(5)

            if response and response.body:
                response = ujson.loads(response.body.decode('utf-8'))
                if response['ok']:
                    return response['result']
                else:
                    raise ApiError(response['error_code'], response['description'], response.get('parameters'),
                                   request_body=body)
        except HTTPError as e:
            if e.code == 599:
                logging.exception('%s request timed out', method)  # Do nothing on timeout, just return None
            elif e.response and e.response.body:
                response = ujson.loads(e.response.body.decode('utf-8'))
                raise ApiError(response['error_code'], response['description'], response.get('parameters'),
                               request_body=body)
            else:
                raise ApiError(e.code, None, request_body=body)

        return None

    @coroutine
    def get_updates(self, offset: int=None, limit: int=100, timeout: int=2, retry_on_nonuser_error: bool=False):
        """Call ``getUpdates`` and return the list of updates (empty when none)."""
        assert 1 <= limit <= 100
        assert 0 <= timeout

        request = {
            'limit': limit,
            'timeout': timeout
        }

        if offset is not None:
            request['offset'] = offset

        # The HTTP timeout is longer than the long-poll timeout sent to the API.
        data = yield self.__request_api('getUpdates', request, request_timeout=timeout * 1.5,
                                        retry_on_nonuser_error=retry_on_nonuser_error)

        if data is None:
            return []

        return data

    @coroutine
    def wait_commands(self, last_update_id=None):
        """Poll for updates until ``stop`` is called, passing each to the processor.

        :param last_update_id: id of the last update already handled, if any;
            polling starts from the update after it
        """
        assert self._finished.is_set()

        self._finished.clear()

        self.consumption_state = self.STATE_WORKING

        if last_update_id is not None:
            last_update_id += 1

        yield self.get_me()

        while not self._finished.is_set():
            try:
                updates = yield self.get_updates(last_update_id, retry_on_nonuser_error=True)
            except BaseException:
                # Polling is over whatever the reason: report it, then re-raise.
                self._finished.set()
                raise

            for update in updates:
                yield maybe_future(self.processor(update))
                if 'update_id' in update:
                    last_update_id = update['update_id']

            # Acknowledge the processed batch by asking for the next id.
            if len(updates) and last_update_id is not None:
                last_update_id += 1

    @coroutine
    def send_chat_action(self, chat_id, action: str):
        """Call ``sendChatAction`` (see the CHAT_ACTION_* constants)."""
        return (yield self.__request_api('sendChatAction', {'chat_id': chat_id, 'action': action}))

    @coroutine
    def send_message(self, text: str, chat_id=None, reply_to_message: dict=None, parse_mode: str=None,
                     disable_web_page_preview: bool=False, disable_notification: bool=False,
                     reply_to_message_id: int=None, reply_markup=None):
        """Call ``sendMessage``.

        Either ``chat_id`` or ``reply_to_message`` must be given. With only
        ``reply_to_message`` the message goes to that message's chat and is
        marked as a reply unless the chat id equals the sender id; with both,
        it is always marked as a reply.
        """
        request = {
            'chat_id': chat_id,
            'text': text,
            'disable_web_page_preview': disable_web_page_preview,
            'disable_notification': disable_notification,
        }

        if parse_mode is not None:
            request['parse_mode'] = parse_mode

        if reply_to_message_id is not None:
            request['reply_to_message_id'] = reply_to_message_id

        if reply_to_message:
            if chat_id is None:
                request['chat_id'] = reply_to_message['chat']['id']
                if reply_to_message['chat']['id'] != reply_to_message['from']['id']:
                    request['reply_to_message_id'] = reply_to_message['message_id']
            else:
                request['reply_to_message_id'] = reply_to_message['message_id']
        else:
            assert chat_id is not None

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        try:
            return (yield self.__request_api('sendMessage', request))
        except ApiError as e:
            # description is None when the HTTP error carried no body; guard it
            # so the original ApiError is re-raised instead of an AttributeError.
            if e.code == 400 and e.description and e.description.startswith("Bad Request: Can't parse"):
                logging.exception('Got exception while sending text: %s', text)
            raise

    @coroutine
    def send_photo(self, chat_id, photo, caption: str=None, disable_notification: bool=False,
                   reply_to_message_id: int=None, reply_markup=None):
        """Call ``sendPhoto``; optional fields are only sent when not None."""
        request = {
            'chat_id': chat_id,
            'photo': photo,
            'disable_notification': disable_notification,
        }

        if caption is not None:
            request['caption'] = caption

        if reply_to_message_id is not None:
            request['reply_to_message_id'] = reply_to_message_id

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('sendPhoto', request))

    @coroutine
    def forward_message(self, chat_id, from_chat_id, message_id: int, disable_notification: bool=False):
        """Call ``forwardMessage``."""
        return (yield self.__request_api('forwardMessage', {
            'chat_id': chat_id,
            'from_chat_id': from_chat_id,
            'disable_notification': disable_notification,
            'message_id': message_id,
        }))

    @staticmethod
    def _prepare_inline_message(message=None, chat_id=None, message_id=None, inline_message_id=None):
        """Build the request fields that identify the message to edit.

        Precedence: ``message`` dict, then ``chat_id`` + ``message_id``, then
        ``inline_message_id``.
        """
        request = {}

        if message:
            request['chat_id'] = message['chat']['id']
            request['message_id'] = message['message_id']
        elif chat_id and message_id:
            request['chat_id'] = chat_id
            request['message_id'] = message_id
        else:
            request['inline_message_id'] = inline_message_id

        return request

    @coroutine
    def edit_message_reply_markup(self, message=None, chat_id=None, message_id=None, inline_message_id=None,
                                  reply_markup=None):
        """Call ``editMessageReplyMarkup`` for the identified message."""
        assert (chat_id and message_id) or message or inline_message_id

        request = self._prepare_inline_message(message=message, chat_id=chat_id, message_id=message_id,
                                               inline_message_id=inline_message_id)

        if reply_markup:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('editMessageReplyMarkup', request))

    @coroutine
    def edit_message_text(self, text, message=None, chat_id=None, message_id=None, inline_message_id=None,
                          parse_mode=None, disable_web_page_preview=False, reply_markup=None):
        """Call ``editMessageText`` for the identified message."""
        request = self._prepare_inline_message(message=message, chat_id=chat_id, message_id=message_id,
                                               inline_message_id=inline_message_id)

        if parse_mode is not None:
            request['parse_mode'] = parse_mode

        request['disable_web_page_preview'] = disable_web_page_preview
        request['text'] = text

        if reply_markup is not None:
            request['reply_markup'] = reply_markup

        return (yield self.__request_api('editMessageText', request))

    @coroutine
    def answer_callback_query(self, callback_query_id, text=None, show_alert=False):
        """Call ``answerCallbackQuery``; ``text`` is only sent when non-empty."""
        request = {
            'callback_query_id': callback_query_id,
            'show_alert': show_alert
        }

        if text:
            request['text'] = text

        return (yield self.__request_api('answerCallbackQuery', request))

    @coroutine
    def get_chat_administrators(self, chat_id):
        """Call ``getChatAdministrators``."""
        return (yield self.__request_api('getChatAdministrators', {'chat_id': chat_id}))

    @coroutine
    def get_chat(self, chat_id):
        """Call ``getChat``."""
        return (yield self.__request_api('getChat', {'chat_id': chat_id}))
class PollingLock(object):
    """ Leader-election lock that is held by periodically rewriting a key.

    The holder refreshes the key at a short, regular interval to show it is
    still alive, while every other candidate re-reads it at a longer interval
    to detect a holder that has stopped refreshing. This suits cases where
    some downtime and a delay before the first acquisition are acceptable.

    FoundationDB, unlike ZooKeeper and etcd, cannot delete a key automatically
    when a client stops heartbeating, hence the explicit refresh. Client
    timestamps are unreliable, so candidates cannot know the absolute time of
    the last refresh; each one therefore waits for the full timeout interval
    before looking at the key again.
    """
    # Seconds a lease stays valid without being refreshed.
    _LEASE_TIMEOUT = 60

    # Seconds between two refreshes by the holder.
    _HEARTBEAT_INTERVAL = int(_LEASE_TIMEOUT / 10)

    def __init__(self, db, tornado_fdb, key):
        """ Creates a new PollingLock.

        Args:
          db: Object used to create transactions.
          tornado_fdb: Object used to read and commit within a transaction.
          key: The key that represents the lock.
        """
        self._event = Event()
        self._deadline = None
        self._op_id = None
        self._owner = None
        self._client_id = uuid.uuid4()
        self._tornado_fdb = tornado_fdb
        self._db = db
        self.key = key

    @property
    def acquired(self):
        """ Whether this client owns a lease that has not expired yet. """
        return (self._deadline is not None and
                self._owner == self._client_id and
                monotonic.monotonic() < self._deadline)

    def start(self):
        """ Schedules the lease maintenance loop on the current IOLoop. """
        IOLoop.current().spawn_callback(self._run)

    @gen.coroutine
    def acquire(self):
        """ Waits until this client holds the lock. """
        # The event has no timeout of its own, so the lease is re-validated
        # on every call before waiting.
        lease_is_valid = self.acquired
        if not lease_is_valid:
            self._event.clear()

        yield self._event.wait()

    @gen.coroutine
    def _run(self):
        """ Tries forever to acquire or refresh the lease. """
        while True:
            try:
                yield self._acquire_lease()
            except Exception:
                logger.exception(u'Unable to acquire lease')
                retry_delay = random.random() * 20
                yield gen.sleep(retry_delay)

    @gen.coroutine
    def _acquire_lease(self):
        """ Performs one round of reading the key and claiming it if possible. """
        tr = self._db.create_transaction()
        lease_value = yield self._tornado_fdb.get(tr, self.key)

        if not lease_value.present():
            self._owner = None
        else:
            self._owner, seen_op_id = fdb.tuple.unpack(lease_value)
            if seen_op_id != self._op_id:
                # The holder refreshed the key: restart the local countdown.
                self._deadline = monotonic.monotonic() + self._LEASE_TIMEOUT
                self._op_id = seen_op_id

        can_acquire = (self._owner is None or
                       monotonic.monotonic() > self._deadline)

        if not can_acquire and self._owner != self._client_id:
            # Since another candidate holds the lock, wait until it might expire.
            remaining = max(self._deadline - monotonic.monotonic(), 0)
            yield gen.sleep(remaining)
            return

        op_id = uuid.uuid4()
        tr[self.key] = fdb.tuple.pack((self._client_id, op_id))
        try:
            yield self._tornado_fdb.commit(tr, convert_exceptions=False)
        except fdb.FDBError as fdb_error:
            if fdb_error.code != FDBErrorCodes.NOT_COMMITTED:
                raise

            # If there was a conflict, try to acquire again later.
            yield gen.sleep(random.random() * 20)
            return

        self._owner = self._client_id
        self._op_id = op_id
        self._deadline = monotonic.monotonic() + self._LEASE_TIMEOUT
        self._event.set()
        if can_acquire:
            logger.info(u'Acquired lock for {!r}'.format(self.key))

        yield gen.sleep(self._HEARTBEAT_INTERVAL)
class IndexManager(object):
    """ Keeps track of configured datastore indexes. """
    # The node which keeps track of admin lock contenders.
    ADMIN_LOCK_NODE = '/appscale/datastore/index_manager_lock'

    def __init__(self, zk_client, datastore_access, perform_admin=False):
        """ Creates a new IndexManager.

        Args:
          zk_client: A kazoo.client.KazooClient object.
          datastore_access: A DatastoreDistributed object.
          perform_admin: A boolean specifying whether or not to perform admin
            operations.
        """
        self.projects = {}
        self._wake_event = AsyncEvent()
        self._zk_client = zk_client
        self.admin_lock = AsyncKazooLock(self._zk_client, self.ADMIN_LOCK_NODE)

        # TODO: Refactor so that this dependency is not needed.
        self._ds_access = datastore_access

        self._zk_client.ensure_path('/appscale/projects')
        self._zk_client.ChildrenWatch('/appscale/projects', self._update_projects)

        # Since this manager can be used synchronously, ensure that the projects
        # are populated for this IOLoop iteration.
        project_ids = self._zk_client.get_children('/appscale/projects')
        self._update_projects_sync(project_ids)

        if perform_admin:
            IOLoop.current().spawn_callback(self._contend_for_admin_lock)

    def _update_projects_sync(self, new_project_ids):
        """ Updates the list of the deployment's projects.

        Args:
          new_project_ids: A list of strings specifying current project IDs.
        """
        for project_id in new_project_ids:
            if project_id not in self.projects:
                self.projects[project_id] = ProjectIndexManager(
                    project_id, self._zk_client, self, self._ds_access)
                if self.admin_lock.is_acquired:
                    IOLoop.current().spawn_callback(
                        self.projects[project_id].apply_definitions)

        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which is an error when iterating a live dictionary view.
        for project_id in list(self.projects):
            if project_id not in new_project_ids:
                self.projects[project_id].active = False
                del self.projects[project_id]

    def _update_projects(self, project_ids):
        """ Watches for changes to list of existing projects.

        Args:
          project_ids: A list of strings specifying current project IDs.
        """
        persistent_update_projects = retry_children_watch_coroutine(
            '/appscale/projects', self._update_projects_sync)
        IOLoop.instance().add_callback(persistent_update_projects, project_ids)

    def _handle_connection_change(self, state=None):
        """ Notifies the admin lock holder when the connection changes.

        Args:
          state: The new connection state passed by the ZooKeeper client to
            its state listeners. It is accepted so that the listener call does
            not fail, but it is not inspected.
        """
        IOLoop.current().add_callback(self._wake_event.set)

    @gen.coroutine
    def _contend_for_admin_lock(self):
        """
        Waits to acquire an admin lock that gives permission to apply index
        definitions. The lock is useful for preventing many servers from writing
        the same index entries at the same time. After acquiring the lock, the
        individual ProjectIndexManagers are responsible for mutating state whenever
        a project's index definitions change.
        """
        while True:
            # Set up a callback to get notified if the ZK connection changes.
            self._wake_event.clear()
            self._zk_client.add_listener(self._handle_connection_change)

            yield self.admin_lock.acquire()
            try:
                for project_index_manager in self.projects.values():
                    IOLoop.current().spawn_callback(
                        project_index_manager.apply_definitions)

                # Release the lock if the kazoo client gets disconnected.
                yield self._wake_event.wait()
            finally:
                self.admin_lock.release()
class ProjectIndexManager(object):
    """ Keeps track of composite index definitions for a project. """

    def __init__(self, project_id, zk_client, index_manager, datastore_access):
        """ Creates a new ProjectIndexManager.

        Args:
          project_id: A string specifying a project ID.
          zk_client: A KazooClient.
          index_manager: An IndexManager used for checking lock status.
          datastore_access: A DatastoreDistributed object.
        """
        self.project_id = project_id
        self.indexes_node = '/appscale/projects/{}/indexes'.format(self.project_id)
        # Cleared by the IndexManager when the project is removed.
        self.active = True
        # Set whenever the index definitions change.
        self.update_event = AsyncEvent()

        # Maps index IDs to the time their definition was first seen here.
        self._creation_times = {}
        self._index_manager = index_manager
        self._zk_client = zk_client
        self._ds_access = datastore_access

        self._zk_client.DataWatch(self.indexes_node, self._update_indexes_watch)

        # Since this manager can be used synchronously, ensure that the indexes
        # are populated for this IOLoop iteration.
        try:
            encoded_indexes = self._zk_client.get(self.indexes_node)[0]
        except NoNodeError:
            encoded_indexes = '[]'

        self.indexes = [DatastoreIndex.from_dict(self.project_id, index)
                        for index in json.loads(encoded_indexes)]

    @property
    def indexes_pb(self):
        """ The project's indexes as protocol buffer objects.

        Raises:
          IndexInaccessible if the ZooKeeper connection is not active.
        """
        if self._zk_client.state != KazooState.CONNECTED:
            raise IndexInaccessible('ZooKeeper connection is not active')

        return [index.to_pb() for index in self.indexes]

    @gen.coroutine
    def apply_definitions(self):
        """ Populate composite indexes that are not marked as ready yet.

        Waits for the next definition update, builds every index that is not
        ready (only while the admin lock is held and the project is active),
        then schedules itself again for as long as the project stays active.
        """
        try:
            yield self.update_event.wait()
            self.update_event.clear()
            if not self._index_manager.admin_lock.is_acquired or not self.active:
                return

            logger.info(
                'Applying composite index definitions for {}'.format(self.project_id))

            for index in self.indexes:
                if index.ready:
                    continue

                # Wait until all clients have either timed out or received the new index
                # definition. This prevents entities from being added without entries
                # while the index is being rebuilt.
                creation_time = self._creation_times.get(index.id, time.time())
                consensus = creation_time + (self._zk_client._session_timeout / 1000.0)
                yield gen.sleep(max(consensus - time.time(), 0))
                yield self._ds_access.update_composite_index(
                    self.project_id, index.to_pb())
                logger.info('Index {} is now ready'.format(index.id))
                self._mark_index_ready(index.id)

            logger.info(
                'All composite indexes for {} are ready'.format(self.project_id))
        finally:
            # A deactivated manager never receives another update, so
            # rescheduling it would only leave a coroutine waiting forever.
            if self.active:
                IOLoop.current().spawn_callback(self.apply_definitions)

    def delete_index_definition(self, index_id):
        """ Remove a definition from a project's list of configured indexes.

        Args:
          index_id: An integer specifying an index ID.
        """
        try:
            encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node)
        except NoNodeError:
            # If there are no index definitions, there is nothing to do.
            return

        node_version = znode_stat.version
        indexes = [DatastoreIndex.from_dict(self.project_id, index)
                   for index in json.loads(encoded_indexes)]

        encoded_indexes = json.dumps([index.to_dict() for index in indexes
                                      if index.id != index_id])
        # Passing the version read above makes the write conditional on it.
        self._zk_client.set(self.indexes_node, encoded_indexes,
                            version=node_version)

    def _mark_index_ready(self, index_id):
        """ Updates the index metadata to reflect the new state of the index.

        Args:
          index_id: An integer specifying an index ID.
        """
        try:
            encoded_indexes, znode_stat = self._zk_client.get(self.indexes_node)
            node_version = znode_stat.version
        except NoNodeError:
            # If for some reason the index no longer exists, there's nothing to do.
            return

        existing_indexes = [DatastoreIndex.from_dict(self.project_id, index)
                            for index in json.loads(encoded_indexes)]
        for existing_index in existing_indexes:
            if existing_index.id == index_id:
                existing_index.ready = True

        indexes_dict = [index.to_dict() for index in existing_indexes]
        self._zk_client.set(self.indexes_node, json.dumps(indexes_dict),
                            version=node_version)

    @gen.coroutine
    def _update_indexes(self, encoded_indexes):
        """ Handles changes to the list of a project's indexes.

        Args:
          encoded_indexes: A string containing index node data.
        """
        encoded_indexes = encoded_indexes or '[]'
        self.indexes = [DatastoreIndex.from_dict(self.project_id, index)
                        for index in json.loads(encoded_indexes)]

        # Mark when indexes are defined so they can be backfilled later.
        self._creation_times.update(
            {index.id: time.time() for index in self.indexes
             if not index.ready and index.id not in self._creation_times})

        self.update_event.set()

    def _update_indexes_watch(self, encoded_indexes, znode_stat):
        """ Handles updates to the project's indexes node.

        Args:
          encoded_indexes: A string containing index node data.
          znode_stat: A kazoo.protocol.states.ZnodeStat object.
        Returns:
          False when the project is no longer active, None otherwise.
        """
        if not self.active:
            return False

        IOLoop.current().add_callback(self._update_indexes, encoded_indexes)
class TornadoSubscriptionManager(SubscriptionManager):
    """Subscription manager that runs on the PubNub instance's Tornado IOLoop.

    It owns the message queue consumed by the subscribe worker, the
    subscribe long-poll loop, the heartbeat timer and the reconnection
    listener.
    """

    def __init__(self, pubnub_instance):
        """Create the manager and start the message-consumer worker.

        Args:
            pubnub_instance: The PubNub client this manager serves.
        """
        # Captured by the nested callback class below, where ``self`` refers
        # to the callback instance rather than to this manager.
        subscription_manager = self

        self._message_queue = Queue()
        self._consumer_event = Event()
        self._cancellation_event = Event()
        # Only one subscribe loop may be in flight at a time.
        self._subscription_lock = Semaphore(1)
        # self._current_request_key_object = None
        self._heartbeat_periodic_callback = None
        self._reconnection_manager = TornadoReconnectionManager(pubnub_instance)

        super(TornadoSubscriptionManager, self).__init__(pubnub_instance)
        self._start_worker()

        class TornadoReconnectionCallback(ReconnectionCallback):
            def on_reconnect(self):
                """Restart the subscribe loop and announce the reconnection."""
                subscription_manager.reconnect()

                pn_status = PNStatus()
                pn_status.category = PNStatusCategory.PNReconnectedCategory
                pn_status.error = False

                subscription_manager._subscription_status_announced = True
                subscription_manager._listener_manager.announce_status(pn_status)

        self._reconnection_listener = TornadoReconnectionCallback()
        self._reconnection_manager.set_reconnection_listener(self._reconnection_listener)

    def _set_consumer_event(self):
        """Set the event shared with the message-consumer worker."""
        self._consumer_event.set()

    def _message_queue_put(self, message):
        """Enqueue ``message`` for the consumer worker."""
        self._message_queue.put(message)

    def _start_worker(self):
        """Create the subscribe message worker and schedule it on the IOLoop."""
        self._consumer = TornadoSubscribeMessageWorker(
            self._pubnub,
            self._listener_manager,
            self._message_queue,
            self._consumer_event)
        run = stack_context.wrap(self._consumer.run)
        self._pubnub.ioloop.spawn_callback(run)

    def reconnect(self):
        """Clear the stop flag and schedule a new subscribe loop."""
        self._should_stop = False
        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
        # self._register_heartbeat_timer()

    def disconnect(self):
        """Set the stop flag, stop the heartbeat timer and cancel the loop."""
        self._should_stop = True
        self._stop_heartbeat_timer()
        self._stop_subscribe_loop()

    @tornado.gen.coroutine
    def _start_subscribe_loop(self):
        """Run one subscribe request and react to its outcome.

        Any request already in flight is cancelled first.  The loop
        re-schedules itself after a successful response or a timeout; on any
        other error it announces access-denied statuses, starts reconnection
        polling and disconnects.
        """
        self._stop_subscribe_loop()

        yield self._subscription_lock.acquire()

        self._cancellation_event.clear()

        combined_channels = self._subscription_state.prepare_channel_list(True)
        combined_groups = self._subscription_state.prepare_channel_group_list(True)

        if not combined_channels and not combined_groups:
            # Nothing to subscribe to.  The lock must be handed back here,
            # otherwise every later loop would wait on acquire() forever.
            self._subscription_lock.release()
            return

        envelope_future = Subscribe(self._pubnub) \
            .channels(combined_channels).channel_groups(combined_groups) \
            .timetoken(self._timetoken).region(self._region) \
            .filter_expression(self._pubnub.config.filter_expression) \
            .cancellation_event(self._cancellation_event) \
            .future()

        canceller_future = self._cancellation_event.wait()

        wi = tornado.gen.WaitIterator(envelope_future, canceller_future)

        # Only the first future to resolve is handled: the ``break`` in the
        # ``finally`` clause always ends the loop after one iteration.
        while not wi.done():
            try:
                result = yield wi.next()
            except Exception as e:
                # NOTE(review): the ``break`` inside ``finally`` below
                # discards this re-raised exception, so the error is logged
                # but never reaches the caller.
                logger.error(e)
                raise
            else:
                if wi.current_future == envelope_future:
                    e = result
                elif wi.current_future == canceller_future:
                    return
                else:
                    raise Exception("Unexpected future resolved: %s" % str(wi.current_future))

                if e.is_error():
                    # 599 error doesn't works - tornado use this status code
                    # for a wide range of errors, for ex:
                    # HTTP Server Error (599): [Errno -2] Name or service not known
                    if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
                        self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
                        return

                    logger.error("Exception in subscribe loop: %s" % str(e))

                    if e.status is not None and e.status.category == PNStatusCategory.PNAccessDeniedCategory:
                        e.status.operation = PNOperationType.PNUnsubscribeOperation

                    self._listener_manager.announce_status(e.status)

                    self._reconnection_manager.start_polling()
                    self.disconnect()
                    return
                else:
                    self._handle_endpoint_call(e.result, e.status)
                    self._pubnub.ioloop.spawn_callback(self._start_subscribe_loop)
            finally:
                # Cancel whichever future is still pending, give the IOLoop
                # one iteration to process that, then free the lock.
                self._cancellation_event.set()
                yield tornado.gen.moment
                self._subscription_lock.release()
                self._cancellation_event.clear()
                break

    def _stop_subscribe_loop(self):
        """Signal cancellation to the subscribe request in flight, if any."""
        if self._cancellation_event is not None and not self._cancellation_event.is_set():
            self._cancellation_event.set()

    def _stop_heartbeat_timer(self):
        """Stop the periodic heartbeat callback if one was registered."""
        if self._heartbeat_periodic_callback is not None:
            self._heartbeat_periodic_callback.stop()

    def _register_heartbeat_timer(self):
        """Install and start the periodic heartbeat callback."""
        super(TornadoSubscriptionManager, self)._register_heartbeat_timer()

        self._heartbeat_periodic_callback = PeriodicCallback(
            stack_context.wrap(self._perform_heartbeat_loop),
            self._pubnub.config.heartbeat_interval *
            TornadoSubscriptionManager.HEARTBEAT_INTERVAL_MULTIPLIER,
            self._pubnub.ioloop)
        self._heartbeat_periodic_callback.start()

    @tornado.gen.coroutine
    def _perform_heartbeat_loop(self):
        """Send one heartbeat and announce its status per the configured verbosity."""
        if self._heartbeat_call is not None:
            # TODO: cancel call
            pass

        cancellation_event = Event()
        state_payload = self._subscription_state.state_payload()
        presence_channels = self._subscription_state.prepare_channel_list(False)
        presence_groups = self._subscription_state.prepare_channel_group_list(False)

        if not presence_channels and not presence_groups:
            return

        try:
            envelope = yield self._pubnub.heartbeat() \
                .channels(presence_channels) \
                .channel_groups(presence_groups) \
                .state(state_payload) \
                .cancellation_event(cancellation_event) \
                .future()

            heartbeat_verbosity = self._pubnub.config.heartbeat_notification_options
            # NOTE(review): ``is_error`` is read without being called here,
            # unlike ``e.is_error()`` in the subscribe loop - confirm that it
            # is a property on the status object.
            if envelope.status.is_error:
                # Failures are announced for both ALL and FAILURES.  The
                # original compared against ALL twice, so FAILURES-only
                # configurations never received failure notifications.
                if heartbeat_verbosity in (PNHeartbeatNotificationOptions.ALL,
                                           PNHeartbeatNotificationOptions.FAILURES):
                    self._listener_manager.announce_status(envelope.status)
            else:
                if heartbeat_verbosity == PNHeartbeatNotificationOptions.ALL:
                    self._listener_manager.announce_status(envelope.status)

        except PubNubTornadoException:
            pass
            # TODO: check correctness
            # if e.status is not None and e.status.category == PNStatusCategory.PNTimeoutCategory:
            #     self._start_subscribe_loop()
            # else:
            #     self._listener_manager.announce_status(e.status)
        except Exception as e:
            # Report through the module logger instead of printing to stdout.
            logger.error("Exception in heartbeat loop: %s" % str(e))
        finally:
            cancellation_event.set()

    @tornado.gen.coroutine
    def _send_leave(self, unsubscribe_operation):
        """Send a leave request for the given channels/groups and announce its status."""
        envelope = yield Leave(self._pubnub) \
            .channels(unsubscribe_operation.channels) \
            .channel_groups(unsubscribe_operation.channel_groups).future()
        self._listener_manager.announce_status(envelope.status)
class SlaveHolder:
    """Starts and supervises one ``Slave`` per active registered bot.

    It also serves slave-holder queue requests: start a new bot, validate a
    bot token, and wait for a moderation group to be attached.
    """

    def __init__(self, db, queue):
        """Store the database and queue handles; no bot is started yet."""
        self.db = db
        self.slaves = {}
        self._finished = Event()
        self._finished.set()
        self.queue = queue

    @coroutine
    def start(self):
        """Start every active bot, then serve queue requests until ``stop()``."""
        self._finished.clear()
        logging.debug('Starting slave-holder')

        cur = yield self.db.execute(
            'SELECT * FROM registered_bots WHERE active = TRUE')
        columns = [i[0] for i in cur.description]

        while True:
            row = cur.fetchone()
            if not row:
                break
            self._start_bot(**dict(zip(columns, row)))

        listen_future = self.queue.listen(slaveholder_queues(),
                                          self.queue_handler)

        try:
            yield self._finished.wait()
        finally:
            self.queue.stop(slaveholder_queues())
            yield listen_future

    def _start_bot(self, **kwargs):
        """Create a ``Slave`` from a ``registered_bots`` row and watch it.

        When the slave's listen future finishes with an ``ApiError`` that
        cannot be recovered from, a revoke message is queued; any other
        error schedules a restart in five seconds.
        """
        @coroutine
        def listen_done(f: Future):
            logging.debug('[bot#%s] Terminated', kwargs['id'])

            e = f.exception()
            if e:
                # Two placeholders for two arguments: the original had a
                # third ``%s`` and therefore failed inside logging.
                logging.debug('[bot#%s] Got exception: %s', kwargs['id'],
                              ''.join(format_exception(*f.exc_info())))

                revoke_reason = None
                if isinstance(e, ApiError):
                    if e.code == 401:
                        revoke_reason = 'connection error'
                    elif e.code == 400 and 'chat not found' in e.description and \
                            str(kwargs['moderator_chat_id']) in e.request_body:
                        revoke_reason = 'unavailable moderator chat'
                    elif e.code == 409 and 'webhook is active' in e.description:
                        revoke_reason = 'misconfigured webhook'

                if revoke_reason is not None:
                    logging.warning('[bot#%d] Disabling due to %s',
                                    kwargs['id'], revoke_reason)
                    yield self.queue.send(QUEUE_BOTERATOR_BOT_REVOKE,
                                          dumps(dict(error=str(e), **kwargs)))
                else:
                    IOLoop.current().add_timeout(timedelta(seconds=5),
                                                 self._start_bot, **kwargs)

            # The terminated slave is no longer tracked.
            self.slaves.pop(kwargs['id'], None)

        slave = Slave(db=self.db, **kwargs)
        slave_listen_f = slave.start()
        self.slaves[kwargs['id']] = {
            'future': slave_listen_f,
            'instance': slave,
        }
        IOLoop.current().add_future(slave_listen_f, listen_done)

    def stop(self):
        """Stop every running slave and release ``start()``."""
        logging.info('Stopping slave-holder')
        for slave in self.slaves.values():
            slave['instance'].stop()
        self._finished.set()

    @coroutine
    def queue_handler(self, queue_name, body):
        """Handle one message from a slave-holder queue.

        Args:
            queue_name: Name of the queue the message arrived on.
            body: UTF-8 encoded payload, decoded with ``loads``.

        Raises:
            Exception: If ``queue_name`` is not a known slave-holder queue.
        """
        body = loads(body.decode('utf-8'))

        if queue_name == QUEUE_SLAVEHOLDER_NEW_BOT:
            self._start_bot(**body)
        elif queue_name == QUEUE_SLAVEHOLDER_GET_BOT_INFO:
            bot = Api(body['token'], lambda x: None)

            if bot.bot_id in self.slaves:
                logging.debug('[bot#%s] Already registered', bot.bot_id)
                yield self.queue.send(body['reply_to'],
                                      dumps(dict(error='duplicate')))
                # Stop here so the requester gets exactly one reply.
                return

            try:
                ret = yield bot.get_me()
                logging.debug('[bot#%s] Ok', bot.bot_id)
            except Exception as e:
                logging.debug('[bot#%s] Failed', bot.bot_id)
                yield self.queue.send(body['reply_to'],
                                      dumps(dict(error=str(e))))
                return

            yield self.queue.send(body['reply_to'], dumps(ret))
        elif queue_name == QUEUE_SLAVEHOLDER_GET_MODERATION_GROUP:
            update_with_command_f = Future()
            timeout_f = with_timeout(timedelta(seconds=body['timeout']),
                                     update_with_command_f)

            @coroutine
            def slave_update_handler(update):
                logging.debug('[bot#%s] Received update', bot.bot_id)
                if update_with_command_f.done():
                    # A qualifying update was already delivered; setting the
                    # result a second time would raise.
                    return

                if attach_cmd_filter.test(**update):
                    logging.debug('[bot#%s] /attach', bot.bot_id)
                    update_with_command_f.set_result(update)
                elif bot_added.test(**update):
                    logging.debug('[bot#%s] bot added to a group', bot.bot_id)
                    update_with_command_f.set_result(update)
                elif CommandFilterGroupChatCreated.test(**update) or \
                        CommandFilterSupergroupChatCreated.test(**update):
                    logging.debug('[bot#%s] group created', bot.bot_id)
                    update_with_command_f.set_result(update)
                else:
                    # The original omitted the bot id argument here.
                    logging.debug('[bot#%s] unsupported update: %s',
                                  bot.bot_id, dumps(update, indent=2))

            bot = Api(body['token'], slave_update_handler)

            @coroutine
            def handle_finish(f):
                bot.stop()
                if not f.exception():
                    logging.debug('[bot#%s] Done', bot.bot_id)
                    update = f.result()
                    yield self.queue.send(
                        body['reply_to'],
                        dumps(dict(sender=update['message']['from'],
                                   **update['message']['chat'])))

                    # Mark last update as read
                    f2 = bot.get_updates(update['update_id'] + 1, timeout=0,
                                         retry_on_nonuser_error=True)
                    f2.add_done_callback(
                        lambda x: x.exception())  # Ignore any exceptions
                else:
                    logging.debug('[bot#%s] Failed: %s', bot.bot_id,
                                  f.exception())

            timeout_f.add_done_callback(handle_finish)

            attach_cmd_filter = CommandFilterTextCmd('/attach')
            bot_added = CommandFilterNewChatMember(bot.bot_id)

            logging.debug('[bot#%s] Waiting for moderation group', bot.bot_id)
            bot.wait_commands()
        else:
            # Interpolate the name; passing it as a second Exception argument
            # left the message unformatted.
            raise Exception('Unknown queue: %s' % queue_name)
class Queue(object):
    """Coordinate producer and consumer coroutines.

    If maxsize is 0 (the default) the queue size is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

        @gen.coroutine
        def producer():
            for item in range(5):
                yield q.put(item)
                print('Put %s' % item)

        @gen.coroutine
        def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            yield producer()     # Wait for producer to put all tasks.
            yield q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    In Python 3.5, `Queue` implements the async iterator protocol, so
    ``consumer()`` could be rewritten as::

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.
    """

    def __init__(self, maxsize=0):
        if maxsize is None:
            raise TypeError("maxsize can't be None")
        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        # Waiting consumers: Futures resolved with the next item.
        self._getters = collections.deque()
        # Waiting producers: (item, Future) pairs.
        self._putters = collections.deque()
        self._unfinished_tasks = 0
        # Set whenever no put item is still awaiting ``task_done``.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True when a bounded queue has reached ``maxsize``."""
        if self.maxsize == 0:
            # Unbounded queues are never full.
            return False
        return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            # No room: park the item until a consumer makes space.
            waiter = Future()
            self._putters.append((item, waiter))
            _set_timeout(waiter, timeout)
            return waiter
        return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if self._getters:
            assert self.empty(), "queue non-empty, why are getters waiting?"
            waiting_getter = self._getters.popleft()
            self.__put_internal(item)
            waiting_getter.set_result(self._get())
            return
        if self.full():
            raise QueueFull
        self.__put_internal(item)

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        result = Future()
        try:
            result.set_result(self.get_nowait())
        except QueueEmpty:
            # Nothing available yet: register as a waiting consumer.
            self._getters.append(result)
            _set_timeout(result, timeout)
        return result

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            assert self.full(), "queue not full, why are putters waiting?"
            pending_item, waiting_putter = self._putters.popleft()
            self.__put_internal(pending_item)
            waiting_putter.set_result(None)
            return self._get()
        if self.qsize():
            return self._get()
        raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError('task_done() called too many times')
        self._unfinished_tasks -= 1
        if not self._unfinished_tasks:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self):
        return _QueueIterator(self)

    # These three are overridable in subclasses.
    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        self._queue.append(item)
    # End of the overridable methods.

    def __put_internal(self, item):
        # Every stored item counts as unfinished until ``task_done``.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self):
        # Remove timed-out waiters.
        putters = self._putters
        while putters and putters[0][1].done():
            putters.popleft()

        getters = self._getters
        while getters and getters[0].done():
            getters.popleft()

    def __repr__(self):
        return '<%s at %s %s>' % (
            type(self).__name__, hex(id(self)), self._format())

    def __str__(self):
        return '<%s %s>' % (type(self).__name__, self._format())

    def _format(self):
        parts = ['maxsize=%r' % (self.maxsize, )]
        if getattr(self, '_queue', None):
            parts.append('queue=%r' % self._queue)
        if self._getters:
            parts.append('getters[%s]' % len(self._getters))
        if self._putters:
            parts.append('putters[%s]' % len(self._putters))
        if self._unfinished_tasks:
            parts.append('tasks=%s' % self._unfinished_tasks)
        return ' '.join(parts)
class ConnectionPool(object):
    """ A maximum sized pool of Comm objects.

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(address) for address ...]

        >>> info = yield scheduler.identity()

    It creates enough comms to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing comms so that we don't have to continuously reconnect.

    It also maintains a comm limit to avoid "too many open file handle"
    issues.  Whenever this maximum is reached we clear out all idling comms.
    If that doesn't do the trick then we wait until one of the occupied comms
    closes.

    Parameters
    ----------
    limit: int
        The number of open comms to maintain at once
    deserialize: bool
        Whether or not to deserialize data by default or pass it through
    """

    _instances = weakref.WeakSet()

    def __init__(
        self,
        limit=512,
        deserialize=True,
        serializers=None,
        deserializers=None,
        connection_args=None,
        timeout=None,
        server=None,
    ):
        # Upper bound on the number of simultaneously open comms.
        self.limit = limit
        # Idle comms per address; len(available) == open - active.
        self.available = defaultdict(set)
        # Comms currently handed out per address; len(occupied) == active.
        self.occupied = defaultdict(set)
        self.deserialize = deserialize
        self.serializers = serializers
        # Deserializers fall back to the serializers when not given.
        if deserializers is None:
            deserializers = serializers
        self.deserializers = deserializers
        self.connection_args = connection_args
        self.timeout = timeout
        # Set whenever the pool drops below its limit.
        self.event = Event()
        self.server = weakref.ref(server) if server else None
        self._created = weakref.WeakSet()
        self._instances.add(self)

    @property
    def active(self):
        """Number of comms currently handed out."""
        return sum(len(comms) for comms in self.occupied.values())

    @property
    def open(self):
        """Number of comms that are either handed out or idle."""
        idle = sum(len(comms) for comms in self.available.values())
        return self.active + idle

    def __repr__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open, self.active)

    def __call__(self, addr=None, ip=None, port=None):
        """ Cached rpc objects """
        addr = addr_from_args(addr=addr, ip=ip, port=port)
        return PooledRPCCall(
            addr,
            self,
            serializers=self.serializers,
            deserializers=self.deserializers,
        )

    async def connect(self, addr, timeout=None):
        """
        Get a Comm to the given address.  For internal use.
        """
        idle = self.available[addr]
        in_use = self.occupied[addr]

        # Try one idle comm first; a closed one is simply dropped.
        if idle:
            candidate = idle.pop()
            if not candidate.closed():
                in_use.add(candidate)
                return candidate

        # At capacity: discard idle comms and wait for a free slot.
        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            await self.event.wait()

        comm = await connect(
            addr,
            timeout=timeout or self.timeout,
            deserialize=self.deserialize,
            connection_args=self.connection_args,
        )
        comm.name = "ConnectionPool"
        comm._pool = weakref.ref(self)
        self._created.add(comm)

        in_use.add(comm)

        if self.open >= self.limit:
            self.event.clear()

        return comm

    def reuse(self, addr, comm):
        """
        Reuse an open communication to the given address.  For internal use.
        """
        try:
            self.occupied[addr].remove(comm)
        except KeyError:
            # Not one of ours (any more); nothing to recycle.
            return

        if not comm.closed():
            self.available[addr].add(comm)
        elif self.open < self.limit:
            self.event.set()

    def collect(self):
        """
        Collect open but unused communications, to allow opening other ones.
        """
        logger.info("Collecting unused comms.  open: %d, active: %d", self.open, self.active)
        loop_add = IOLoop.current
        for idle_comms in self.available.values():
            for comm in idle_comms:
                loop_add().add_callback(comm.close)
            idle_comms.clear()
        if self.open < self.limit:
            self.event.set()

    def remove(self, addr):
        """
        Remove all Comms to a given address.
        """
        logger.info("Removing comms to %s", addr)
        for registry in (self.available, self.occupied):
            if addr in registry:
                for comm in registry.pop(addr):
                    IOLoop.current().add_callback(comm.close)
        if self.open < self.limit:
            self.event.set()

    def close(self):
        """
        Close all communications abruptly.
        """
        for registry in (self.available, self.occupied):
            for comms in registry.values():
                for comm in comms:
                    comm.abort()

        for comm in self._created:
            IOLoop.current().add_callback(comm.abort)
class Base:
    """Base class for bots: settings storage, command tree and update workers.

    Attributes that are not found on the instance are looked up on the
    wrapped ``Api`` object (see ``__getattr__``).
    """

    SETTINGS_PER_BOT = 1
    SETTINGS_PER_USER = 2

    SETTINGS_TYPE = SETTINGS_PER_BOT

    def __init__(self, token, stages_builder: callable, **kwargs):
        """Set up the API wrapper, the handler tree and the update queue.

        Args:
            token: Token passed to ``Api``.
            stages_builder: Callable invoked as ``stages_builder(bot_id=...)``
                to create the stage storage.
            **kwargs: ``settings`` and ``ignore_403_in_handlers`` are consumed
                here; every remaining key is stored as an instance attribute.
        """
        self.token = token
        self.settings = kwargs.pop('settings', {})
        self.ignore_403_in_handlers = kwargs.pop('ignore_403_in_handlers',
                                                 False)

        for key, value in kwargs.items():
            self.__dict__[key] = value

        self.api = Api(token, self.process_update)
        self.user_settings = {}
        self.commands = {}
        self.raw_commands_tree = {}
        self.cancellation_handler = None
        self.unknown_command_handler = None
        self.updates_queue = Queue(
            kwargs.get('updates_queue_handlers', 4) * 10)
        self._init_handlers()
        # ``bot_id`` is resolved through ``__getattr__`` on ``self.api``.
        self._stages = stages_builder(bot_id=self.bot_id)
        self._finished = Event()
        self._supported_languages = ()

    def _init_handlers(self):
        """Register command handlers; must be provided by subclasses."""
        raise NotImplementedError()

    def _add_handler(self,
                     handler: callable,
                     name: pgettext = None,
                     previous_handler: callable = None,
                     is_final=True):
        """Register ``handler`` as a follow-up of ``previous_handler``.

        Args:
            handler: The command handler callable.
            name: Name passed to ``Command`` when the handler is first seen.
            previous_handler: Handler after which this one becomes available;
                ``None`` registers it at the root (``'none'``).
            is_final: Stored with the handler in the tree.  For a non-final
                handler the cancellation handler, when set, is registered as
                its follow-up.

        Raises:
            BotError: If ``previous_handler`` is unknown, or if ``handler``
                is already registered at this position and is not the
                cancellation handler.
        """
        if handler not in self.commands:
            self.commands[handler] = Command(self, handler, name)

        if previous_handler and previous_handler not in self.commands:
            raise BotError('Previous command is unknown')

        previous_handler_name = previous_handler.__name__ if previous_handler else 'none'

        if previous_handler_name not in self.raw_commands_tree:
            self.raw_commands_tree[previous_handler_name] = []
        else:
            for h, _ in self.raw_commands_tree[previous_handler_name]:
                if h.handler == handler and handler != self.cancellation_handler:
                    raise BotError('Command already registered')
                elif h.handler == handler:
                    # Cancellation handler already attached here.
                    return

        self.raw_commands_tree[previous_handler_name].append(
            (self.commands[handler], is_final))

        if not is_final and self.cancellation_handler:
            self._add_handler(self.cancellation_handler,
                              previous_handler=handler,
                              is_final=True)

    def _load_user_settings_per_user(self):
        """Return the initial per-user settings mapping (empty by default)."""
        return {}

    def _update_settings_for_bot(self, settings):
        """Hook called with the bot-wide settings after an update."""
        pass

    def _update_settings_for_user(self, user_id, settings):
        """Hook called with one user's settings after an update."""
        pass

    @coroutine
    def update_settings(self, user_id, **kwargs):
        """Merge ``kwargs`` into the bot-wide or per-user settings.

        Which store is updated depends on ``SETTINGS_TYPE``; the matching
        ``_update_settings_for_*`` hook is then invoked.
        """
        logging.info('[bot#%s] Updating settings to %s by user#%s',
                     self.bot_id, kwargs, user_id)
        if self.SETTINGS_TYPE == self.SETTINGS_PER_BOT:
            self.settings.update(kwargs)
            yield maybe_future(self._update_settings_for_bot(self.settings))
        else:
            if user_id not in self.user_settings:
                self.user_settings[user_id] = kwargs
            else:
                self.user_settings[user_id].update(kwargs)
            # Hand the hook this user's settings; the original passed the
            # bot-wide ``self.settings`` here.
            yield maybe_future(
                self._update_settings_for_user(user_id,
                                               self.user_settings[user_id]))

    def get_settings(self, user_id):
        """Return a deep copy of the settings that apply to ``user_id``."""
        if self.SETTINGS_TYPE == self.SETTINGS_PER_BOT:
            return deepcopy(self.settings)
        return deepcopy(self.user_settings.get(user_id, {}))

    @coroutine
    def start(self):
        """Load settings, spawn the update workers and poll for commands."""
        logging.debug('[bot#%s] Starting', self.bot_id)
        self._finished.clear()
        self.user_settings = yield maybe_future(
            self._load_user_settings_per_user())
        handlers_f = [
            self._update_processor()
            for _ in range(self.settings.get('updates_queue_handlers', 4))
        ]
        yield maybe_future(self._stages.restore())
        try:
            yield self.api.wait_commands()
        finally:
            # ``stop()`` asserts that the bot is still running, so skip it
            # when an external ``stop()`` is what ended ``wait_commands``.
            if self.is_alive:
                self.stop()
            yield handlers_f

    def stop(self):
        """Mark the bot as finished and stop the API if it is alive."""
        assert not self._finished.is_set()
        logging.debug('[bot#%s] Terminating', self.bot_id)
        self._finished.set()
        if self.api.is_alive:
            self.api.stop()

    @property
    def is_alive(self):
        """True until ``stop()`` has been called."""
        return not self._finished.is_set()

    @coroutine
    def process_update(self, update):
        """Queue an incoming update for the worker coroutines."""
        yield self.updates_queue.put(update)

    @staticmethod
    def get_stage_key(update):
        """Build the ``'<user_id>-<chat_id>'`` key used to look up a stage.

        Raises:
            BotError: If the update is of a type that carries none of the
                supported keys.
        """
        if 'message' in update:
            chat_id = update['message']['chat']['id']
            user_id = update['message']['from']['id']
        elif 'callback_query' in update:
            callback_query = update['callback_query']
            if 'message' in callback_query:
                chat_id = callback_query['message']['chat']['id']
            else:
                chat_id = callback_query['from']['id']
            user_id = callback_query['from']['id']
        elif 'channel_post' in update:
            # Channel posts use the chat id for both parts of the key.
            chat_id = update['channel_post']['chat']['id']
            user_id = update['channel_post']['chat']['id']
        elif 'edited_channel_post' in update:
            chat_id = update['edited_channel_post']['chat']['id']
            user_id = update['edited_channel_post']['chat']['id']
        else:
            raise BotError('Unable to get stage_key for this type of update')

        return '%s-%s' % (user_id, chat_id)

    @coroutine
    def _update_processor(self):
        """Worker loop: take updates from the queue and dispatch them.

        Handlers of the current stage (or the root handlers) are tried in
        order until one returns something other than ``False``.
        """
        while not self._finished.is_set():
            try:
                received_update = yield self.updates_queue.get(
                    timedelta(seconds=3))
            except Exception:
                # Nothing arrived within the timeout; re-check the stop flag.
                continue

            try:
                # Tolerate a missing id instead of killing the worker.
                received_update.pop('update_id', None)

                stage_key = self.get_stage_key(received_update)
                current_stage = self._stages[stage_key]
                if current_stage:
                    stage_data = current_stage[1]
                    received_update.update(current_stage[1])
                    commands_tree = self.raw_commands_tree[current_stage[0]]
                else:
                    stage_data = {}
                    commands_tree = self.raw_commands_tree['none']

                processing_result = False
                for command_in_tree in commands_tree:
                    try:
                        processing_result = yield command_in_tree[0](
                            **received_update)
                    except ApiError as e:
                        if not self.ignore_403_in_handlers or str(
                                e.code) != '403':
                            raise
                        else:
                            logging.exception(
                                'Got exception in message handler')

                    if processing_result is not False:
                        # The handler accepted the update.
                        if not command_in_tree[1] and processing_result is not None:
                            # Non-final handler: remember the stage and
                            # whatever data the handler returned.
                            if processing_result is True:
                                processing_result = {}
                            stage_data.update(processing_result)
                            self._stages[stage_key] = command_in_tree[
                                0].handler, stage_data
                        elif processing_result is not None:
                            del self._stages[stage_key]
                        break

                if processing_result is False:
                    logging.debug('Handler not found: %s',
                                  dumps(received_update, indent=2))
                    if self.unknown_command_handler:
                        try:
                            yield maybe_future(
                                self.unknown_command_handler(
                                    self, **received_update))
                        except ApiError as e:
                            if not self.ignore_403_in_handlers or str(
                                    e.code) != '403':
                                raise
                            else:
                                logging.exception(
                                    'Got exception in message handler')
            except Exception:
                logging.exception(
                    '[bot#%s] Got error while processing message %s',
                    self.bot_id, dumps(received_update, indent=2))
            finally:
                # Always balance the ``get`` so ``join()`` can complete.
                self.updates_queue.task_done()

    def __getattr__(self, name):
        """Delegate unknown attributes to ``self.api``.

        Bound methods of the API are wrapped so that their arguments are
        passed through ``set_locale_recursive`` with the locale taken from
        the bot or user settings.

        Raises:
            AttributeError: If neither the bot nor the API has ``name``.
        """
        if name == 'api':
            # ``api`` itself is missing (e.g. before ``__init__`` ran);
            # looking it up again below would recurse forever.
            raise AttributeError("'%s' object has no attribute '%s'" %
                                 (self.__class__.__name__, name))

        def outer_wrapper(f):
            @wraps(f)
            def wrapper(*args, **kwargs):
                l = locale.get('en_US')
                if self.SETTINGS_TYPE == self.SETTINGS_PER_BOT:
                    l = locale.get(self.settings.get('locale', 'en_US'))
                elif self.SETTINGS_TYPE == self.SETTINGS_PER_USER:
                    chat_id = None
                    if 'reply_to_message' in kwargs:
                        if 'chat' in kwargs['reply_to_message']:
                            chat_id = kwargs['reply_to_message']['chat']['id']
                        elif 'from' in kwargs['reply_to_message']:
                            chat_id = kwargs['reply_to_message']['from']['id']
                    elif 'chat_id' in kwargs:
                        chat_id = kwargs['chat_id']

                    if chat_id in self.user_settings:
                        l = locale.get(self.user_settings[chat_id].get(
                            'locale', 'en_US'))

                return f(*set_locale_recursive(args, l),
                         **set_locale_recursive(kwargs, l))

            return wrapper

        if hasattr(self.api, name):
            attr = getattr(self.api, name)
            # Only bound methods are wrapped; plain attributes pass through.
            if isinstance(attr, type(self.stop)):
                return outer_wrapper(attr)
            else:
                return attr
        else:
            raise AttributeError("'%s' object has no attribute '%s'" %
                                 (self.__class__.__name__, name))
class ConnectionPool(object):
    """ A maximum sized pool of Tornado IOStreams

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(ip=ip, port=port) for ip, port in ...]

        >>> info = yield scheduler.identity()

    It creates enough streams to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing streams so that we don't have to continuously reconnect.

    It also maintains a stream limit to avoid "too many open file handle"
    issues.  Whenever this maximum is reached we clear out all idling streams.
    If that doesn't do the trick then we wait until one of the occupied streams
    closes.
    """

    def __init__(self, limit=512):
        # Streams opened (or being opened) and not yet closed.
        self.open = 0
        # Streams currently handed out to callers.
        self.active = 0
        self.limit = limit
        # (ip, port) -> idle streams / streams in use.
        self.available = defaultdict(set)
        self.occupied = defaultdict(set)
        # Set when a slot may have become free.
        self.event = Event()

    def __str__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open,
                                                         self.active)

    __repr__ = __str__

    def __call__(self, arg=None, ip=None, port=None, addr=None):
        """ Cached rpc objects """
        ip, port = ip_port_from_args(arg=arg, addr=addr, ip=ip, port=port)
        return RPCCall(ip, port, self)

    @gen.coroutine
    def connect(self, ip, port, timeout=3):
        """Return a stream to ``(ip, port)``, reusing an idle one if possible.

        When the pool is at its limit, idle streams are closed and the call
        waits until a slot is free before opening a new stream.
        """
        if self.available.get((ip, port)):
            stream = self.available[ip, port].pop()
            self.active += 1
            self.occupied[ip, port].add(stream)
            raise gen.Return(stream)

        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        # Reserve the slot before yielding so concurrent callers see it.
        self.open += 1
        try:
            stream = yield connect(ip=ip, port=port, timeout=timeout)
        except Exception:
            # The stream never came into existence: give the slot back,
            # otherwise each failed attempt shrinks the pool permanently.
            self.open -= 1
            if self.open <= self.limit:
                self.event.set()
            raise
        stream.set_close_callback(lambda: self.on_close(ip, port, stream))
        self.active += 1
        self.occupied[ip, port].add(stream)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(stream)

    def on_close(self, ip, port, stream):
        """Close callback: forget ``stream`` and free its slot."""
        self.open -= 1

        if stream in self.available[ip, port]:
            self.available[ip, port].remove(stream)
        if stream in self.occupied[ip, port]:
            self.occupied[ip, port].remove(stream)
            self.active -= 1

        if self.open <= self.limit:
            self.event.set()

    def collect(self):
        """Close every idle stream; bookkeeping happens in ``on_close``."""
        logger.info("Collecting unused streams.  open: %d, active: %d",
                    self.open, self.active)
        for k, streams in list(self.available.items()):
            # Iterate over a snapshot: ``on_close`` removes streams from
            # this very set.
            for stream in list(streams):
                stream.close()
class XBeeBase(_XBeeBase):
    """
    Abstract base class providing command generation and response
    parsing methods for XBee modules.

    Constructor arguments:
        ser:    The file-like serial port to use.

        shorthand: boolean flag which determines whether shorthand command
                   calls (i.e. xbee.at(...) instead of xbee.send("at",...)
                   are allowed.

        callback: function which should be called with frame data
                  whenever a frame arrives from the serial port.

        escaped: boolean flag which determines whether the library should
                 operate in escaped mode. In this mode, certain data bytes
                 in the output and input streams will be escaped and unescaped
                 in accordance with the XBee API. This setting must match
                 the appropriate api_mode setting of an XBee device; see your
                 XBee device's documentation for more information.

        error_callback: function which should be called with an Exception
                 whenever an exception is raised while waiting for data from
                 the serial port. This will only take effect if the callback
                 argument is also used.
    """
    def __init__(self, *args, **kwargs):
        # An explicit ``io_loop`` keyword overrides the current IOLoop.
        if 'io_loop' in kwargs:
            self._ioloop = kwargs.pop('io_loop')
        else:
            self._ioloop = ioloop.IOLoop.current()

        super(XBeeBase, self).__init__(*args, **kwargs)

        self._running = Event()
        self._running.set()

        # Future of the caller currently waiting for a frame, if any.
        self._frame_future = None
        # Frames that arrived while nobody was waiting.
        self._frame_queue = deque()

        if self._callback:
            # Make Non-Blocking
            self.serial.timeout = 0
            self.process_frames()

        self._ioloop.add_handler(self.serial.fd,
                                 self._process_input,
                                 ioloop.IOLoop.READ)

    def halt(self):
        """
        halt: None -> None

        Stop the event, and remove the FD from the loop handler
        """
        if self._callback:
            self._running.clear()
            self._ioloop.remove_handler(self.serial.fd)

            if self._frame_future is not None:
                # Wake the waiter with ``None``; ``process_frames`` treats
                # that as the stop signal.
                self._frame_future.set_result(None)
                self._frame_future = None

    @gen.coroutine
    def process_frames(self):
        """
        process_frames: None -> None

        Wait for a frame to become available, when resolved call the callback
        """
        while self._running.is_set():
            try:
                frame = yield self._get_frame()
                if frame is None:
                    # ``halt()`` resolved the future without a frame; this
                    # is a normal shutdown, not an error to report.
                    continue
                info = self._split_response(frame.data)
                if info is not None:
                    self._callback(info)
            except Exception as e:
                # Unexpected quit.
                if self._error_callback:
                    self._error_callback(e)

    @gen.coroutine
    def wait_read_frame(self, timeout=None):
        """Wait for the next frame and return its parsed response.

        ``timeout`` is a number of seconds, added to the IOLoop's current
        time; when it elapses the call raises ``_TimeoutException``.
        """
        frame = yield self._get_frame(timeout=timeout)
        raise gen.Return(self._split_response(frame.data))

    def _get_frame(self, timeout=None):
        """Return a Future for the next frame.

        A queued frame resolves the future immediately; otherwise the
        future is registered as the pending waiter.
        """
        future = Future()
        if self._frame_queue:
            future.set_result(self._frame_queue.popleft())
            return future

        if timeout is not None:
            def on_timeout():
                # Unregister the expired waiter so the next frame is queued
                # instead of being delivered to an already finished future.
                if self._frame_future is future:
                    self._frame_future = None
                if not future.done():
                    future.set_exception(_TimeoutException())

            handle = self._ioloop.add_timeout(
                self._ioloop.time() + timeout, on_timeout)
            future.add_done_callback(
                lambda _: self._ioloop.remove_timeout(handle))

        self._frame_future = future

        return future

    def _process_input(self, data, events):
        """
        _process_input:

        _process_input will be notified when there is data ready on the
        serial connection to be read.  It will read and process the data
        into an API Frame and then either resolve a frame future, or push
        the frame into the queue of frames needing to be processed
        """
        frame = APIFrame(escaped=self._escaped)

        byte = self.serial.read()

        # Anything before a start byte is not the beginning of a frame.
        if byte != APIFrame.START_BYTE:
            return

        # Save all following bytes, if they are not empty
        if len(byte) == 1:
            frame.fill(byte)

        while frame.remaining_bytes() > 0:
            byte = self.serial.read()

            if len(byte) == 1:
                frame.fill(byte)

        try:
            # Try to parse and return result
            frame.parse()

            # Ignore empty frames
            if len(frame.data) == 0:
                return

            waiter = self._frame_future
            if waiter is not None and not waiter.done():
                waiter.set_result(frame)
                self._frame_future = None
            else:
                # Nobody is waiting (or the waiter already finished).
                self._frame_future = None
                self._frame_queue.append(frame)
        except ValueError:
            return
class DataQueue(object):
    """Sequenced receive buffer plus packetising send helpers.

    The methods read ``self.mss``, ``self.sock_id``, ``self.write`` and
    ``self.closed``, none of which are defined in this class; presumably it
    is used as a mixin -- TODO confirm against the class that combines it.
    """

    def initialize(self, window_size=25600, write_queue=None):
        """Create the receive/sent sequences, the data event and write queue.

        A fresh ``EventQueue`` is created when ``write_queue`` is None.
        """
        self._rcv_data = DictSequence(window_size, seq_keeper)
        self._sent_data = DictSequence(window_size, seq_keeper)
        # Bytes already pulled from _rcv_data but not yet returned by recv()
        self._left_over = ""
        self._data_event = Event()
        self._write_queue = EventQueue(
        ) if write_queue is None else write_queue
        self._next_seq = -1

    def push_data(self, seq, data):
        """Store ``data`` under ``seq`` and set the data event when readable.

        The packet is ignored when its offset from the last sequence number
        is below 1.
        """
        # print "@", seq
        last_no = self._rcv_data.seq_no
        offset = seq_keeper.offset(last_no, seq)
        if offset < 1:
            return  # drop that packet, we already received it.
        self._rcv_data[seq] = data

        # data loss detection
        next_seq = self._rcv_data.next_seq()
        offset = seq_keeper.offset(seq, next_seq)
        if offset < 0:
            print "@ data lost", seq_keeper.incr(next_seq)

        # Notify if we have a sequence of data that can be read
        if self._rcv_data.has_seq_data():
            self._data_event.set()

    @coroutine
    def recv(self, max_len):
        """Wait for data and return at most ``max_len`` bytes of it.

        Left-over bytes from the previous call come first, followed by the
        chunks iterated from ``_rcv_data``; any surplus is kept in
        ``_left_over`` for the next call.

        Raises ``Shutdown`` when ``self.closed()`` is true after waking up.
        """
        yield self._data_event.wait()
        if self.closed():
            raise Shutdown()
        data = self._left_over
        if data:
            max_len -= len(data)
        for d in self._rcv_data:
            data += d
            max_len -= len(d)
            if max_len <= 0:
                break
        if max_len < 0:
            # A negative max_len is the number of surplus bytes at the tail
            self._left_over = data[max_len:]
            data = data[:max_len]
        else:
            self._left_over = ""
        if not self._rcv_data.has_seq_data():
            self._data_event.clear()
        raise Return(data)

    @coroutine
    def recv_stream(self):
        """Wait for data and return everything iterable from ``_rcv_data``.

        Raises ``Shutdown`` when ``self.closed()`` is true after waking up.
        """
        yield self._data_event.wait()
        if self.closed():
            raise Shutdown()
        self._data_event.clear()
        raise Return(''.join(self._rcv_data))

    @coroutine
    def recv_file(self, fd, length):
        """Write received chunks to ``fd`` until ``length`` reaches zero.

        Raises ``Shutdown`` when ``self.closed()`` is true after waking up.
        """
        # NOTE(review): the loop only ends when length hits exactly 0; a
        # chunk that overshoots would make it negative -- confirm intent.
        while length:
            # print ">", length
            yield self._data_event.wait()
            if self.closed():
                raise Shutdown()
            for d in self._rcv_data:
                length -= len(d)
                fd.write(d)
                if length == 0:
                    break
            if not self._rcv_data.has_seq_data():
                self._data_event.clear()

    @coroutine
    def send(self, data):
        """Split ``data`` into packets and write them out in order.

        Each packet carries at most ``self.mss - data_header_size`` bytes.
        When ``self.write`` returns a future, that future is awaited and the
        packet is written again.

        Raises ``Shutdown`` when ``self.closed()`` is true before sending.
        """
        if self._write_queue:
            yield self._write_queue.wait()
        if self.closed():
            raise Shutdown()
        q = 1
        length = len(data)
        data_size = self.mss - data_header_size
        # Ceiling division: one extra packet for a trailing partial slice
        n_packet = length / data_size + int(bool(length % data_size))
        for n in xrange(n_packet):
            p = DataPacket(sock_id=self.sock_id,
                           data=data[n * data_size:(n + 1) * data_size])
            p.set_seq(self._sent_data.add(p))
            pack = p.pack()
            write_future = self.write(pack)
            if write_future is not None:
                # NOTE(review): nesting of the next three lines was rebuilt
                # from a flattened source -- confirm `q += 1` belongs inside
                # the inner `if`.
                if not self._write_queue:
                    self._write_queue.wait()
                    q += 1
                yield write_future
                self.write(pack)
        self._write_queue.next(q)

    @coroutine
    def send_file(self, fd, length):
        """Read ``length`` bytes from ``fd`` in packet-sized pieces and send.

        Mirrors ``send`` but reads each payload from ``fd`` and drops the
        packet from ``_sent_data`` right after sequencing it.

        Raises ``Shutdown`` when ``self.closed()`` is true before sending.
        """
        if self._write_queue:
            yield self._write_queue.wait()
        if self.closed():
            raise Shutdown()
        q = 1
        data_size = self.mss - data_header_size
        # Ceiling division: one extra packet for a trailing partial slice
        n_packet = length / data_size + int(bool(length % data_size))
        for n in xrange(n_packet):
            p = DataPacket(sock_id=self.sock_id, data=fd.read(data_size))
            assert len(p.data) > 0, "Data packet is empty!"
            p.set_seq(self._sent_data.add(p))
            # print "%", p.seq, len(p.data)
            del self._sent_data[
                p.seq]  # until ack is introduced, else it leaks
            pack = p.pack()
            write_future = self.write(pack)
            if write_future is not None:
                # NOTE(review): nesting rebuilt from a flattened source --
                # confirm `q += 1` belongs inside the inner `if`.
                if not self._write_queue:
                    self._write_queue.wait()
                    q += 1
                # print "@@@@@@@@"
                # yield sleep(.001)
                yield write_future
                self.write(pack)
        self._write_queue.next(q)
class XBeeBase(_XBeeBase):
    """
    Abstract base class providing command generation and
    response parsing methods for XBee modules.

    Constructor arguments:
        ser:    The file-like serial port to use.

        shorthand: boolean flag which determines whether shorthand command
                   calls (i.e. xbee.at(...) instead of xbee.send("at",...)
                   are allowed.

        callback: function which should be called with frame data
                  whenever a frame arrives from the serial port.

        escaped: boolean flag which determines whether the library should
                 operate in escaped mode. In this mode, certain data bytes
                 in the output and input streams will be escaped and
                 unescaped in accordance with the XBee API. This setting
                 must match the appropriate api_mode setting of an XBee
                 device; see your XBee device's documentation for more
                 information.

        error_callback: function which should be called with an Exception
                 whenever an exception is raised while waiting for data from
                 the serial port. This will only take effect if the callback
                 argument is also used.

        io_loop: optional keyword argument; the loop on which the serial
                 file descriptor is registered. Defaults to
                 ioloop.IOLoop.current().
    """

    def __init__(self, *args, **kwargs):
        # 'io_loop' is consumed here so the parent constructor never sees it.
        if 'io_loop' in kwargs:
            self._ioloop = kwargs.pop('io_loop')
        else:
            self._ioloop = ioloop.IOLoop.current()

        super(XBeeBase, self).__init__(*args, **kwargs)

        self._running = Event()
        self._running.set()

        # Future handed out by _get_frame() that is still waiting for a frame
        self._frame_future = None
        # Frames that arrived while nobody was waiting
        self._frame_queue = deque()

        if self._callback:
            # Make Non-Blocking
            self.serial.timeout = 0
            self.process_frames()

        self._ioloop.add_handler(self.serial.fd,
                                 self._process_input,
                                 ioloop.IOLoop.READ)

    def halt(self):
        """
        halt: None -> None

        Stop the event, and remove the FD from the loop handler
        """
        if self._callback:
            self._running.clear()
            self._ioloop.remove_handler(self.serial.fd)

            if self._frame_future is not None:
                # Wake the waiter with None so it can observe the shutdown.
                self._frame_future.set_result(None)
                self._frame_future = None

    @gen.coroutine
    def process_frames(self):
        """
        process_frames: None -> None

        Wait for a frame to become available, when resolved call the callback
        """
        while self._running.is_set():
            try:
                frame = yield self._get_frame()
                if frame is None:
                    # halt() resolves the pending future with None; that is
                    # a shutdown signal, not an error to report.
                    continue
                info = self._split_response(frame.data)
                if info is not None:
                    self._callback(info)
            except Exception as e:
                # Unexpected quit.
                if self._error_callback:
                    self._error_callback(e)

    @gen.coroutine
    def wait_read_frame(self, timeout=None):
        """
        wait_read_frame: [number] -> parsed frame

        Wait for the next frame and return it run through _split_response.
        If timeout is given and expires first, the _TimeoutException set by
        _get_frame is raised.
        """
        frame = yield self._get_frame(timeout=timeout)
        raise gen.Return(self._split_response(frame.data))

    def _get_frame(self, timeout=None):
        """
        _get_frame: [number] -> Future

        Return a future for the next frame. It is already resolved when a
        frame is queued; otherwise it is stored as the pending future that
        _process_input resolves. With a timeout, the future fails with
        _TimeoutException after that many seconds.
        """
        future = Future()
        if self._frame_queue:
            future.set_result(self._frame_queue.popleft())
        else:
            if timeout is not None:
                def on_timeout():
                    # Unregister the expired future so the next frame is
                    # queued instead of being delivered into a finished
                    # future (and lost).
                    if self._frame_future is future:
                        self._frame_future = None
                    future.set_exception(_TimeoutException())

                handle = self._ioloop.add_timeout(
                    self._ioloop.time() + timeout, on_timeout
                )
                # Cancel the timer once the future completes either way.
                future.add_done_callback(
                    lambda _: self._ioloop.remove_timeout(handle))

            self._frame_future = future

        return future

    def _process_input(self, data, events):
        """
        _process_input:

        _process_input will be notified when there is data ready on the
        serial connection to be read.  It will read and process the data
        into an API Frame and then either resolve a frame future, or push
        the frame into the queue of frames needing to be processed

        Neither positional argument supplied by the loop is used here.
        """
        frame = APIFrame(escaped=self._escaped)

        byte = self.serial.read()

        if byte != APIFrame.START_BYTE:
            return

        # Save all following bytes, if they are not empty
        if len(byte) == 1:
            frame.fill(byte)

        while frame.remaining_bytes() > 0:
            byte = self.serial.read()

            if len(byte) == 1:
                frame.fill(byte)

        try:
            # Try to parse and return result
            frame.parse()

            # Ignore empty frames
            if len(frame.data) == 0:
                return

            # Detach the waiter before resolving it, and never resolve a
            # future that has already completed.
            pending = self._frame_future
            self._frame_future = None
            if pending is not None and not pending.done():
                pending.set_result(frame)
            else:
                self._frame_queue.append(frame)
        except ValueError:
            return
class SlaveHolder:
    """Starts and supervises one ``Slave`` per registered bot and answers
    the slave-holder queue requests."""

    def __init__(self, db, queue):
        """
        :param db: database handle; ``execute`` is yielded on in ``start``
            and the handle is passed to every ``Slave``
        :param queue: message queue exposing ``listen``, ``stop`` and ``send``
        """
        self.db = db
        # bot id -> {'future': <listen future>, 'instance': <Slave>}
        self.slaves = {}
        self._finished = Event()
        self._finished.set()
        self.queue = queue

    @coroutine
    def start(self):
        """Start every active registered bot, then serve queue messages
        until ``stop`` is called."""
        self._finished.clear()
        logging.debug('Starting slave-holder')
        cur = yield self.db.execute('SELECT * FROM registered_bots WHERE active = TRUE')
        columns = [i[0] for i in cur.description]

        while True:
            row = cur.fetchone()
            if not row:
                break

            row = dict(zip(columns, row))
            self._start_bot(**row)

        listen_future = self.queue.listen(slaveholder_queues(), self.queue_handler)

        try:
            yield self._finished.wait()
        finally:
            self.queue.stop(slaveholder_queues())
            yield listen_future

    def _start_bot(self, **kwargs):
        """Create a ``Slave`` from ``kwargs`` (a ``registered_bots`` row or a
        queue message), start it and track it under ``kwargs['id']``."""

        @coroutine
        def listen_done(f: Future):
            # Runs when the slave's listen future completes.
            logging.debug('[bot#%s] Terminated', kwargs['id'])

            e = f.exception()
            if e:
                # One placeholder per argument; the traceback lines are
                # joined so the record renders as a single text block.
                logging.debug('[bot#%s] Got exception: %s', kwargs['id'],
                              ''.join(format_exception(*f.exc_info())))
                if isinstance(e, ApiError) and e.code == 401:
                    logging.warning('[bot#%d] Disabling due to connection error', kwargs['id'])
                    yield self.queue.send(QUEUE_BOTERATOR_BOT_REVOKE, dumps(dict(error=str(e), **kwargs)))
                elif isinstance(e, ApiError) and e.code == 400 and 'chat not found' in e.description and \
                        str(kwargs['moderator_chat_id']) in e.request_body:
                    logging.warning('[bot#%d] Disabling due to unavailable moderator chat', kwargs['id'])
                    yield self.queue.send(QUEUE_BOTERATOR_BOT_REVOKE, dumps(dict(error=str(e), **kwargs)))
                elif isinstance(e, ApiError) and e.code == 409 and 'webhook is active' in e.description:
                    logging.warning('[bot#%d] Disabling due to misconfigured webhook', kwargs['id'])
                    yield self.queue.send(QUEUE_BOTERATOR_BOT_REVOKE, dumps(dict(error=str(e), **kwargs)))
                else:
                    # Any other failure: try the same bot again in 5 seconds
                    IOLoop.current().add_timeout(timedelta(seconds=5), self._start_bot, **kwargs)

            del self.slaves[kwargs['id']]

        slave = Slave(db=self.db, **kwargs)
        slave_listen_f = slave.start()
        self.slaves[kwargs['id']] = {
            'future': slave_listen_f,
            'instance': slave,
        }
        IOLoop.current().add_future(slave_listen_f, listen_done)

    def stop(self):
        """Stop every running slave and release ``start``."""
        logging.info('Stopping slave-holder')
        for slave in self.slaves.values():
            slave['instance'].stop()

        self._finished.set()

    @coroutine
    def queue_handler(self, queue_name, body):
        """Handle one message from a slave-holder queue.

        :param queue_name: name of the queue the message arrived on
        :param body: UTF-8 encoded serialized payload
        :raises Exception: when ``queue_name`` is not a known queue
        """
        body = loads(body.decode('utf-8'))

        if queue_name == QUEUE_SLAVEHOLDER_NEW_BOT:
            self._start_bot(**body)
        elif queue_name == QUEUE_SLAVEHOLDER_GET_BOT_INFO:
            bot = Api(body['token'], lambda x: None)

            if bot.bot_id in self.slaves:
                logging.debug('[bot#%s] Already registered', bot.bot_id)
                yield self.queue.send(body['reply_to'], dumps(dict(error='duplicate')))
                # The requester has its answer; do not follow the error with
                # a second reply on the same reply queue.
                return

            try:
                ret = yield bot.get_me()
                logging.debug('[bot#%s] Ok', bot.bot_id)
            except Exception as e:
                logging.debug('[bot#%s] Failed', bot.bot_id)
                yield self.queue.send(body['reply_to'], dumps(dict(error=str(e))))
                return

            yield self.queue.send(body['reply_to'], dumps(ret))
        elif queue_name == QUEUE_SLAVEHOLDER_GET_MODERATION_GROUP:
            update_with_command_f = Future()
            timeout_f = with_timeout(timedelta(seconds=body['timeout']), update_with_command_f)

            @coroutine
            def slave_update_handler(update):
                # Resolve the future with the first update that identifies a
                # moderation group; anything else is only logged.
                logging.debug('[bot#%s] Received update', bot.bot_id)
                if attach_cmd_filter.test(**update):
                    logging.debug('[bot#%s] /attach', bot.bot_id)
                    update_with_command_f.set_result(update)
                elif bot_added.test(**update):
                    logging.debug('[bot#%s] bot added to a group', bot.bot_id)
                    update_with_command_f.set_result(update)
                elif CommandFilterGroupChatCreated.test(**update) or CommandFilterSupergroupChatCreated.test(**update):
                    logging.debug('[bot#%s] group created', bot.bot_id)
                    update_with_command_f.set_result(update)
                else:
                    # The bot id was missing, leaving the second
                    # placeholder without an argument.
                    logging.debug('[bot#%s] unsupported update: %s', bot.bot_id, dumps(update, indent=2))

            bot = Api(body['token'], slave_update_handler)

            @coroutine
            def handle_finish(f):
                # Called once the wait either produced an update or timed out.
                bot.stop()
                if not f.exception():
                    logging.debug('[bot#%s] Done', bot.bot_id)
                    update = f.result()
                    yield self.queue.send(body['reply_to'], dumps(dict(sender=update['message']['from'],
                                                                       **update['message']['chat'])))

                    # Mark last update as read
                    f2 = bot.get_updates(update['update_id'] + 1, timeout=0, retry_on_nonuser_error=True)
                    f2.add_done_callback(lambda x: x.exception())  # Ignore any exceptions
                else:
                    logging.debug('[bot#%s] Failed: %s', bot.bot_id, f.exception())

            timeout_f.add_done_callback(handle_finish)

            # Both filters are looked up by slave_update_handler at call
            # time, so defining them after the handler is fine.
            attach_cmd_filter = CommandFilterTextCmd('/attach')
            bot_added = CommandFilterNewChatMember(bot.bot_id)

            logging.debug('[bot#%s] Waiting for moderation group', bot.bot_id)
            bot.wait_commands()
        else:
            # Interpolate the name; passing it as a second argument left the
            # message as an unformatted tuple.
            raise Exception('Unknown queue: %s' % queue_name)
class AsyncKazooLock(object):
    """ A lock based on kazoo.recipe.Lock and modified to work as a coroutine.
    """

    # Node name, after the contender UUID, before the sequence
    # number. Involved in read/write locks.
    _NODE_NAME = "__lock__"

    # Node names which exclude this contender when present at a lower
    # sequence number. Involved in read/write locks.
    _EXCLUDE_NAMES = ["__lock__"]

    def __init__(self, client, path, identifier=None):
        """ Creates an AsyncKazooLock.

        Args:
            client: A KazooClient.
            path: The lock path to use.
            identifier: The name to use for this lock contender. This can be
                useful for querying to see who the current lock contenders
                are.
        """
        self.client = client
        self.tornado_kazoo = TornadoKazoo(client)
        self.path = path

        # some data is written to the node. this can be queried via
        # contenders() to see who is contending for the lock
        self.data = str(identifier or "").encode('utf-8')
        self.node = None

        self.wake_event = AsyncEvent()

        # props to Netflix Curator for this trick. It is possible for our
        # create request to succeed on the server, but for a failure to
        # prevent us from getting back the full path name. We prefix our
        # lock name with a uuid and can check for its presence on retry.
        self.prefix = uuid.uuid4().hex + self._NODE_NAME
        self.create_path = self.path + "/" + self.prefix

        self.create_tried = False
        self.is_acquired = False
        self.assured_path = False
        self.cancelled = False
        self._retry = AsyncKazooRetry(max_tries=-1)
        self._lock = AsyncLock()

    @gen.coroutine
    def _ensure_path(self):
        """ Creates the lock's parent path and remembers that it exists. """
        yield self.tornado_kazoo.ensure_path(self.path)
        self.assured_path = True

    def cancel(self):
        """ Cancels a pending lock acquire. """
        self.cancelled = True
        self.wake_event.set()

    @gen.coroutine
    def acquire(self, timeout=None, ephemeral=True):
        """ Acquires the lock. By default, it blocks and waits forever.

        Args:
            timeout: A float specifying how long to wait to acquire the lock.
            ephemeral: A boolean indicating that the lock should use an
                ephemeral node.
        Returns:
            Whatever the retried acquisition produced (True on success), or
            False if it never succeeded.
        Raises:
            LockTimeout if the lock wasn't acquired within `timeout` seconds.
        """
        retry = self._retry.copy()
        retry.deadline = timeout

        # Ensure we are locked so that we avoid multiple coroutines in
        # this acquisition routine at the same time...
        timeout_interval = None
        if timeout is not None:
            timeout_interval = datetime.timedelta(seconds=timeout)

        try:
            with (yield self._lock.acquire(timeout=timeout_interval)):
                already_acquired = self.is_acquired
                gotten = False
                try:
                    gotten = yield retry(self._inner_acquire, timeout=timeout,
                                         ephemeral=ephemeral)
                except RetryFailedError:
                    pass
                except KazooException:
                    # if we did ultimately fail, attempt to clean up
                    exc_info = sys.exc_info()
                    if not already_acquired:
                        yield self._best_effort_cleanup()
                        self.cancelled = False
                    six.reraise(exc_info[0], exc_info[1], exc_info[2])
                if gotten:
                    self.is_acquired = gotten
                if not gotten and not already_acquired:
                    yield self._best_effort_cleanup()
                raise gen.Return(gotten)
        except gen.TimeoutError:
            raise LockTimeout("Failed to acquire lock on %s after "
                              "%s seconds" % (self.path, timeout))

    def _watch_session(self, state):
        """ Wakes the waiting acquirer after a session state change. """
        self.wake_event.set()
        return True

    def _watch_session_listener(self, state):
        """ Kazoo listener; hands the state change over to the IOLoop. """
        IOLoop.current().add_callback(self._watch_session, state)

    @gen.coroutine
    def _inner_acquire(self, timeout, ephemeral=True):
        """ Performs one acquisition attempt.

        Args:
            timeout: Seconds to wait for the predecessor to go away, or None
                to wait without limit.
            ephemeral: A boolean indicating that the lock node should be
                ephemeral.
        Returns:
            True once no excluding node precedes ours.
        Raises:
            ForceRetryError if the lock is already held by this object or
                our node has vanished.
            CancelledError if cancel() was called.
            LockTimeout if the wait for the predecessor timed out.
        """
        # wait until it's our chance to get it..
        if self.is_acquired:
            raise ForceRetryError()

        # make sure our election parent node exists
        if not self.assured_path:
            yield self._ensure_path()

        node = None
        if self.create_tried:
            node = yield self._find_node()
        else:
            self.create_tried = True

        if not node:
            node = yield self.tornado_kazoo.create(
                self.create_path, self.data, ephemeral=ephemeral,
                sequence=True)
            # strip off path to node
            node = node[len(self.path) + 1:]

        self.node = node

        # Tornado-style waits interpret a bare number as an absolute point
        # on the IOLoop clock, so pass a relative timedelta exactly as
        # acquire() does for the coroutine lock.
        wait_timeout = None
        if timeout is not None:
            wait_timeout = datetime.timedelta(seconds=timeout)

        while True:
            self.wake_event.clear()

            # bail out with an exception if cancellation has been requested
            if self.cancelled:
                raise CancelledError()

            children = yield self._get_sorted_children()

            try:
                our_index = children.index(node)
            except ValueError:  # pragma: nocover
                # somehow we aren't in the children -- probably we are
                # recovering from a session failure and our ephemeral
                # node was removed
                raise ForceRetryError()

            predecessor = self.predecessor(children, our_index)
            if not predecessor:
                raise gen.Return(True)

            # otherwise we are in the mix. watch predecessor and bide our time
            predecessor = self.path + "/" + predecessor
            self.client.add_listener(self._watch_session_listener)
            try:
                yield self.tornado_kazoo.get(predecessor,
                                             self._watch_predecessor)
            except NoNodeError:
                pass  # predecessor has already been deleted
            else:
                try:
                    yield self.wake_event.wait(wait_timeout)
                except gen.TimeoutError:
                    raise LockTimeout("Failed to acquire lock on %s after "
                                      "%s seconds" % (self.path, timeout))
            finally:
                self.client.remove_listener(self._watch_session_listener)

    def predecessor(self, children, index):
        """ Returns the closest node before `index` whose name contains one
        of _EXCLUDE_NAMES, or None if there is no such node. """
        for c in reversed(children[:index]):
            if any(n in c for n in self._EXCLUDE_NAMES):
                return c
        return None

    def _watch_predecessor(self, event):
        """ Watch callback; wakes the waiting acquirer. """
        self.wake_event.set()

    @gen.coroutine
    def _get_sorted_children(self):
        """ Returns the children of the lock path ordered by sequence. """
        children = yield self.tornado_kazoo.get_children(self.path)

        # Node names are prefixed by a type: strip the prefix first, which may
        # be one of multiple values in case of a read-write lock, and return
        # only the sequence number (as a string since it is padded and will
        # sort correctly anyway).
        #
        # In some cases, the lock path may contain nodes with other prefixes
        # (eg. in case of a lease), just sort them last ('~' sorts after all
        # ASCII digits).
        def _seq(c):
            for name in ["__lock__", "__rlock__"]:
                idx = c.find(name)
                if idx != -1:
                    return c[idx + len(name):]
            # Sort unknown node names eg. "lease_holder" last.
            return '~'

        children.sort(key=_seq)
        raise gen.Return(children)

    @gen.coroutine
    def _find_node(self):
        """ Returns the child created with our prefix, or None. """
        children = yield self.tornado_kazoo.get_children(self.path)
        for child in children:
            if child.startswith(self.prefix):
                raise gen.Return(child)
        raise gen.Return(None)

    @gen.coroutine
    def _delete_node(self, node):
        """ Deletes the named child of the lock path. """
        yield self.tornado_kazoo.delete(self.path + "/" + node)

    @gen.coroutine
    def _best_effort_cleanup(self):
        """ Removes our lock node if one can be found; Kazoo errors are
        deliberately ignored. """
        try:
            node = self.node
            if not node:
                node = yield self._find_node()
            if node:
                yield self._delete_node(node)
        except KazooException:  # pragma: nocover
            pass

    @gen.coroutine
    def release(self):
        """Release the lock immediately."""
        retry = self._retry.copy()
        release_response = yield retry(self._inner_release)
        raise gen.Return(release_response)

    @gen.coroutine
    def _inner_release(self):
        """ Deletes our node; returns False if the lock was not held. """
        if not self.is_acquired:
            raise gen.Return(False)

        try:
            yield self._delete_node(self.node)
        except NoNodeError:  # pragma: nocover
            pass

        self.is_acquired = False
        self.node = None
        raise gen.Return(True)

    @gen.coroutine
    def contenders(self):
        """ Returns an ordered list of the current contenders for the lock.
        """
        # make sure our election parent node exists
        if not self.assured_path:
            yield self._ensure_path()

        children = yield self._get_sorted_children()

        contenders = []
        for child in children:
            try:
                # Resolve the future first, then index the result; the
                # subscript used to be applied to the future itself.
                response = yield self.tornado_kazoo.get(
                    self.path + "/" + child)
                data = response[0]
                contenders.append(data.decode('utf-8'))
            except NoNodeError:  # pragma: nocover
                pass
        raise gen.Return(contenders)
class MockFitsWriterClient(object):
    """
    Wrapper class for a KATCP client to a EddFitsWriterServer
    """

    def __init__(self, address, record_dest):
        """
        @brief      Construct new instance
                    If record_dest is not empty, create a folder named
                    record_dest and record the received packages there.

        @param      address      Address handed to socket.connect()
        @param      record_dest  Directory for recorded packets, or an empty
                                 value to disable recording
        """
        self._address = address
        self.__record_dest = record_dest
        if record_dest:
            if not os.path.isdir(record_dest):
                os.makedirs(record_dest)
        self._ioloop = IOLoop.current()
        self._stop_event = Event()
        self._is_stopped = Condition()
        self._socket = None
        # Timestamp of the newest header seen so far
        self.__last_package = 0

    def reset_connection(self):
        """
        @brief      Open a fresh non-blocking socket and start connecting
        """
        if self._socket is not None:
            # Release the previous descriptor instead of leaking it
            self._socket.close()
        self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._socket.setblocking(False)
        try:
            self._socket.connect(self._address)
        except socket.error as error:
            # EINPROGRESS is the normal outcome of a non-blocking connect
            if error.args[0] == errno.EINPROGRESS:
                pass
            else:
                raise error

    @coroutine
    def recv_nbytes(self, nbytes):
        """
        @brief      Receive exactly nbytes from the socket

        @param      nbytes  Number of bytes to collect

        @return     The collected bytes

        @note       Raises StopEvent when the stop event is set, and
                    re-raises any socket error other than EAGAIN or
                    EWOULDBLOCK.
        """
        received_bytes = 0
        data = b''
        while received_bytes < nbytes:
            if self._stop_event.is_set():
                raise StopEvent
            try:
                log.debug("Requesting {} bytes".format(nbytes - received_bytes))
                current_data = self._socket.recv(nbytes - received_bytes)
            except socket.error as error:
                error_id = error.args[0]
                if error_id == errno.EAGAIN or error_id == errno.EWOULDBLOCK:
                    # Nothing available yet; let the loop run other work
                    yield sleep(0.1)
                else:
                    log.exception("Unexpected error on socket recv: {}".format(
                        str(error)))
                    raise
            else:
                if not current_data:
                    # An empty read means the peer closed the connection.
                    # Yield instead of spinning so the IOLoop stays
                    # responsive and stop() can still interrupt the wait.
                    yield sleep(0.1)
                    continue
                received_bytes += len(current_data)
                data += current_data
                log.debug("Received {} bytes ({} of {} bytes)".format(
                    len(current_data), received_bytes, nbytes))
        raise Return(data)

    @coroutine
    def recv_loop(self):
        """
        @brief      Receive packets until the stop event is set, then wake
                    anyone waiting in stop()
        """
        while not self._stop_event.is_set():
            try:
                yield self.recv_packet()
            except StopEvent:
                log.debug("Notifying that recv calls have stopped")
            except Exception as E:
                log.exception("Failure while receiving packet: {}".format(E))
        # Without this, stop() could only ever run into its timeout.
        self._is_stopped.notify_all()

    def start(self):
        """
        @brief      Connect and schedule the receive loop on the IOLoop
        """
        self._stop_event.clear()
        self.reset_connection()
        self._ioloop.add_callback(self.recv_loop)

    @coroutine
    def stop(self, timeout=2):
        """
        @brief      Ask the receive loop to stop and wait for it to do so

        @param      timeout  Seconds to wait before logging a failure
        """
        self._stop_event.set()
        try:
            success = yield self._is_stopped.wait(
                timeout=self._ioloop.time() + timeout)
            if not success:
                raise TimeoutError
        except TimeoutError:
            log.error(("Could not stop the client within "
                       "the {} second limit").format(timeout))
        except Exception:
            log.exception("Fucup")

    @coroutine
    def recv_packet(self):
        """
        @brief      Receive one packet: a header followed by its sections

        @return     Tuple (header, sections); sections is a list of
                    (section_header, data) tuples

        @note       When a record destination is set, the raw bytes of the
                    packet are also written to a file in that directory.
        """
        log.debug("Receiving packet header")
        raw_header = yield self.recv_nbytes(C.sizeof(FWHeader))
        log.debug("Converting packet header")
        header = FWHeader.from_buffer_copy(raw_header)
        log.info("Received header: {}".format(header))
        if header.timestamp < self.__last_package:
            log.error("Timestamps out of order!")
        else:
            self.__last_package = header.timestamp

        ofile = None
        if self.__record_dest:
            filename = os.path.join(self.__record_dest,
                                    "FWP_{}.dat".format(header.timestamp))
            # Never overwrite an earlier recording
            while os.path.isfile(filename):
                log.warning('Filename {} already exists. Add suffix _'.format(
                    filename))
                filename += '_'
            log.info('Recording to file {}'.format(filename))
            ofile = open(filename, 'wb')

        # The finally clause closes the record file even when reception is
        # interrupted (StopEvent, socket error) in the middle of a packet.
        try:
            if ofile is not None:
                ofile.write(raw_header)

            fw_data_type = header.channel_data_type.strip().upper()
            c_data_type, np_data_type = TYPE_MAP[fw_data_type]
            sections = []
            for section in range(header.nsections):
                log.debug("Receiving section {} of {}".format(
                    section + 1, header.nsections))
                raw_section_header = yield self.recv_nbytes(
                    C.sizeof(FWSectionHeader))
                if ofile is not None:
                    ofile.write(raw_section_header)

                section_header = FWSectionHeader.from_buffer_copy(
                    raw_section_header)
                log.info("Section {} header: {}".format(section, section_header))
                log.debug("Receiving section data")
                raw_bytes = yield self.recv_nbytes(
                    C.sizeof(c_data_type) * section_header.nchannels)
                if ofile is not None:
                    ofile.write(raw_bytes)
                data = np.frombuffer(raw_bytes, dtype=np_data_type)
                log.info("Section {} data: {}".format(section, data[:10]))
                sections.append((section_header, data))
        finally:
            if ofile is not None:
                ofile.close()
        raise Return((header, sections))
class XEngineOperations(object): def __init__(self, corr_obj): """ A collection of x-engine operations that act on/with a correlator instance. :param corr_obj: the FxCorrelator instance :return: """ self.corr = corr_obj self.hosts = corr_obj.xhosts self.logger = corr_obj.logger self.data_stream = None self.vacc_synch_running = IOLoopEvent() self.vacc_synch_running.clear() self.vacc_check_enabled = IOLoopEvent() self.vacc_check_enabled.clear() self.vacc_check_cb = None self.vacc_check_cb_data = None @staticmethod def _gberst(hosts, state): THREADED_FPGA_OP( hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.control.write(gbe_rst=state),)) def initialise_post_gbe(self): """ Perform post-gbe setup initialisation steps :return: """ # write the board IDs to the xhosts board_id = 0 for f in self.hosts: f.registers.board_id.write(reg=board_id) board_id += 1 # write the data stream destination to the registers self.write_data_stream_destination(None) # clear gbe status THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.control.write(gbe_debug_rst='pulse'),)) # release cores from reset XEngineOperations._gberst(self.hosts, False) # simulator if use_xeng_sim: THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.simulator.write(en=True),)) # set up accumulation length self.set_acc_len(vacc_resync=False) # clear general status THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.control.write(status_clr='pulse'),)) # check for errors # TODO - read status regs? def initialise_pre_gbe(self): """ Set up x-engines on this device. 
:return: """ # simulator if use_xeng_sim: THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.simulator.write( en=False, rst='pulse'),)) # set the gapsize register gapsize = int(self.corr.configd['xengine']['10gbe_pkt_gapsize']) self.logger.info('X-engines: setting packet gap size to %i' % gapsize) if 'gapsize' in self.hosts[0].registers.names(): # these versions have the correct logic surrounding the register THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.gapsize.write_int(gapsize),)) elif 'gap_size' in self.hosts[0].registers.names(): # these versions do not, they need a software hack for the setting # to 'take' THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.gap_size.write_int(gapsize),)) # HACK - this is a hack to overcome broken x-engine firmware in # versions around a2d0615bc9cd95eabf7c8ed922c1a15658c0688e. # The logic next to the gap_size register is broken, registering # the LAST value written, not the new one. THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.gap_size.write_int( gapsize-1),)) # /HACK else: _errmsg = 'X-engine image has no register gap_size/gapsize?' self.logger.exception(_errmsg) raise RuntimeError(_errmsg) # disable transmission, place cores in reset, and give control # register a known state self.xeng_tx_disable(None) XEngineOperations._gberst(self.hosts, True) self.clear_status_all() def configure(self): """ Configure the xengine operations - this is done whenever a correlator is instantiated. 
:return: """ # set up the xengine data stream self._setup_data_stream() def _setup_data_stream(self): """ Set up the data stream for the xengine output :return: """ # the x-engine output data stream setup _xeng_d = self.corr.configd['xengine'] data_addr = NetAddress(_xeng_d['output_destination_ip'], _xeng_d['output_destination_port']) meta_addr = NetAddress(_xeng_d['output_destination_ip'], _xeng_d['output_destination_port']) xeng_stream = data_stream.DataStream( name=_xeng_d['output_products'][0], category=data_stream.XENGINE_CROSS_PRODUCTS, destination=data_addr, meta_destination=meta_addr, destination_cb=self.write_data_stream_destination, meta_destination_cb=self.spead_meta_issue_all, tx_enable_method=self.xeng_tx_enable, tx_disable_method=self.xeng_tx_disable) self.data_stream = xeng_stream self.corr.register_data_stream(xeng_stream) self.vacc_check_enabled.clear() self.vacc_synch_running.clear() if self.vacc_check_cb is not None: self.vacc_check_cb.stop() self.vacc_check_cb = None def _vacc_periodic_check(self): self.logger.debug('Checking vacc operation @ %s' % time.ctime()) if not self.vacc_check_enabled.is_set(): self.logger.info('Check logic disabled, exiting') return if self.vacc_synch_running.is_set(): self.logger.info('vacc_sync is currently running, exiting') return def get_data(): """ Get the relevant data from the X-engine FPGAs """ # older versions had other register names _OLD = 'reorderr_timeout0' in self.hosts[0].registers.names() def _get_reorder_data(fpga): rv = {} for _ctr in range(0, fpga.x_per_fpga): if _OLD: _reg = fpga.registers['reorderr_timeout%i' % _ctr] rv['etim%i' % _ctr] = _reg.read()['data']['reg'] else: _reg = fpga.registers['reorderr_timedisc%i' % _ctr] rv['etim%i' % _ctr] = _reg.read()['data']['timeout'] return rv reo_data = THREADED_FPGA_OP(self.hosts, timeout=5, target_function=_get_reorder_data) vacc_data = self.vacc_status() return {'reorder': reo_data, 'vacc': vacc_data} def _vacc_data_check(d0, d1): # check errors are not 
incrementing for host in self.hosts: for xeng in range(0, host.x_per_fpga): status0 = d0[host.host][xeng] status1 = d1[host.host][xeng] if ((status1['errors'] > status0['errors']) or (status0['errors'] != 0)): self.logger.error(' vacc %i on %s has ' 'errors' % (xeng, host.host)) return False # check that the accumulations are ticking over for host in self.hosts: for xeng in range(0, host.x_per_fpga): status0 = d0[host.host][xeng] status1 = d1[host.host][xeng] if status1['count'] == status0['count']: self.logger.error(' vacc %i on %s is not ' 'incrementing' % (xeng, host.host)) return False return True def _reorder_data_check(d0, d1): for host in self.hosts: for ctr in range(0, host.x_per_fpga): reg = 'etim%i' % ctr if d0[host.host][reg] != d1[host.host][reg]: self.logger.error(' %s - vacc check reorder ' 'reg %s error' % (host.host, reg)) return False return True new_data = get_data() # self.logger.info('new_data: %s' % new_data) if self.vacc_check_cb_data is not None: force_sync = False # check the vacc status data first if not _vacc_data_check(self.vacc_check_cb_data['vacc'], new_data['vacc']): force_sync = True # check the reorder data if not force_sync: if not _reorder_data_check(self.vacc_check_cb_data['reorder'], new_data['reorder']): force_sync = True if force_sync: self.logger.error(' forcing vacc sync') self.vacc_sync() self.corr.logger.debug('scheduled check done @ %s' % time.ctime()) self.vacc_check_cb_data = new_data def vacc_check_timer_stop(self): """ Disable the vacc_check timer :return: """ if self.vacc_check_cb is not None: self.vacc_check_cb.stop() self.vacc_check_cb = None self.vacc_check_enabled.clear() self.corr.logger.info('vacc check timer stopped') def vacc_check_timer_start(self, vacc_check_time=30): """ Set up a periodic check on the vacc operation. 
:param vacc_check_time: the interval, in seconds, at which to check :return: """ if not IOLoop.current()._running: raise RuntimeError('IOLoop not running, this will not work') self.logger.info('xeng_setup_vacc_check_timer: setting up the ' 'vacc check timer at %i seconds' % vacc_check_time) if vacc_check_time < self.get_acc_time(): raise RuntimeError('A check time smaller than the accumulation' 'time makes no sense.') if self.vacc_check_cb is not None: self.vacc_check_cb.stop() self.vacc_check_cb = PeriodicCallback(self._vacc_periodic_check, vacc_check_time * 1000) self.vacc_check_enabled.set() self.vacc_check_cb.start() self.corr.logger.info('vacc check timer started') def write_data_stream_destination(self, data_stream): """ Write the x-engine data stream destination to the hosts. :param data_stream - the data stream on which to act :return: """ dstrm = data_stream or self.data_stream txip = int(dstrm.destination.ip) txport = dstrm.destination.port try: THREADED_FPGA_OP( self.hosts, timeout=10, target_function=(lambda fpga_: fpga_.registers.gbe_iptx.write(reg=txip),)) THREADED_FPGA_OP( self.hosts, timeout=10, target_function=(lambda fpga_: fpga_.registers.gbe_porttx.write(reg=txport),)) except AttributeError: self.logger.warning('Writing stream %s destination to ' 'hardware failed!' % dstrm.name) # update meta data on stream destination change self.spead_meta_update_stream_destination() dstrm.meta_transmit() self.logger.info('Wrote stream %s destination to %s in hardware' % ( dstrm.name, dstrm.destination)) def clear_status_all(self): """ Clear the various status registers and counters on all the fengines :return: """ THREADED_FPGA_FUNC(self.hosts, timeout=10, target_function='clear_status') def subscribe_to_multicast(self): """ Subscribe the x-engines to the f-engine output multicast groups - each one subscribes to only one group, with data meant only for it. 
:return: """ if self.corr.fengine_output.is_multicast(): self.logger.info('F > X is multicast from base %s' % self.corr.fengine_output) source_address = str(self.corr.fengine_output.ip_address) source_bits = source_address.split('.') source_base = int(source_bits[3]) source_prefix = '%s.%s.%s.' % (source_bits[0], source_bits[1], source_bits[2]) source_ctr = 0 for host_ctr, host in enumerate(self.hosts): for gbe in host.tengbes: rxaddress = '%s%d' % (source_prefix, source_base + source_ctr) gbe.multicast_receive(rxaddress, 0) # CLUDGE source_ctr += 1 # source_ctr += 4 self.logger.info('\txhost %s %s subscribing to address %s' % (host.host, gbe.name, rxaddress)) else: self.logger.info('F > X is unicast from base %s' % self.corr.fengine_output) def check_rx(self, max_waittime=30): """ Check that the x hosts are receiving data correctly :param max_waittime: :return: """ self.logger.info('Checking X hosts are receiving data...') results = THREADED_FPGA_FUNC( self.hosts, timeout=max_waittime+1, target_function=('check_rx', (max_waittime,),)) all_okay = True for _v in results.values(): all_okay = all_okay and _v if not all_okay: self.logger.error('\tERROR in X-engine rx data.') self.logger.info('\tdone.') return all_okay def vacc_status(self): """ Get a dictionary of the vacc status registers for all x-engines. :return: {} """ return THREADED_FPGA_FUNC(self.hosts, timeout=10, target_function='vacc_get_status') def _vacc_sync_check_reset(self): """ Do the vaccs need resetting before a synch? 
:return: """ vaccstat = THREADED_FPGA_FUNC( self.hosts, timeout=10, target_function='vacc_check_arm_load_counts') reset_required = False for xhost, result in vaccstat.items(): if result: self.logger.info('xeng_vacc_sync: %s has a vacc that ' 'needs resetting' % xhost) reset_required = True if reset_required: THREADED_FPGA_FUNC(self.hosts, timeout=10, target_function='vacc_reset') vaccstat = THREADED_FPGA_FUNC( self.hosts, timeout=10, target_function='vacc_check_reset_status') for xhost, result in vaccstat.items(): if not result: errstr = 'xeng_vacc_sync: resetting vaccs on ' \ '%s failed.' % xhost self.logger.error(errstr) raise RuntimeError(errstr) def _vacc_sync_create_loadtime(self, min_loadtime): """ Calculate the load time for the vacc synch based on a given minimum load time :param min_loadtime: :return: the vacc load time, in seconds since the UNIX epoch """ # how long should we wait for the vacc load self.logger.info('Vacc sync time not specified. Syncing in ' '%2.2f seconds\' time.' % (min_loadtime*2)) t_now = time.time() vacc_load_time = t_now + (min_loadtime*2) if vacc_load_time < (t_now + min_loadtime): raise RuntimeError( 'Cannot load at a time in the past. ' 'Need at least %2.2f seconds lead time. You asked for ' '%s.%i, and it is now %s.%i.' 
% ( min_loadtime, time.strftime('%H:%M:%S', time.gmtime(vacc_load_time)), (vacc_load_time-int(vacc_load_time))*100, time.strftime('%H:%M:%S', time.gmtime(t_now)), (t_now-int(t_now))*100)) self.logger.info(' xeng vaccs will sync at %s (in %2.2fs)' % (time.ctime(t_now), vacc_load_time-t_now)) return vacc_load_time def _vacc_sync_calc_load_mcount(self, vacc_loadtime): """ Calculate the loadtime in clock ticks :param vacc_loadtime: :return: """ ldmcnt = int(self.corr.mcnt_from_time(vacc_loadtime)) self.logger.debug('$$$$$$$$$$$ - ldmcnt = %i' % ldmcnt) _ldmcnt_orig = ldmcnt _cfgd = self.corr.configd n_chans = int(_cfgd['fengine']['n_chans']) xeng_acc_len = int(_cfgd['xengine']['xeng_accumulation_len']) quantisation_bits = int( numpy.log2(n_chans) + 1 + numpy.log2(xeng_acc_len)) self.logger.debug('$$$$$$$$$$$ - quant bits = %i' % quantisation_bits) ldmcnt = ((ldmcnt >> quantisation_bits) + 1) << quantisation_bits self.logger.debug('$$$$$$$$$$$ - ldmcnt quantised = %i' % ldmcnt) self.logger.debug('$$$$$$$$$$$ - ldmcnt diff = %i' % ( ldmcnt - _ldmcnt_orig)) if _ldmcnt_orig > ldmcnt: raise RuntimeError('Quantising the ldmcnt has broken it: %i -> ' '%i, diff(%i)' % (_ldmcnt_orig, ldmcnt, ldmcnt - _ldmcnt_orig)) time_from_mcnt = self.corr.time_from_mcnt(ldmcnt) t_now = time.time() if time_from_mcnt <= t_now: self.logger.warn(' Warning: the board timestamp has probably' ' wrapped! 
mcnt_time(%.3f) time.time(%.3f)' % (time_from_mcnt, t_now)) return ldmcnt def _vacc_sync_print_vacc_statuses(self, vstatus): """ Print the vacc statuses to the logger :param vstatus: :return: """ self.logger.info('vacc statii:') for _host in self.hosts: self.logger.info(' %s:' % _host.host) for _ctr, _status in enumerate(vstatus[_host.host]): self.logger.info(' %i: %s' % (_ctr, _status)) def _vacc_sync_check_counts_initial(self): """ Check the arm and load counts initially :return: """ # read the current arm and load counts vacc_status = self.vacc_status() arm_count0 = vacc_status[self.hosts[0].host][0]['armcount'] load_count0 = vacc_status[self.hosts[0].host][0]['loadcount'] # check the xhosts load and arm counts for host in self.hosts: for status in vacc_status[host.host]: _bad_ldcnt = status['loadcount'] != load_count0 _bad_armcnt = status['armcount'] != arm_count0 if _bad_ldcnt or _bad_armcnt: _err = 'All hosts do not have matching arm and ' \ 'load counts.' self.logger.error(_err) self._vacc_sync_print_vacc_statuses(vacc_status) raise RuntimeError(_err) self.logger.info(' Before arming: arm_count(%i) load_count(%i)' % (arm_count0, load_count0)) return arm_count0, load_count0 def _vacc_sync_check_arm_count(self, armcount_initial): """ Check that the arm count increased :return: """ vacc_status = self.vacc_status() arm_count_new = vacc_status[self.hosts[0].host][0]['armcount'] for host in self.hosts: for status in vacc_status[host.host]: if ((status['armcount'] != arm_count_new) or (status['armcount'] != armcount_initial + 1)): _err = 'xeng_vacc_sync: all hosts do not have ' \ 'matching arm counts or arm count did ' \ 'not increase.' 
self.logger.error(_err) self._vacc_sync_print_vacc_statuses(vacc_status) return False self.logger.info(' Done arming') return True def _vacc_sync_check_loadtimes(self): """ :return: """ lsws = THREADED_FPGA_OP( self.hosts, timeout=10, target_function=( lambda x: x.registers.vacc_time_lsw.read()['data']),) msws = THREADED_FPGA_OP( self.hosts, timeout=10, target_function=( lambda x: x.registers.vacc_time_msw.read()['data']),) _host0 = self.hosts[0].host for host in self.hosts: if ((lsws[host.host]['lsw'] != lsws[_host0]['lsw']) or (msws[host.host]['msw'] != msws[_host0]['msw'])): _err = 'xeng_vacc_sync: all hosts do not have matching ' \ 'vacc LSWs and MSWs' self.logger.error(_err) self.logger.error('LSWs: %s' % lsws) self.logger.error('MSWs: %s' % msws) vacc_status = self.vacc_status() self._vacc_sync_print_vacc_statuses(vacc_status) return False lsw = lsws[self.hosts[0].host]['lsw'] msw = msws[self.hosts[0].host]['msw'] xldtime = (msw << 32) | lsw self.logger.info(' x engines have vacc ld time %i' % xldtime) return True def _vacc_sync_wait_for_arm(self, load_mcount): """ :param load_mcount: :return: """ t_now = time.time() time_from_mcnt = self.corr.time_from_mcnt(load_mcount) wait_time = time_from_mcnt - t_now + 0.2 if wait_time <= 0: self.logger.error(' This is wonky - why is the wait_time ' 'less than zero? %.3f' % wait_time) self.logger.error(' corr synch epoch: %i' % self.corr.get_synch_time()) self.logger.error(' time.time(): %.10f' % t_now) self.logger.error(' time_from_mcnt: %.10f' % time_from_mcnt) self.logger.error(' ldmcnt: %i' % load_mcount) # hack wait_time = t_now + 4 self.logger.info(' Waiting %2.2f seconds for arm to ' 'trigger.' % wait_time) time.sleep(wait_time) def _vacc_sync_check_load_count(self, load_count0): """ Did the vaccs load counts increment correctly? 
:param load_count0: :return: """ vacc_status = self.vacc_status() load_count_new = vacc_status[self.hosts[0].host][0]['loadcount'] for host in self.hosts: for status in vacc_status[host.host]: if ((status['loadcount'] != load_count_new) or (status['loadcount'] != load_count0 + 1)): self.logger.error('vacc did not trigger!') self._vacc_sync_print_vacc_statuses(vacc_status) return False self.logger.info(' All vaccs triggered correctly.') return True def _vacc_sync_final_check(self): """ Check the vacc status, errors and accumulations :return: """ self.logger.info('\tChecking for errors & accumulations...') vac_okay = self._vacc_check_okay_initial() if not vac_okay: vacc_status = self.vacc_status() vacc_error_detail = THREADED_FPGA_FUNC( self.hosts, timeout=5, target_function='vacc_get_error_detail') self.logger.error('\t\txeng_vacc_sync: exited on vacc error') self.logger.error('\t\txeng_vacc_sync: vacc statii:') for host, item in vacc_status.items(): self.logger.error('\t\t\t%s: %s' % (host, str(item))) self.logger.error('\t\txeng_vacc_sync: vacc errors:') for host, item in vacc_error_detail.items(): self.logger.error('\t\t\t%s: %s' % (host, str(item))) self.logger.error('\t\txeng_vacc_sync: exited on vacc error') return False self.logger.info('\t...accumulations rolling in without error.') return True def _vacc_check_okay_initial(self): """ After an initial setup, is the vacc okay? Are the error counts zero and the counters ticking over? :return: True or False """ vacc_status = self.vacc_status() note_errors = False for host in self.hosts: for xeng_ctr, status in enumerate(vacc_status[host.host]): _msgpref = '{h}:{x} - '.format(h=host, x=xeng_ctr) errs = status['errors'] thresh = self.corr.qdr_vacc_error_threshold if (errs > 0) and (errs < thresh): self.logger.warn( '\t\t{pref}{thresh} > vacc errors > 0. Que ' 'pasa?'.format(pref=_msgpref, thresh=thresh)) note_errors = True elif (errs > 0) and (errs >= thresh): self.logger.error( '\t\t{pref}vacc errors > {thresh}. 
Problems.'.format( pref=_msgpref, thresh=thresh)) return False if status['count'] <= 0: self.logger.error( '\t\t{}vacc counts <= 0. Que pasa?'.format(_msgpref)) return False if note_errors: # investigate the errors further, what caused them? if self._vacc_non_parity_errors(): self.logger.error('\t\t\tsome vacc errors, but they\'re not ' 'parity errors. Problems.') return False self.logger.info('\t\tvacc_check_okay_initial: mostly okay, some ' 'QDR parity errors') else: self.logger.info('\t\tvacc_check_okay_initial: all okay') return True def _vacc_non_parity_errors(self): """ Are VACC errors other than parity errors occuring? :return: """ _loops = 2 parity_errors = 0 for ctr in range(_loops): detail = THREADED_FPGA_FUNC( self.hosts, timeout=5, target_function='vacc_get_error_detail') for xhost in detail: for vals in detail[xhost]: for field in vals: if vals[field] > 0: if field != 'parity': return True else: parity_errors += 1 if ctr < _loops - 1: time.sleep(self.get_acc_time() * 1.1) if parity_errors == 0: self.logger.error('\t\tThat\'s odd, VACC errors reported but ' 'nothing caused them?') return True return False def vacc_sync(self): """ Sync the vector accumulators on all the x-engines. Assumes that the x-engines are all receiving data. 
:return: the vacc synch time, in seconds since the UNIX epoch """ if self.vacc_synch_running.is_set(): self.logger.error('vacc_sync called when it was already running?') return self.vacc_synch_running.set() min_load_time = 2 attempts = 0 try: while True: attempts += 1 if attempts > MAX_VACC_SYNCH_ATTEMPTS: raise VaccSynchAttemptsMaxedOut( 'Reached maximum vacc synch attempts, aborting') # check if the vaccs need resetting self._vacc_sync_check_reset() # estimate the sync time, if needed self._vacc_sync_calc_load_mcount(time.time()) # work out the load time vacc_load_time = self._vacc_sync_create_loadtime(min_load_time) # set the vacc load time on the xengines load_mcount = self._vacc_sync_calc_load_mcount(vacc_load_time) # set the load mcount on the x-engines self.logger.info(' Applying load time: %i.' % load_mcount) THREADED_FPGA_FUNC( self.hosts, timeout=10, target_function=('vacc_set_loadtime', (load_mcount,),)) # check the current counts (arm_count0, load_count0) = self._vacc_sync_check_counts_initial() # arm the xhosts THREADED_FPGA_FUNC( self.hosts, timeout=10, target_function='vacc_arm') # did the arm count increase? if not self._vacc_sync_check_arm_count(arm_count0): continue # check the the load time was stored correctly if not self._vacc_sync_check_loadtimes(): continue # wait for the vaccs to arm self._vacc_sync_wait_for_arm(load_mcount) # check the status to see that the load count increased if not self._vacc_sync_check_load_count(load_count0): continue # allow vacc to flush and correctly populate parity bits: self.logger.info(' Waiting %2.2fs for an accumulation to ' 'flush, to correctly populate parity bits.' % self.get_acc_time()) time.sleep(self.get_acc_time() + 0.2) self.logger.info(' Clearing status and reseting counters.') THREADED_FPGA_FUNC(self.hosts, timeout=10, target_function='clear_status') # wait for a good accumulation to finish. self.logger.info(' Waiting %2.2fs for an accumulation to ' 'flush before checking counters.' 
% self.get_acc_time()) time.sleep(self.get_acc_time() + 0.2) # check the vacc status, errors and accumulations if not self._vacc_sync_final_check(): continue # done synch_time = self.corr.time_from_mcnt(load_mcount) self.vacc_synch_running.clear() return synch_time except KeyboardInterrupt: self.vacc_synch_running.clear() except VaccSynchAttemptsMaxedOut as e: self.vacc_synch_running.clear() self.logger.error(e.message) raise e def set_acc_time(self, acc_time_s, vacc_resync=True): """ Set the vacc accumulation length based on a required dump time, in seconds :param acc_time_s: new dump time, in seconds :param vacc_resync: force a vacc resynchronisation :return: """ if use_xeng_sim: raise RuntimeError('That\'s not an option anymore.') new_acc_len = ( (self.corr.sample_rate_hz * acc_time_s) / (self.corr.xeng_accumulation_len * self.corr.n_chans * 2.0)) new_acc_len = round(new_acc_len) self.corr.logger.info('set_acc_time: %.3fs -> new_acc_len(%i)' % (acc_time_s, new_acc_len)) self.set_acc_len(new_acc_len, vacc_resync) if self.corr.sensor_manager: sensor = self.corr.sensor_manager.sensor_get('integration-time') sensor.set_value(self.get_acc_time()) def get_acc_time(self): """ Get the dump time currently being used. Note: Will only be correct if accumulation time was set using this correlator object instance since cached values are used for the calculation. I.e., the number of accumulations are _not_ read from the FPGAs. :return: """ return (self.corr.xeng_accumulation_len * self.corr.accumulation_len * self.corr.n_chans * 2.0) / self.corr.sample_rate_hz def get_acc_len(self): """ Read the acc len currently programmed into the FPGA. :return: """ return self.hosts[0].registers.acc_len.read_uint() def set_acc_len(self, acc_len=None, vacc_resync=True): """ Set the QDR vector accumulation length. 
:param acc_len: :param vacc_resync: force a vacc resynchronisation :return: """ if (acc_len is not None) and (acc_len <= 0): _err = 'new acc_len of %i makes no sense' % acc_len self.logger.error(_err) raise RuntimeError(_err) reenable_timer = False if self.vacc_check_enabled.is_set(): self.vacc_check_timer_stop() reenable_timer = True if acc_len is not None: self.corr.accumulation_len = acc_len THREADED_FPGA_OP( self.hosts, timeout=10, target_function=( lambda fpga_: fpga_.registers.acc_len.write_int(self.corr.accumulation_len),)) if self.corr.sensor_manager: sensor = self.corr.sensor_manager.sensor_get('n-accs') sensor.set_value(self.corr.accumulation_len) self.logger.info('Set vacc accumulation length %d system-wide ' '(%.2f seconds)' % (self.corr.accumulation_len, self.get_acc_time())) self.corr.speadops.update_metadata([0x1015, 0x1016]) if vacc_resync: self.vacc_sync() if reenable_timer: self.vacc_check_timer_start() def xeng_tx_enable(self, data_stream): """ Start transmission of data streams from the x-engines :param data_stream - the data stream on which to act :return: """ dstrm = data_stream or self.data_stream THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.control.write(gbe_txen=True),)) self.logger.info('X-engine output enabled') def xeng_tx_disable(self, data_stream): """ Start transmission of data streams from the x-engines :param data_stream - the data stream on which to act :return: """ dstrm = data_stream or self.data_stream THREADED_FPGA_OP( self.hosts, timeout=5, target_function=( lambda fpga_: fpga_.registers.control.write(gbe_txen=False),)) self.logger.info('X-engine output disabled') def spead_meta_update_stream_destination(self): """ :return: """ meta_ig = self.data_stream.meta_ig self.corr.speadops.add_item( meta_ig, name='rx_udp_port', id=0x1022, description='Destination UDP port for %s data ' 'output.' 
% self.data_stream.name, shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.data_stream.destination.port) ipstr = numpy.array(str(self.data_stream.destination.ip)) self.corr.speadops.add_item( meta_ig, name='rx_udp_ip_str', id=0x1024, description='Destination IP address for %s data ' 'output.' % self.data_stream.name, shape=ipstr.shape, dtype=ipstr.dtype, value=ipstr) # x-engine-specific SPEAD operations def spead_meta_update_all(self): """ Update metadata for this correlator's xengine output. :return: """ meta_ig = self.data_stream.meta_ig self.corr.speadops.item_0x1007(meta_ig) self.corr.speadops.add_item( meta_ig, name='n_bls', id=0x1008, description='Number of baselines in the data stream.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=len(self.corr.baselines)) self.corr.speadops.add_item( meta_ig, name='n_chans', id=0x1009, description='Number of frequency channels in an integration.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.corr.n_chans) self.corr.speadops.item_0x100a(meta_ig) n_xengs = len(self.corr.xhosts) * self.corr.x_per_fpga self.corr.speadops.add_item( meta_ig, name='n_xengs', id=0x100B, description='The number of x-engines in the system.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=n_xengs) bls_ordering = numpy.array( [baseline for baseline in self.corr.baselines]) # this is a list of the baseline stream pairs, e.g. 
['ant0x' 'ant0y'] self.corr.speadops.add_item( meta_ig, name='bls_ordering', id=0x100C, description='The baseline ordering in the output data stream.', shape=bls_ordering.shape, dtype=bls_ordering.dtype, value=bls_ordering) self.corr.speadops.item_0x100e(meta_ig) self.corr.speadops.add_item( meta_ig, name='center_freq', id=0x1011, description='The on-sky centre-frequency.', shape=[], format=[('f', 64)], value=int(self.corr.configd['fengine']['true_cf'])) self.corr.speadops.add_item( meta_ig, name='bandwidth', id=0x1013, description='The input (analogue) bandwidth of the system.', shape=[], format=[('f', 64)], value=int(self.corr.configd['fengine']['bandwidth'])) self.corr.speadops.item_0x1015(meta_ig) self.corr.speadops.item_0x1016(meta_ig) self.corr.speadops.item_0x101e(meta_ig) self.corr.speadops.add_item( meta_ig, name='xeng_acc_len', id=0x101F, description='Number of spectra accumulated inside X engine. ' 'Determines minimum integration time and ' 'user-configurable integration time stepsize. ' 'X-engine correlator internals.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.corr.xeng_accumulation_len) self.corr.speadops.item_0x1020(meta_ig) pkt_len = int(self.corr.configd['fengine']['10gbe_pkt_len']) self.corr.speadops.add_item( meta_ig, name='feng_pkt_len', id=0x1021, description='Payload size of 10GbE packet exchange between ' 'F and X engines in 64 bit words. Usually equal ' 'to the number of spectra accumulated inside X ' 'engine. 
F-engine correlator internals.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=pkt_len) self.spead_meta_update_stream_destination() port = int(self.corr.configd['fengine']['10gbe_port']) self.corr.speadops.add_item( meta_ig, name='feng_udp_port', id=0x1023, description='Port for F-engines 10Gbe links in the system.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=port) ipstr = numpy.array(self.corr.configd['fengine']['10gbe_start_ip']) self.corr.speadops.add_item( meta_ig, name='feng_start_ip', id=0x1025, description='Start IP address for F-engines in the system.', shape=ipstr.shape, dtype=ipstr.dtype, value=ipstr) self.corr.speadops.add_item( meta_ig, name='xeng_rate', id=0x1026, description='Target clock rate of processing engines (xeng).', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.corr.xeng_clk) self.corr.speadops.item_0x1027(meta_ig) x_per_fpga = int(self.corr.configd['xengine']['x_per_fpga']) self.corr.speadops.add_item( meta_ig, name='x_per_fpga', id=0x1041, description='Number of X engines per FPGA host.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=x_per_fpga) self.corr.speadops.add_item( meta_ig, name='ddc_mix_freq', id=0x1043, description='Digital downconverter mixing frequency as a fraction ' 'of the ADC sampling frequency. eg: 0.25. Set to zero ' 'if no DDC is present.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=0) self.corr.speadops.item_0x1045(meta_ig) self.corr.speadops.item_0x1046(meta_ig) self.corr.speadops.add_item( meta_ig, name='xeng_out_bits_per_sample', id=0x1048, description='The number of bits per value of the xeng ' 'accumulator output. 
Note this is for a ' 'single value, not the combined complex size.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.corr.xeng_outbits) self.corr.speadops.add_item( meta_ig, name='f_per_fpga', id=0x1049, description='Number of F engines per FPGA host.', shape=[], format=[('u', SPEAD_ADDRSIZE)], value=self.corr.f_per_fpga) self.corr.speadops.item_0x104a(meta_ig) self.corr.speadops.item_0x104b(meta_ig) self.corr.speadops.item_0x1400(meta_ig) self.corr.speadops.item_0x1600(meta_ig) self.corr.speadops.add_item( meta_ig, name='flags_xeng_raw', id=0x1601, description='Flags associated with xeng_raw data output. ' 'bit 34 - corruption or data missing during integration' 'bit 33 - overrange in data path ' 'bit 32 - noise diode on during integration ' 'bits 0 - 31 reserved for internal debugging', shape=[], format=[('u', SPEAD_ADDRSIZE)]) self.corr.speadops.add_item( meta_ig, name='xeng_raw', id=0x1800, description='Raw data for %i xengines in the system. This item ' 'represents a full spectrum (all frequency channels) ' 'assembled from lowest frequency to highest ' 'frequency. Each frequency channel contains the data ' 'for all baselines (n_bls given by SPEAD ID 0x100b). ' 'Each value is a complex number - two (real and ' 'imaginary) unsigned integers.' % n_xengs, # dtype=numpy.int32, dtype=numpy.dtype('>i4'), shape=[self.corr.n_chans, len(self.corr.baselines), 2]) # shape=[self.corr.n_chans * len(self.corr.baselines), 2]) def spead_meta_issue_all(self, data_stream): """ Issue = update the metadata then send it. :param data_stream: The DataStream object for which to send metadata :return: True if the callback transmits the metadata as well """ dstrm = data_stream or self.data_stream self.spead_meta_update_all() dstrm.meta_transmit() self.logger.info('Issued SPEAD data descriptor for data stream %s ' 'to %s.' % (dstrm.name, dstrm.meta_destination)) return True
class AsyncTaskManager(object):
    """
    Run cron-style tasks and a queue of tasks on a bounded set of workers.

    Aucote uses asynchronous tasks executed in the ioloop. Some of them,
    especially scanners, should finish before the ioloop stops.

    This class should be accessed through the ``instance`` class method,
    which returns a shared instance of the task manager (one per name).

    """
    # Registry of shared managers, keyed by the name given to ``instance``.
    _instances = {}

    # Values of cfg['service.scans.task_politic'] handled by
    # ``change_throttling``. Any other value (including WAIT) stops no
    # working task.
    TASKS_POLITIC_WAIT = 0
    TASKS_POLITIC_KILL_WORKING_FIRST = 1
    TASKS_POLITIC_KILL_PROPORTIONS = 2
    TASKS_POLITIC_KILL_WORKING = 3

    def __init__(self, parallel_tasks=10):
        """
        Args:
            parallel_tasks (int): number of workers started by ``start`` and
                the base for the throttled worker limit

        """
        self._shutdown_condition = Event()
        self._stop_condition = Event()
        self._cron_tasks = {}
        self._parallel_tasks = parallel_tasks
        self._tasks = Queue()
        # Worker number -> executor currently run by that worker
        # (None while the worker is idle).
        self._task_workers = {}
        self._events = {}
        # Current maximum number of workers; lowered/raised by throttling.
        self._limit = self._parallel_tasks
        self._next_task_number = 0
        self._toucan_keys = {}

    @classmethod
    def instance(cls, name=None, **kwargs):
        """
        Return the shared manager registered under ``name``, creating it
        on first use.

        Args:
            name: registry key of the instance
            **kwargs: passed to the constructor when the instance is created

        Returns:
            AsyncTaskManager

        """
        if cls._instances.get(name) is None:
            # Build through ``cls`` so a subclass calling ``instance`` gets
            # an object of its own type rather than the base class.
            cls._instances[name] = cls(**kwargs)
        return cls._instances[name]

    @property
    def shutdown_condition(self):
        """
        Event which is set once ``stop`` has finished waiting, i.e. the
        manager is ready to shut down

        Returns:
            Event

        """
        return self._shutdown_condition

    def start(self):
        """
        Start the cron tasks and schedule the initial workers

        Returns:
            None

        """
        for task in self._cron_tasks.values():
            task.start()

        for number in range(self._parallel_tasks):
            # The value returned by add_callback becomes the worker's slot.
            self._task_workers[number] = IOLoop.current().add_callback(
                partial(self.process_tasks, number))

        self._next_task_number = self._parallel_tasks

    def add_crontab_task(self, task, cron, event=None):
        """
        Add function to scheduler and execute at cron time

        Args:
            task (function):
            cron (str): crontab value
            event: key of the event which prevents running tasks with a
                similar aim at the same time, eg. security scans. Tasks
                registered with the same key share one Event object.

        Returns:
            None

        """
        if event is not None:
            event = self._events.setdefault(event, Event())
        self._cron_tasks[task] = AsyncCrontabTask(cron, task, event)

    @gen.coroutine
    def stop(self):
        """
        Stop the cron tasks and wait for them and for the queue to finish

        Returns:
            None

        """
        for task in self._cron_tasks.values():
            task.stop()
        IOLoop.current().add_callback(self._prepare_shutdown)
        # Resume only when the stop condition is set and the queue's
        # join() future resolves.
        yield [self._stop_condition.wait(), self._tasks.join()]
        self._shutdown_condition.set()

    def _prepare_shutdown(self):
        """
        Set the stop condition once no cron task reports it is running;
        until then re-schedule this check on the ioloop

        Returns:
            None

        """
        if any(task.is_running() for task in self._cron_tasks.values()):
            IOLoop.current().add_callback(self._prepare_shutdown)
            return

        self._stop_condition.set()

    def clear(self):
        """
        Forget the cron tasks and reset the stop and shutdown events

        Returns:
            None

        """
        self._cron_tasks = {}
        self._shutdown_condition.clear()
        self._stop_condition.clear()

    async def process_tasks(self, number):
        """
        Worker loop: take tasks from the queue and run every task in a
        separate thread (_Executor), polling until the thread finishes.

        Args:
            number (int): key of this worker in the worker table

        """
        log.info("Starting worker %s", number)
        while True:
            try:
                item = self._tasks.get_nowait()
                try:
                    log.debug("Worker %s: starting %s", number, item)
                    thread = _Executor(task=item, number=number)
                    self._task_workers[number] = thread
                    thread.start()

                    while thread.is_alive():
                        await sleep(0.5)
                except Exception:
                    # Only ordinary errors are logged and survived.
                    # Cancellation, GeneratorExit and KeyboardInterrupt are
                    # not Exception subclasses and must not be swallowed
                    # around an await.
                    log.exception("Worker %s: exception occurred", number)
                finally:
                    log.debug("Worker %s: %s finished", number, item)
                    self._tasks.task_done()
                    tasks_per_scan = (
                        '{}: {}'.format(scanner, len(tasks))
                        for scanner, tasks in self.tasks_by_scan.items())
                    log.debug("Tasks left in queue: %s (%s)",
                              self.unfinished_tasks,
                              ', '.join(tasks_per_scan))
                    self._task_workers[number] = None
            except QueueEmpty:
                await gen.sleep(0.5)
                if self._stop_condition.is_set() and self._tasks.empty():
                    return
            finally:
                # More workers than the throttled limit allows: leave the
                # loop. A ``break`` inside ``finally`` also discards the
                # pending ``return`` above, so in that case the worker is
                # removed from the table below.
                if self._limit < len(self._task_workers):
                    break

        del self._task_workers[number]
        log.info("Closing worker %s", number)

    def add_task(self, task):
        """
        Add task to the queue

        Args:
            task:

        Returns:
            None

        """
        self._tasks.put(task)

    @property
    def unfinished_tasks(self):
        """
        Number of tasks which are still being processed or wait in the queue

        Returns:
            int

        """
        return self._tasks._unfinished_tasks

    @property
    def tasks_by_scan(self):
        """
        Return queued tasks grouped by the NAME of their scanner

        Returns:
            dict: scanner name -> list of queued tasks

        """
        tasks = self._tasks._queue

        return_value = {}

        for task in tasks:
            return_value.setdefault(task.context.scanner.NAME, []).append(task)

        return return_value

    @property
    def cron_tasks(self):
        """
        Registered cron tasks

        Returns:
            view of the values of the internal task dictionary

        """
        return self._cron_tasks.values()

    def cron_task(self, name):
        """
        Return the first cron task whose function NAME equals ``name``

        Args:
            name: value compared with ``task.func.NAME``

        Returns:
            the matching task, or None when there is none

        """
        for task in self._cron_tasks.values():
            if task.func.NAME == name:
                return task

    def change_throttling_toucan(self, key, value):
        """
        Callback-style wrapper around ``change_throttling``

        Args:
            key: unused
            value: new throttling value

        """
        self.change_throttling(value)

    def change_throttling(self, new_value):
        """
        Change throttling value. Keeps throttling value between 0 and 1.

        Behaviour of algorithm is described in docs/throttling.md

        Only working tasks are stopped here. Idle workers stop by themselves
        (see the limit check in ``process_tasks``).

        Args:
            new_value (float): requested throttling, clamped to [0, 1] and
                rounded to two decimal places

        """
        if new_value > 1:
            new_value = 1
        if new_value < 0:
            new_value = 0

        new_value = round(new_value * 100) / 100

        old_limit = self._limit
        self._limit = round(self._parallel_tasks * float(new_value))

        working_tasks = [
            number for number, task in self._task_workers.items()
            if task is not None
        ]
        current_tasks = len(self._task_workers)

        task_politic = cfg['service.scans.task_politic']

        if task_politic == self.TASKS_POLITIC_KILL_WORKING_FIRST:
            tasks_to_kill = current_tasks - self._limit
        elif task_politic == self.TASKS_POLITIC_KILL_PROPORTIONS:
            tasks_to_kill = round((old_limit - self._limit)
                                  * len(working_tasks) / self._parallel_tasks)
        elif task_politic == self.TASKS_POLITIC_KILL_WORKING:
            tasks_to_kill = (old_limit - self._limit) - (
                len(self._task_workers) - len(working_tasks))
        else:
            tasks_to_kill = 0

        log.debug('%s tasks will be killed', tasks_to_kill)

        for number in working_tasks:
            if tasks_to_kill <= 0:
                break
            self._task_workers[number].stop()
            tasks_to_kill -= 1

        # When the limit was raised, register and schedule the missing
        # workers. (self._limit already holds the new value; it is not
        # recomputed a second time.)
        current_tasks = len(self._task_workers)

        for number in range(self._limit - current_tasks):
            self._task_workers[self._next_task_number] = None
            IOLoop.current().add_callback(
                partial(self.process_tasks, self._next_task_number))
            self._next_task_number += 1
class Queue(object):
    """Coordinate producer and consumer coroutines.

    A ``maxsize`` of 0 (the default) means the queue is unbounded.
    """

    def __init__(self, maxsize=0):
        # None is rejected explicitly, before the numeric comparison.
        if maxsize is None:
            raise TypeError("maxsize can't be None")
        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        self._putters = collections.deque()  # Pairs of (item, Future).
        self._getters = collections.deque()  # Futures.
        self._unfinished_tasks = 0
        # Starts out set: nothing has been put yet, so nothing is pending.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self):
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self):
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self):
        """Return True when the queue holds no items."""
        return not self._queue

    def full(self):
        """Return True when a bounded queue has reached ``maxsize``.

        An unbounded queue (``maxsize == 0``) is never full.
        """
        if self.maxsize == 0:
            return False
        return self.qsize() >= self.maxsize

    def put(self, item, timeout=None):
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        try:
            self.put_nowait(item)
        except QueueFull:
            # No room: park the item with a Future that get_nowait resolves.
            waiter = Future()
            self._putters.append((item, waiter))
            _set_timeout(waiter, timeout)
            return waiter
        return gen._null_future

    def put_nowait(self, item):
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if not self._getters:
            if self.full():
                raise QueueFull
            self._put(item)
            return

        # A consumer is already waiting: hand the item straight to it.
        assert self.empty(), "queue non-empty, why are getters waiting?"
        consumer = self._getters.popleft()
        self._put(item)
        consumer.set_result(self._get())

    def get(self, timeout=None):
        """Remove and return an item from the queue.

        Returns a Future which resolves once an item is available, or raises
        `tornado.gen.TimeoutError` after a timeout.
        """
        result = Future()
        try:
            result.set_result(self.get_nowait())
        except QueueEmpty:
            # Nothing available: queue the Future until put_nowait fills it.
            self._getters.append(result)
            _set_timeout(result, timeout)
        return result

    def get_nowait(self):
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            # A producer is parked: admit its item, release it, then take
            # the next item out.
            assert self.full(), "queue not full, why are putters waiting?"
            parked_item, producer = self._putters.popleft()
            self._put(parked_item)
            producer.set_result(None)
            return self._get()
        if self.qsize():
            return self._get()
        raise QueueEmpty

    def task_done(self):
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        pending = self._unfinished_tasks
        if pending <= 0:
            raise ValueError('task_done() called too many times')
        pending -= 1
        self._unfinished_tasks = pending
        if pending == 0:
            self._finished.set()

    def join(self, timeout=None):
        """Block until all items in the queue are processed.

        Returns a Future, which raises `tornado.gen.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    # Storage hooks: _init creates the container, _get and _put use it.

    def _init(self):
        self._queue = collections.deque()

    def _get(self):
        return self._queue.popleft()

    def _put(self, item):
        # Every stored item counts as unfinished until task_done is called.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._queue.append(item)

    def _consume_expired(self):
        # Drop waiters at the head of each line whose Future is already done
        # (e.g. timed out).
        producers = self._putters
        while producers and producers[0][1].done():
            producers.popleft()

        consumers = self._getters
        while consumers and consumers[0].done():
            consumers.popleft()

    def __repr__(self):
        kind = type(self).__name__
        return '<%s at %s %s>' % (kind, hex(id(self)), self._format())

    def __str__(self):
        kind = type(self).__name__
        return '<%s %s>' % (kind, self._format())

    def _format(self):
        # Only non-empty / non-zero parts of the state are reported.
        pieces = ['maxsize=%r' % (self.maxsize, )]
        if getattr(self, '_queue', None):
            pieces.append(' queue=%r' % self._queue)
        if self._getters:
            pieces.append(' getters[%s]' % len(self._getters))
        if self._putters:
            pieces.append(' putters[%s]' % len(self._putters))
        if self._unfinished_tasks:
            pieces.append(' tasks=%s' % self._unfinished_tasks)
        return ''.join(pieces)
class IndexManager(object):
    """ Keeps track of configured datastore indexes.

    One ProjectIndexManager is kept per project found under the
    '/appscale/projects' ZooKeeper node. When admin operations are enabled,
    the manager also contends for a ZooKeeper lock and, while holding it,
    schedules the application of index definitions for each project.
    """
    # The node which keeps track of admin lock contenders.
    ADMIN_LOCK_NODE = '/appscale/datastore/index_manager_lock'

    def __init__(self, zk_client, datastore_access, perform_admin=False):
        """ Creates a new IndexManager.

        Args:
          zk_client: A kazoo.client.KazooClient object.
          datastore_access: A DatastoreDistributed object.
          perform_admin: A boolean specifying whether or not to perform admin
            operations.
        """
        self.projects = {}
        self._wake_event = AsyncEvent()
        self._zk_client = zk_client
        self.admin_lock = AsyncKazooLock(self._zk_client, self.ADMIN_LOCK_NODE)

        # TODO: Refactor so that this dependency is not needed.
        self._ds_access = datastore_access

        self._zk_client.ensure_path('/appscale/projects')
        self._zk_client.ChildrenWatch('/appscale/projects',
                                      self._update_projects)

        # Since this manager can be used synchronously, ensure that the
        # projects are populated for this IOLoop iteration.
        project_ids = self._zk_client.get_children('/appscale/projects')
        self._update_projects_sync(project_ids)

        if perform_admin:
            IOLoop.current().spawn_callback(self._contend_for_admin_lock)

    def _update_projects_sync(self, new_project_ids):
        """ Updates the list of the deployment's projects.

        Projects that are new get a ProjectIndexManager (and, if this
        manager currently holds the admin lock, have their definitions
        applied). Projects that disappeared are deactivated and dropped.

        Args:
          new_project_ids: A list of strings specifying current project IDs.
        """
        for project_id in new_project_ids:
            if project_id not in self.projects:
                self.projects[project_id] = ProjectIndexManager(
                    project_id, self._zk_client, self, self._ds_access)
                if self.admin_lock.is_acquired:
                    IOLoop.current().spawn_callback(
                        self.projects[project_id].apply_definitions)

        # Build the membership set once instead of scanning the list for
        # every known project.
        current_ids = set(new_project_ids)

        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which raises RuntimeError on Python 3 when iterating the live
        # dictionary view.
        for project_id in list(self.projects):
            if project_id not in current_ids:
                self.projects[project_id].active = False
                del self.projects[project_id]

    def _update_projects(self, project_ids):
        """ Watches for changes to list of existing projects.

        The actual update is handed to the IOLoop rather than being run
        directly in the watch callback.

        Args:
          project_ids: A list of strings specifying current project IDs.
        """
        persistent_update_projects = retry_children_watch_coroutine(
            '/appscale/projects', self._update_projects_sync)
        IOLoop.instance().add_callback(persistent_update_projects, project_ids)

    def _handle_connection_change(self, state):
        """ Notifies the admin lock holder when the connection changes.

        Args:
          state: The new connection state.
        """
        IOLoop.current().add_callback(self._wake_event.set)

    @gen.coroutine
    def _contend_for_admin_lock(self):
        """
        Waits to acquire an admin lock that gives permission to apply index
        definitions. The lock is useful for preventing many servers from
        writing the same index entries at the same time. After acquiring the
        lock, the individual ProjectIndexManagers are responsible for mutating
        state whenever a project's index definitions change.
        """
        while True:
            # Set up a callback to get notified if the ZK connection changes.
            self._wake_event.clear()
            self._zk_client.add_listener(self._handle_connection_change)

            yield self.admin_lock.acquire()
            try:
                for project_index_manager in self.projects.values():
                    IOLoop.current().spawn_callback(
                        project_index_manager.apply_definitions)

                # Release the lock if the kazoo client gets disconnected.
                yield self._wake_event.wait()
            finally:
                self.admin_lock.release()
class JournalDBWriter(DBWriter, JournalReaderDelegate):
    """
    Writer that first appends each message to a journal file and then
    forwards it to a child writer.

    `store` only writes to the journal; a background task (`run`) reads the
    journal back and passes each entry to `child_writer.store`.
    """
    child_writer: DBWriter
    closed: bool
    journal_reader: 'JournalReader'
    journal_writer: 'JournalWriter'
    # Fired when something has been written via `store`; more generally, fired
    # whenever the `run` loop should be woken up.
    store_event: Event
    prefix: str

    def __init__(self, child_writer, dirpath, *, file_prefix='journal', max_log_file_size=1024 * 1024):
        """
        Open the journal under `dirpath` and start the background `run` task.

        :param child_writer: writer that journal entries are forwarded to
        :param dirpath: directory holding the journal files
        :param file_prefix: file name prefix joined onto `dirpath`
        :param max_log_file_size: passed through to open_journal_reader_writer
        """
        # loop = tornado.ioloop.IOLoop.current()
        self.child_writer = child_writer
        self.prefix = os.path.join(dirpath, file_prefix)
        self.journal_reader, self.journal_writer = open_journal_reader_writer(
            self.prefix, max_log_file_size=max_log_file_size, delegate=self)
        self.closed = False
        self.store_event = Event()

        self.writer_loop = asyncio.ensure_future(self.run())

    def __del__(self):
        # Best-effort shutdown if the owner forgot to call close(); this needs
        # a current IOLoop to drive the coroutine to completion.
        if not self.closed:
            loop = tornado.ioloop.IOLoop.current(instance=False)
            assert loop, 'ioloop is closed before journal_writer shutting down'
            loop.run_sync(self.close)

    async def close(self):
        """Stop the `run` task, wait for it, then close the journal reader and writer."""
        if self.closed:
            logger.warning('Journal is already closed')
            return

        logger.info('close JournalDBWriter')
        self.closed = True
        # Wake the run loop so that it notices `closed` and exits.
        self.store_event.set()
        await self.writer_loop

        self.journal_reader.close()
        self.journal_writer.close()

    def touch(self):
        """Wake the `run` loop without writing anything."""
        self.store_event.set()

    # override
    async def store(self, message_box, message) -> bool:
        """Append the message (as JSON) to the journal and wake the `run` loop; always returns True."""
        self.journal_writer.write(message.to_json())
        self.store_event.set()
        return True

    async def flush(self, flush_all=False):
        """Delegate the flush to the child writer."""
        await self.child_writer.flush(flush_all=flush_all)

    # delegate
    def on_advance_log_file(self, new_log_file_index: int) -> None:
        """Delete the log files whose index is lower than new_log_file_index."""
        log_files = list_log_files(self.prefix)

        logger.info('Log files is advanced to %d, rotating log files...', new_log_file_index)
        for index, filepath in log_files:
            if index < new_log_file_index:
                try:
                    os.unlink(filepath)
                    logger.info('remove old log file %s', filepath)
                except Exception:
                    # Removal is best-effort; a failure is logged and the
                    # remaining files are still processed.
                    logger.exception('Failed to remove old log file %s', filepath)

    # private
    async def run(self):
        """When new data has been written to the log file, write it to child_writer and advance the position."""
        assert not self.closed

        logger.debug('run started')
        while not self.closed:
            store_result = None
            try:
                async with self.journal_reader.read() as data:
                    logger.debug('Read message from log : %r', data)
                    if data:
                        logger.debug('Try storing to child writer')
                        try:
                            store_result = await self.store_message_to_child(
                                data)
                        except BadMessage:
                            logger.info('Skip bad message: %r', data)
                        else:
                            logger.debug('Store to child writer: %r', store_result)
                            if not store_result:
                                raise ChildWriteFailed()
            except ChildWriteFailed:
                # When the write fails, leave the `with` block via an exception;
                # that way JournalReader does not update its position.
                pass

            if data is None:
                # No data available, so wait.
                logger.debug('Wait message...')
            elif not store_result:
                logger.debug('Store failed, wait reconnection')

            if not store_result or data is None:
                # Sleep until store()/touch()/close() fires the event, or until
                # the wait times out.
                self.store_event.clear()
                try:
                    await self.store_event.wait(timeout=EVENT_WAIT_TIMEOUT)
                except tornado.util.TimeoutError:
                    # timeout
                    pass
                else:
                    self.store_event.clear()
                logger.debug('wakeup.')

        logger.debug('run finished')

    async def store_message_to_child(self, data: str) -> bool:
        """Decode one journal entry and store it through the child writer."""
        parsed = json_decoder.decode(data)
        message_box = self.find_message_box(parsed['boxId'])
        message = ModuleMessage.from_json(parsed)
        return await self.child_writer.store(message_box, message)

    # NOTE(review): lru_cache on an instance method keys on `self` as well as
    # `box_id`, so the cache keeps this writer (and the cached MessageBox
    # objects) alive for as long as the cache exists -- confirm this is intended.
    @functools.lru_cache()
    def find_message_box(self, box_id: str) -> MessageBox:
        """Look up the MessageBox with the given uuid; raises MessageBoxNotFoundError if there is none."""
        # Touches the DB directly.
        try:
            box = cast(MessageBox, MessageBox.query.filter_by(uuid=box_id).one())
        except NoResultFound:
            raise MessageBoxNotFoundError(box_id)

        return box
class SQSDrain(object):
    """Implementation of IDrain that writes to an AWS SQS queue.

    Messages are buffered in an internal queue and sent in batches by a
    background coroutine (`_onSend`). A flush is triggered either when the
    buffer reaches `emitter.max_messages` or when a timer armed on the first
    buffered message fires.
    """

    def __init__(self, logger, loop, sqs_client, metric_prefix='emitter'):
        self.emitter = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.output_error = Event()
        self.state = RUNNING
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self._send_queue = Queue()
        self._should_flush_queue = Event()
        self._flush_handle = None
        self.loop.spawn_callback(self._onSend)

    @gen.coroutine
    def _flush_send_batch(self, batch_size):
        """Send up to `batch_size` buffered messages as one batch.

        At most `emitter.max_messages` messages are taken. Messages that
        could not be sent are put back on the queue and `output_error` is set.
        """
        send_batch = [
            self._send_queue.get_nowait()
            for pos in range(min(batch_size, self.emitter.max_messages))
        ]
        try:
            response = yield self.emitter.send_message_batch(*send_batch)
        except SQSError as err:
            self.logger.exception('Error encountered flushing data to SQS: %s',
                                  err)
            self.output_error.set()
            for msg in send_batch:
                self._send_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.output_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to send: %s', req.Id)
                    # NOTE(review): this re-queues the failure entry from the
                    # response, not the original message -- confirm that the
                    # two are interchangeable for send_message_batch.
                    self._send_queue.put_nowait(req)
        finally:
            # Every dequeued item must be marked done, otherwise the queue's
            # unfinished-task counter never reaches zero and close() (which
            # waits on join()) can never complete. This runs after any
            # re-queueing above, so the counter cannot drop to zero while
            # failed messages are still pending.
            for _ in send_batch:
                self._send_queue.task_done()

    @gen.coroutine
    def _onSend(self):
        """Background loop: drain the buffer, then sleep until a flush is requested."""
        respawn = True
        while respawn:
            qsize = self._send_queue.qsize()
            # This will keep flushing until clear,
            # including items that show up in between flushes
            while qsize > 0:
                try:
                    yield self._flush_send_batch(qsize)
                except Exception as err:
                    self.logger.exception(err)
                    self.output_error.set()
                qsize = self._send_queue.qsize()
            # We've cleared the backlog, remove any possible future flush
            if self._flush_handle:
                self.loop.remove_timeout(self._flush_handle)
                self._flush_handle = None
            self._should_flush_queue.clear()
            yield self._should_flush_queue.wait()

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the drain as closing and wait until all buffered messages are sent.

        `timeout` is passed to the queue's join().
        """
        self.state = CLOSING
        yield self._send_queue.join(timeout)

    def emit_nowait(self, msg):
        """Buffer `msg` without waiting.

        Raises QueueFull (after requesting a flush) when the buffer already
        holds `emitter.max_messages` messages.
        """
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
            raise QueueFull()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                self._should_flush_queue.set,
            )
        self.logger.debug("Drain emitting")
        self._send_queue.put_nowait(msg)

    @gen.coroutine
    def emit(self, msg, timeout=None):
        """Buffer `msg`, requesting a flush first if the buffer is at capacity.

        `timeout` is passed to the queue's put().
        """
        if self._send_queue.qsize() >= self.emitter.max_messages:
            # Signal flush
            self._should_flush_queue.set()
        elif self._flush_handle is None:
            # Ensure we flush messages at least by MAX_TIMEOUT
            self._flush_handle = self.loop.add_timeout(
                MAX_TIMEOUT,
                self._should_flush_queue.set,
            )
        yield self._send_queue.put(msg, timeout)
class SQSSource(object):
    """Implementation of ISource that receives messages from a SQS queue.

    Two background coroutines are spawned on construction: `onInput`, which
    receives batches and pushes the messages into the gate, and `_onDelete`,
    which deletes messages that have been handed to the gate.
    """

    max_delete_delay = 5

    def __init__(self, logger, loop, gate, sqs_client, metric_prefix='source'):
        self.gate = gate
        self.collector = sqs_client
        self.logger = logger
        self.loop = loop
        self.metric_prefix = metric_prefix
        self.end_of_input = Event()
        self.input_error = Event()
        self.state = RUNNING
        self._delete_queue = Queue()
        self._should_flush_queue = Event()
        self.sender_tag = 'sender:%s.%s' % (self.__class__.__module__,
                                            self.__class__.__name__)
        self.loop.spawn_callback(self.onInput)
        self.loop.spawn_callback(self._onDelete)

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the source as closing and wait until pending deletes are done.

        `timeout` is passed to the delete queue's join().
        """
        self.state = CLOSING
        self.logger.warning('Closing source')
        yield self._delete_queue.join(timeout)

    @gen.coroutine
    def _flush_delete_batch(self, batch_size):
        """Delete up to `batch_size` processed messages as one batch.

        At most `collector.max_messages` messages are taken. Messages that
        could not be deleted are put back on the queue and `input_error` is
        set.
        """
        delete_batch = [
            self._delete_queue.get_nowait()
            for pos in range(min(batch_size, self.collector.max_messages))
        ]
        try:
            response = yield self.collector.delete_message_batch(*delete_batch)
        except SQSError as err:
            lmsg = 'Error encountered deleting processed messages in SQS: %s'
            self.logger.exception(lmsg, err)
            self.input_error.set()
            for msg in delete_batch:
                self._delete_queue.put_nowait(msg)
        else:
            if response.Failed:
                self.input_error.set()
                for req in response.Failed:
                    self.logger.error('Message failed to delete: %s', req.Id)
                    # NOTE(review): this re-queues the failure entry from the
                    # response, not the original message -- confirm that the
                    # two are interchangeable for delete_message_batch.
                    self._delete_queue.put_nowait(req)
        finally:
            # Every dequeued item must be marked done, otherwise the queue's
            # unfinished-task counter never reaches zero and close() (which
            # waits on join()) can never complete. This runs after any
            # re-queueing above, so the counter cannot drop to zero while
            # failed deletes are still pending.
            for _ in delete_batch:
                self._delete_queue.task_done()

    @gen.coroutine
    def _onDelete(self):
        """Background loop: flush pending deletes, then sleep until signalled.

        Any unexpected error sets `input_error` and stops the loop.
        """
        respawn = True
        while respawn:
            try:
                qsize = self._delete_queue.qsize()
                # This will keep flushing until clear,
                # including items that show up in between flushes
                while qsize > 0:
                    yield self._flush_delete_batch(qsize)
                    qsize = self._delete_queue.qsize()
                self._should_flush_queue.clear()
                yield self._should_flush_queue.wait()
            except Exception as err:
                self.logger.exception(err)
                self.input_error.set()
                respawn = False

    @gen.coroutine
    def onInput(self):
        """Background loop: receive message batches and feed them to the gate.

        Any unexpected error sets `input_error` and stops the loop.
        """
        respawn = True
        retry_timeout = INITIAL_TIMEOUT
        # We use an algorithm similar to TCP window scaling,
        # so that we request fewer messages when we encounter
        # back pressure from our gate/drain and request more
        # when we flushed a complete batch
        window_size = self.collector.max_messages
        while respawn:
            try:
                response = yield self.collector.receive_message_batch(
                    max_messages=window_size,
                )
                if response.Messages:
                    # We need to have low latency to delete messages
                    # we've processed
                    retry_timeout = INITIAL_TIMEOUT
                else:
                    # Nothing received: back off, doubling up to MAX_TIMEOUT.
                    retry_timeout = min(retry_timeout * 2, MAX_TIMEOUT)
                    yield gen.sleep(retry_timeout.total_seconds())

                sent_full_batch = True
                for msg in response.Messages:
                    try:
                        self.gate.put_nowait(msg)
                    except QueueFull:
                        self.logger.debug('Gate queue full; yielding')
                        sent_full_batch = False
                        # TODO: is it worth trying to batch and schedule
                        #       a flush at this point instead of many
                        #       single deletes?
                        yield self.gate.put(msg)
                    # Wake the delete loop and hand it the message.
                    self._should_flush_queue.set()
                    self._delete_queue.put_nowait(msg)
                    statsd.increment('%s.queued' % self.metric_prefix,
                                     tags=[self.sender_tag])

                # If we were able to flush the entire batch without waiting,
                # increase our window size to max_messages
                if sent_full_batch and \
                   window_size < self.collector.max_messages:
                    window_size += 1
                # Otherwise ask for less next time
                elif not sent_full_batch and window_size > 1:
                    window_size -= 1
            except Exception as err:
                self.logger.exception(err)
                self.input_error.set()
                respawn = False
class TornadoReconnectionManager(ReconnectionManager):
    """Reconnection manager that polls the time endpoint from a Tornado coroutine.

    The polling loop waits for the configured interval (linear or exponential
    back-off), then calls the time endpoint; on success it notifies the
    callback and stops, on failure it waits again. The loop can be cancelled
    with `stop_polling`.
    """

    def __init__(self, pubnub):
        # Created before the base initializer runs so that it is available
        # to anything the base class may call.
        self._cancelled_event = Event()
        super(TornadoReconnectionManager, self).__init__(pubnub)

    @gen.coroutine
    def _register_heartbeat_timer(self):
        """Run the wait/probe loop until a probe succeeds or polling is cancelled."""
        self._cancelled_event.clear()

        while not self._cancelled_event.is_set():
            if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL:
                self._timer_interval = int(math.pow(2, self._connection_errors) - 1)
                if self._timer_interval > self.MAXEXPONENTIALBACKOFF:
                    # Back-off exceeded its cap: start over from the minimum.
                    self._timer_interval = self.MINEXPONENTIALBACKOFF
                    self._connection_errors = 1
                    logger.debug("timerInterval > MAXEXPONENTIALBACKOFF at: %s",
                                 utils.datetime_now())
                elif self._timer_interval < 1:
                    self._timer_interval = self.MINEXPONENTIALBACKOFF

                logger.debug("timerInterval = %d at: %s",
                             self._timer_interval, utils.datetime_now())
            else:
                self._timer_interval = self.INTERVAL

            # >>> Wait given interval or cancel
            sleeper = tornado.gen.sleep(self._timer_interval)
            canceller = self._cancelled_event.wait()

            wi = tornado.gen.WaitIterator(canceller, sleeper)

            while not wi.done():
                try:
                    future = wi.next()
                    yield future
                except Exception as e:
                    # TODO: verify the error will not be eaten
                    logger.error(e)
                    raise
                else:
                    if wi.current_future is sleeper:
                        # Interval elapsed: go on to the probe below.
                        break
                    elif wi.current_future is canceller:
                        # Polling was cancelled while waiting.
                        return
                    else:
                        raise Exception("unknown future raised")

            logger.debug("reconnect loop at: %s", utils.datetime_now())

            # >>> Attempt to request /time/0 endpoint
            try:
                yield self._pubnub.time().result()
                self._connection_errors = 1
                self._callback.on_reconnect()
                logger.debug("reconnection manager stop due success time endpoint call: %s",
                             utils.datetime_now())
                break
            except Exception:
                if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.EXPONENTIAL:
                    logger.debug("reconnect interval increment at: %s",
                                 utils.datetime_now())
                    self._connection_errors += 1

    def start_polling(self):
        """Spawn the polling loop on the client's IOLoop unless reconnection is disabled."""
        if self._pubnub.config.reconnect_policy == PNReconnectionPolicy.NONE:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning("reconnection policy is disabled, please handle reconnection manually.")
            return

        self._pubnub.ioloop.spawn_callback(self._register_heartbeat_timer)

    def stop_polling(self):
        """Cancel the polling loop if it has not been cancelled already."""
        if self._cancelled_event is not None and not self._cancelled_event.is_set():
            self._cancelled_event.set()
class ZMQSource(object):
    """ISource implementation that pulls messages off a ZMQ socket
    and pushes them into a gate queue.
    """

    # Number of consecutive loop iterations allowed before control is
    # handed back to the IOLoop.
    max_unyielded = 100000

    def __init__(self, logger, loop, queue, zmq_socket,
                 metric_prefix='source'):
        own_class = self.__class__
        self.sender_tag = 'sender:%s.%s' % (own_class.__module__,
                                            own_class.__name__)
        self.logger = logger
        self.loop = loop
        self.gate = queue
        self.collector = zmq_socket
        self.metric_prefix = metric_prefix
        self.state = RUNNING
        self.end_of_input = Event()
        self.input_error = Event()
        self._readable = Event()
        # Start consuming as soon as the loop gets a chance to run.
        self.loop.spawn_callback(self.onInput)

    def _handle_events(self, fd, events):
        """IOLoop handler: report poll errors, or flag the socket readable."""
        if events & self.loop.ERROR:
            self.logger.error('Error polling socket for readability')
            return
        if events & self.loop.READ:
            # One-shot: unregister first, then release the waiter in _poll.
            self.loop.remove_handler(self.collector)
            self._readable.set()

    @gen.coroutine
    def _poll(self, retry_timeout=INITIAL_TIMEOUT):
        """Register for read events and wait until the socket is readable."""
        self.loop.add_handler(self.collector,
                              self._handle_events,
                              self.loop.READ)
        yield self._readable.wait()
        self._readable.clear()

    @gen.coroutine
    def close(self, timeout=None):
        """Mark the source as closing and close the socket."""
        self.state = CLOSING
        self.logger.warning('Closing source')
        self.collector.close()

    @gen.coroutine
    def onInput(self):
        """Receive loop: move messages from the socket into the gate.

        Back pressure comes from not reading the socket while the gate
        queue is full. pyzmq reads the socket through Tornado, so reads
        stay blocked for as long as this coroutine waits on the queue.
        """
        keep_running = True
        spins = 0
        while keep_running:
            spins += 1
            try:
                msg = self.collector.recv_multipart(zmq.NOBLOCK)
                self.gate.put_nowait(msg)
            except zmq.Again:
                # Nothing to read yet: wait for readability.
                yield self._poll()
            except QueueFull:
                self.logger.debug('Gate queue full; yielding')
                yield self.gate.put(msg)
            except Exception as err:
                self.logger.exception(err)
                self.input_error.set()
                keep_running = False
            else:
                statsd.increment('%s.queued' % self.metric_prefix,
                                 tags=[self.sender_tag])
            finally:
                # Give other coroutines a turn: every iteration when the
                # gate has concurrent transducers, otherwise only after
                # max_unyielded iterations in a row.
                if keep_running and self.gate.transducer_concurrency > 1:
                    yield gen.moment
                elif keep_running and spins > self.max_unyielded:
                    yield gen.moment
                    spins = 0
class Queue(Generic[_T]):
    """A FIFO queue for handing items between producer and consumer coroutines.

    A ``maxsize`` of 0 (the default) means the queue is unbounded.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

        async def producer():
            for item in range(5):
                await q.put(item)
                print('Put %s' % item)

        async def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            await producer()     # Wait for producer to put all tasks.
            await q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done


    On Python versions without native coroutines (before 3.5) the
    ``consumer()`` above could be written as::

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """

    # The concrete container type is chosen by the subclass (see ``_init``).
    # It could be expressed as another generic parameter with protocols.
    _queue = None  # type: Any

    def __init__(self, maxsize: int = 0) -> None:
        if maxsize is None:
            raise TypeError("maxsize can't be None")
        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        # Futures of consumers waiting for an item.
        self._getters = collections.deque([])  # type: Deque[Future[_T]]
        # (item, future) pairs of producers waiting for a free slot.
        self._putters = collections.deque([])  # type: Deque[Tuple[_T, Future[None]]]
        self._unfinished_tasks = 0
        # Set whenever there are no unfinished tasks; starts out set.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self) -> int:
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self) -> int:
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self) -> bool:
        return not self._queue

    def full(self) -> bool:
        # An unbounded queue is never full.
        if self.maxsize == 0:
            return False
        return self.qsize() >= self.maxsize

    def put(
        self, item: _T, timeout: Optional[Union[float, datetime.timedelta]] = None
    ) -> "Future[None]":
        """Put an item into the queue, perhaps waiting until there is room.

        Returns a Future, which raises `tornado.util.TimeoutError` after a
        timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.
        """
        done = Future()  # type: Future[None]
        try:
            self.put_nowait(item)
        except QueueFull:
            # No room: park the item with its future until a getter frees
            # a slot or the timeout expires.
            self._putters.append((item, done))
            _set_timeout(done, timeout)
            return done
        done.set_result(None)
        return done

    def put_nowait(self, item: _T) -> None:
        """Put an item into the queue without blocking.

        If no free slot is immediately available, raise `QueueFull`.
        """
        self._consume_expired()
        if not self._getters:
            if self.full():
                raise QueueFull
            self.__put_internal(item)
            return

        # A consumer is already waiting: pass the item straight through.
        assert self.empty(), "queue non-empty, why are getters waiting?"
        waiting_getter = self._getters.popleft()
        self.__put_internal(item)
        future_set_result_unless_cancelled(waiting_getter, self._get())

    def get(
        self, timeout: Optional[Union[float, datetime.timedelta]] = None
    ) -> Awaitable[_T]:
        """Remove and return an item from the queue.

        Returns an awaitable which resolves once an item is available, or raises
        `tornado.util.TimeoutError` after a timeout.

        ``timeout`` may be a number denoting a time (on the same
        scale as `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the
        current time.

        .. note::

           The ``timeout`` argument of this method differs from that
           of the standard library's `queue.Queue.get`. That method
           interprets numeric values as relative timeouts; this one
           interprets them as absolute deadlines and requires
           ``timedelta`` objects for relative timeouts (consistent
           with other timeouts in Tornado).

        """
        pending = Future()  # type: Future[_T]
        try:
            item = self.get_nowait()
        except QueueEmpty:
            # Nothing available: register as a waiter.
            self._getters.append(pending)
            _set_timeout(pending, timeout)
        else:
            pending.set_result(item)
        return pending

    def get_nowait(self) -> _T:
        """Remove and return an item from the queue without blocking.

        Return an item if one is immediately available, else raise
        `QueueEmpty`.
        """
        self._consume_expired()
        if self._putters:
            # A producer is waiting for room: admit its item, release it,
            # then hand out the head of the queue.
            assert self.full(), "queue not full, why are putters waiting?"
            parked_item, waiting_putter = self._putters.popleft()
            self.__put_internal(parked_item)
            future_set_result_unless_cancelled(waiting_putter, None)
            return self._get()

        if not self.qsize():
            raise QueueEmpty
        return self._get()

    def task_done(self) -> None:
        """Indicate that a formerly enqueued task is complete.

        Used by queue consumers. For each `.get` used to fetch a task, a
        subsequent call to `.task_done` tells the queue that the processing
        on the task is complete.

        If a `.join` is blocking, it resumes when all items have been
        processed; that is, when every `.put` is matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        if self._unfinished_tasks <= 0:
            raise ValueError("task_done() called too many times")

        self._unfinished_tasks -= 1
        if not self._unfinished_tasks:
            self._finished.set()

    def join(
        self, timeout: Optional[Union[float, datetime.timedelta]] = None
    ) -> Awaitable[None]:
        """Block until all items in the queue are processed.

        Returns an awaitable, which raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self) -> _QueueIterator[_T]:
        return _QueueIterator(self)

    # Storage hooks: subclasses override these three to change ordering.
    def _init(self) -> None:
        self._queue = collections.deque()

    def _get(self) -> _T:
        return self._queue.popleft()

    def _put(self, item: _T) -> None:
        self._queue.append(item)

    # End of the storage hooks.

    def __put_internal(self, item: _T) -> None:
        # Bookkeeping shared by every put path: count the task, mark the
        # queue as unfinished, then store the item.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self) -> None:
        # Drop waiters at the head of each line whose future is already
        # done (i.e. timed out).
        putters = self._putters
        while putters and putters[0][1].done():
            putters.popleft()

        getters = self._getters
        while getters and getters[0].done():
            getters.popleft()

    def __repr__(self) -> str:
        name = type(self).__name__
        return "<%s at %s %s>" % (name, hex(id(self)), self._format())

    def __str__(self) -> str:
        name = type(self).__name__
        return "<%s %s>" % (name, self._format())

    def _format(self) -> str:
        # Assemble the description piece by piece; optional pieces appear
        # only when they carry information.
        pieces = ["maxsize=%r" % (self.maxsize,)]
        if getattr(self, "_queue", None):
            pieces.append(" queue=%r" % self._queue)
        if self._getters:
            pieces.append(" getters[%s]" % len(self._getters))
        if self._putters:
            pieces.append(" putters[%s]" % len(self._putters))
        if self._unfinished_tasks:
            pieces.append(" tasks=%s" % self._unfinished_tasks)
        return "".join(pieces)
class MockFitsWriterClient(object):
    """
    Wrapper class for a KATCP client to a EddFitsWriterServer

    Connects a non-blocking TCP socket to the given address and repeatedly
    receives packets (a header followed by a number of sections) until
    stopped.
    """

    def __init__(self, address):
        """
        @brief      Construct new instance

        @param      address  The address passed to socket.connect()
        """
        self._address = address
        self._ioloop = IOLoop.current()
        self._stop_event = Event()
        self._is_stopped = Condition()
        self._socket = None

    def reset_connection(self):
        """
        @brief      Create a fresh non-blocking socket and start connecting
        """
        self._socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        self._socket.setblocking(False)
        try:
            self._socket.connect(self._address)
        except socket.error as error:
            # A non-blocking connect reports EINPROGRESS while the
            # connection is still being established; that is expected.
            if error.args[0] == errno.EINPROGRESS:
                pass
            else:
                raise

    @coroutine
    def recv_nbytes(self, nbytes):
        """
        @brief      Receive exactly nbytes bytes from the socket

        @param      nbytes  The number of bytes to receive

        @return     The received bytes

        @note       Raises StopEvent if the client is stopped while
                    receiving, and socket.error if the socket fails or the
                    peer closes the connection before nbytes bytes arrived.
        """
        received_bytes = 0
        data = b''
        while received_bytes < nbytes:
            if self._stop_event.is_set():
                raise StopEvent
            try:
                log.debug("Requesting {} bytes".format(nbytes - received_bytes))
                current_data = self._socket.recv(nbytes - received_bytes)
            except socket.error as error:
                error_id = error.args[0]
                if error_id == errno.EAGAIN or error_id == errno.EWOULDBLOCK:
                    # No data available yet: let the IOLoop run and retry.
                    yield sleep(0.1)
                    continue
                log.exception("Unexpected error on socket recv: {}".format(
                    str(error)))
                raise
            if not current_data:
                # recv() returns an empty byte string once the peer has
                # closed the connection. Without this check the loop would
                # spin forever without yielding and block the IOLoop.
                raise socket.error(
                    errno.ECONNRESET,
                    "Connection closed by peer after {} of {} bytes".format(
                        received_bytes, nbytes))
            received_bytes += len(current_data)
            data += current_data
            log.debug("Received {} bytes ({} of {} bytes)".format(
                len(current_data), received_bytes, nbytes))
        raise Return(data)

    @coroutine
    def recv_loop(self):
        """
        @brief      Receive one packet and reschedule itself on success

        @detail     A StopEvent notifies waiters on the stop condition; any
                    other failure is logged and ends the loop.
        """
        try:
            header, sections = yield self.recv_packet()
        except StopEvent:
            log.debug("Notifying that recv calls have stopped")
            self._is_stopped.notify()
        except Exception:
            log.exception("Failure while receiving packet")
        else:
            self._ioloop.add_callback(self.recv_loop)

    def start(self):
        """
        @brief      (Re)connect and start the receive loop
        """
        self._stop_event.clear()
        self.reset_connection()
        self._ioloop.add_callback(self.recv_loop)

    @coroutine
    def stop(self, timeout=2):
        """
        @brief      Request the receive loop to stop and wait for it

        @param      timeout  Seconds to wait for the loop to acknowledge
        """
        self._stop_event.set()
        try:
            # Condition.wait takes an absolute deadline on the IOLoop clock.
            success = yield self._is_stopped.wait(
                timeout=self._ioloop.time() + timeout)
            if not success:
                raise TimeoutError
        except TimeoutError:
            log.error(("Could not stop the client within "
                       "the {} second limit").format(timeout))
        except Exception:
            log.exception("Fucup")

    @coroutine
    def recv_packet(self):
        """
        @brief      Receive one packet: a header followed by its sections

        @return     A tuple (header, sections) where sections is a list of
                    (section_header, data) tuples
        """
        log.debug("Receiving packet header")
        raw_header = yield self.recv_nbytes(C.sizeof(FWHeader))
        log.debug("Converting packet header")
        header = FWHeader.from_buffer_copy(raw_header)
        log.info("Received header: {}".format(header))
        fw_data_type = header.channel_data_type.strip().upper()
        c_data_type, np_data_type = TYPE_MAP[fw_data_type]
        sections = []
        for section in range(header.nsections):
            log.debug("Receiving section {} of {}".format(
                section + 1, header.nsections))
            raw_section_header = yield self.recv_nbytes(
                C.sizeof(FWSectionHeader))
            section_header = FWSectionHeader.from_buffer_copy(
                raw_section_header)
            log.info("Section {} header: {}".format(section, section_header))
            log.debug("Receiving section data")
            raw_bytes = yield self.recv_nbytes(
                C.sizeof(c_data_type) * section_header.nchannels)
            data = np.frombuffer(raw_bytes, dtype=np_data_type)
            log.info("Section {} data: {}".format(section, data[:10]))
            sections.append((section_header, data))
        raise Return((header, sections))
class ConnectionPool(object):
    """ A bounded pool of Comm objects, shared between callers.

    ``connect`` mirrors the normal distributed.connect function, but
    reuses comms where it can and keeps the number of open comms under
    a limit.

    The pool is used through an ``rpc``-like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(address) for address ...]

        >>> info = yield scheduler.identity()

    Enough comms are created to serve concurrent requests to any one
    address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    Existing comms are reused so that we don't have to reconnect each time.
    A cap on open comms guards against "too many open file handle" issues.
    When the cap is reached, all idle comms are closed; if that is not
    enough, callers wait until an occupied comm closes.

    Parameters
    ----------
    limit: int
        The number of open comms to maintain at once
    deserialize: bool
        Whether or not to deserialize data by default or pass it through
    """
    def __init__(self, limit=512, deserialize=True):
        self.limit = limit  # Upper bound on open comms
        self.open = 0       # How many comms are open in total
        self.active = 0     # How many of those are currently handed out
        # Invariant: len(available) == open - active
        self.available = defaultdict(set)
        # Invariant: len(occupied) == active
        self.occupied = defaultdict(set)
        self.deserialize = deserialize
        # Set whenever the pool is below its limit.
        self.event = Event()

    def __str__(self):
        counts = (self.open, self.active)
        return "<ConnectionPool: open=%d, active=%d>" % counts

    __repr__ = __str__

    def __call__(self, addr=None, ip=None, port=None):
        """ Cached rpc objects """
        resolved = addr_from_args(addr=addr, ip=ip, port=port)
        return PooledRPCCall(resolved, self)

    @gen.coroutine
    def connect(self, addr, timeout=3):
        """
        Get a Comm to the given address.  For internal use.
        """
        idle = self.available[addr]
        busy = self.occupied[addr]

        if idle:
            candidate = idle.pop()
            if candidate.closed():
                # The idle comm died while unused; forget it and open a
                # new one below.
                self.open -= 1
            else:
                self.active += 1
                busy.add(candidate)
                raise gen.Return(candidate)

        # At the limit: close idle comms and wait until there is room.
        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        # Reserve the slot before connecting; give it back on failure.
        self.open += 1
        try:
            fresh = yield connect(addr, timeout=timeout,
                                  deserialize=self.deserialize)
        except Exception:
            self.open -= 1
            raise

        self.active += 1
        busy.add(fresh)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(fresh)

    def reuse(self, addr, comm):
        """
        Reuse an open communication to the given address.  For internal use.
        """
        self.occupied[addr].remove(comm)
        self.active -= 1
        if not comm.closed():
            # Still usable: return it to the idle set.
            self.available[addr].add(comm)
            return

        self.open -= 1
        if self.open < self.limit:
            self.event.set()

    def collect(self):
        """
        Collect open but unused communications, to allow opening other ones.
        """
        logger.info("Collecting unused comms.  open: %d, active: %d",
                    self.open, self.active)
        for idle in self.available.values():
            for comm in idle:
                comm.close()
            idle.clear()

        # Only the handed-out comms remain open now.
        self.open = self.active
        if self.open < self.limit:
            self.event.set()

    def close(self):
        """
        Close all communications abruptly.
        """
        for group in (self.available, self.occupied):
            for comms in group.values():
                for comm in comms:
                    comm.abort()
class Queue(Generic[_T]):
    """A queue for handing items from producer coroutines to consumers.

    A ``maxsize`` of 0 (the default) means the queue never fills up.

    .. testcode::

        from tornado import gen
        from tornado.ioloop import IOLoop
        from tornado.queues import Queue

        q = Queue(maxsize=2)

        async def consumer():
            async for item in q:
                try:
                    print('Doing work on %s' % item)
                    await gen.sleep(0.01)
                finally:
                    q.task_done()

        async def producer():
            for item in range(5):
                await q.put(item)
                print('Put %s' % item)

        async def main():
            # Start consumer without waiting (since it never finishes).
            IOLoop.current().spawn_callback(consumer)
            await producer()     # Wait for producer to put all tasks.
            await q.join()       # Wait for consumer to finish all tasks.
            print('Done')

        IOLoop.current().run_sync(main)

    .. testoutput::

        Put 0
        Put 1
        Doing work on 0
        Put 2
        Doing work on 1
        Put 3
        Doing work on 2
        Put 4
        Doing work on 3
        Doing work on 4
        Done

    On Python versions that lack native coroutines (before 3.5) the
    ``consumer()`` above can instead be spelled::

        @gen.coroutine
        def consumer():
            while True:
                item = yield q.get()
                try:
                    print('Doing work on %s' % item)
                    yield gen.sleep(0.01)
                finally:
                    q.task_done()

    .. versionchanged:: 4.3
       Added ``async for`` support in Python 3.5.

    """

    # The concrete container is created by ``_init`` and may differ between
    # subclasses; a further generic parameter plus protocols could type this
    # more precisely.
    _queue = None  # type: Any

    def __init__(self, maxsize: int = 0) -> None:
        if maxsize is None:
            raise TypeError("maxsize can't be None")

        if maxsize < 0:
            raise ValueError("maxsize can't be negative")

        self._maxsize = maxsize
        self._init()
        # Futures of consumers waiting for an item.
        self._getters = collections.deque()  # type: Deque[Future[_T]]
        # (item, future) pairs of producers waiting for a free slot.
        self._putters = collections.deque()  # type: Deque[Tuple[_T, Future[None]]]
        self._unfinished_tasks = 0
        # Set while there are no unfinished tasks; ``join`` waits on it.
        self._finished = Event()
        self._finished.set()

    @property
    def maxsize(self) -> int:
        """Number of items allowed in the queue."""
        return self._maxsize

    def qsize(self) -> int:
        """Number of items in the queue."""
        return len(self._queue)

    def empty(self) -> bool:
        """Whether the queue currently holds no items."""
        return not self._queue

    def full(self) -> bool:
        """Whether the queue is at capacity (never true when unbounded)."""
        return self.maxsize != 0 and self.qsize() >= self.maxsize

    def put(
        self, item: _T, timeout: Union[float, datetime.timedelta] = None
    ) -> "Future[None]":
        """Enqueue ``item``, waiting for a free slot if necessary.

        The returned Future raises `tornado.util.TimeoutError` once the
        timeout has passed.

        ``timeout`` is either a number denoting a time (on the same scale as
        `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the current
        time.
        """
        done = Future()  # type: Future[None]
        try:
            self.put_nowait(item)
        except QueueFull:
            # No room: park the item until a consumer frees a slot.
            self._putters.append((item, done))
            _set_timeout(done, timeout)
            return done
        done.set_result(None)
        return done

    def put_nowait(self, item: _T) -> None:
        """Enqueue ``item`` immediately, never blocking.

        Raises `QueueFull` when no free slot is available right now.
        """
        self._consume_expired()
        if not self._getters:
            if self.full():
                raise QueueFull
            self.__put_internal(item)
            return

        # A consumer is already waiting: route an item straight to it.
        assert self.empty(), "queue non-empty, why are getters waiting?"
        waiter = self._getters.popleft()
        self.__put_internal(item)
        future_set_result_unless_cancelled(waiter, self._get())

    def get(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[_T]:
        """Take the next item out of the queue and return it.

        The returned awaitable resolves as soon as an item is available, or
        raises `tornado.util.TimeoutError` after a timeout.

        ``timeout`` is either a number denoting a time (on the same scale as
        `tornado.ioloop.IOLoop.time`, normally `time.time`), or a
        `datetime.timedelta` object for a deadline relative to the current
        time.

        .. note::

           This ``timeout`` is not the same as the one accepted by the
           standard library's `queue.Queue.get`.  There, a number is a
           relative timeout; here a number is an absolute deadline, and a
           relative timeout must be given as a ``timedelta`` (consistent
           with other timeouts in Tornado).

        """
        pending = Future()  # type: Future[_T]
        try:
            item = self.get_nowait()
        except QueueEmpty:
            # Nothing to hand out yet: wait for a producer.
            self._getters.append(pending)
            _set_timeout(pending, timeout)
        else:
            pending.set_result(item)
        return pending

    def get_nowait(self) -> _T:
        """Take the next item out of the queue immediately, never blocking.

        Returns an item when one is available right now; raises
        `QueueEmpty` otherwise.
        """
        self._consume_expired()
        if self._putters:
            # Admit the oldest blocked producer's item before dequeuing.
            assert self.full(), "queue not full, why are putters waiting?"
            item, putter = self._putters.popleft()
            self.__put_internal(item)
            future_set_result_unless_cancelled(putter, None)
        elif not self.qsize():
            raise QueueEmpty
        return self._get()

    def task_done(self) -> None:
        """Mark one previously fetched task as finished.

        Intended for queue consumers: after each `.get` that fetched a task,
        call `.task_done` to signal that work on it is complete.

        A blocked `.join` resumes once all items have been processed, i.e.
        once every `.put` has been matched by a `.task_done`.

        Raises `ValueError` if called more times than `.put`.
        """
        remaining = self._unfinished_tasks
        if remaining <= 0:
            raise ValueError("task_done() called too many times")
        remaining -= 1
        self._unfinished_tasks = remaining
        if not remaining:
            self._finished.set()

    def join(self, timeout: Union[float, datetime.timedelta] = None) -> Awaitable[None]:
        """Wait until every item in the queue has been processed.

        The returned awaitable raises `tornado.util.TimeoutError` after a
        timeout.
        """
        return self._finished.wait(timeout)

    def __aiter__(self) -> _QueueIterator[_T]:
        return _QueueIterator(self)

    # Subclasses may override the next three methods.
    def _init(self) -> None:
        self._queue = collections.deque()

    def _get(self) -> _T:
        return self._queue.popleft()

    def _put(self, item: _T) -> None:
        self._queue.append(item)

    # Nothing below is meant to be overridden.

    def __put_internal(self, item: _T) -> None:
        # Every stored item is one more task that ``join`` has to wait for.
        self._unfinished_tasks += 1
        self._finished.clear()
        self._put(item)

    def _consume_expired(self) -> None:
        # Drop waiters at the head of each deque whose future is already done
        # (e.g. timed out).
        putters = self._putters
        while putters and putters[0][1].done():
            putters.popleft()

        getters = self._getters
        while getters and getters[0].done():
            getters.popleft()

    def __repr__(self) -> str:
        cls_name = type(self).__name__
        address = hex(id(self))
        return "<%s at %s %s>" % (cls_name, address, self._format())

    def __str__(self) -> str:
        cls_name = type(self).__name__
        return "<%s %s>" % (cls_name, self._format())

    def _format(self) -> str:
        # Collect the non-empty parts of the state, then glue them together.
        pieces = ["maxsize=%r" % (self.maxsize,)]
        if getattr(self, "_queue", None):
            pieces.append(" queue=%r" % self._queue)
        if self._getters:
            pieces.append(" getters[%s]" % len(self._getters))
        if self._putters:
            pieces.append(" putters[%s]" % len(self._putters))
        if self._unfinished_tasks:
            pieces.append(" tasks=%s" % self._unfinished_tasks)
        return "".join(pieces)
class ConnectionPool(object):
    """ A maximum sized pool of Comm objects.

    This provides a connect method that mirrors the normal distributed.connect
    method, but provides connection sharing and tracks connection limits.

    This object provides an ``rpc`` like interface::

        >>> rpc = ConnectionPool(limit=512)
        >>> scheduler = rpc('127.0.0.1:8786')
        >>> workers = [rpc(address) for address ...]

        >>> info = yield scheduler.identity()

    It creates enough comms to satisfy concurrent connections to any
    particular address::

        >>> a, b = yield [scheduler.who_has(), scheduler.has_what()]

    It reuses existing comms so that we don't have to continuously reconnect.

    It also maintains a comm limit to avoid "too many open file handle"
    issues.  Whenever this maximum is reached we clear out all idling comms.
    If that doesn't do the trick then we wait until one of the occupied comms
    is handed back or closes.

    Parameters
    ----------
    limit: int
        The number of open comms to maintain at once
    deserialize: bool
        Whether or not to deserialize data by default or pass it through
    serializers:
        Passed to every ``PooledRPCCall`` created by calling the pool
    deserializers:
        Passed to every ``PooledRPCCall`` created by calling the pool;
        falls back to ``serializers`` when None
    connection_args:
        Forwarded as ``connection_args`` when opening new comms
    """

    def __init__(self, limit=512, deserialize=True, serializers=None,
                 deserializers=None, connection_args=None):
        self.limit = limit     # Max number of open comms
        # Invariant: len(available) == open - active
        self.available = defaultdict(set)
        # Invariant: len(occupied) == active
        self.occupied = defaultdict(set)
        self.deserialize = deserialize
        self.serializers = serializers
        self.deserializers = (deserializers if deserializers is not None
                              else serializers)
        self.connection_args = connection_args
        # Set whenever the pool changed in a way that may let a coroutine
        # blocked in connect() make progress.
        self.event = Event()

    @property
    def active(self):
        """ Number of comms currently checked out. """
        return sum(map(len, self.occupied.values()))

    @property
    def open(self):
        """ Number of comms tracked by the pool, checked out or idle. """
        return self.active + sum(map(len, self.available.values()))

    def __repr__(self):
        return "<ConnectionPool: open=%d, active=%d>" % (self.open,
                                                         self.active)

    def __call__(self, addr=None, ip=None, port=None):
        """ Cached rpc objects """
        addr = addr_from_args(addr=addr, ip=ip, port=port)
        return PooledRPCCall(addr, self, serializers=self.serializers,
                             deserializers=self.deserializers)

    @gen.coroutine
    def connect(self, addr, timeout=None):
        """
        Get a Comm to the given address.  For internal use.

        An idle comm to ``addr`` is handed out if a live one exists;
        otherwise a new connection is opened, waiting first if the pool is
        at its limit.  Exceptions raised while connecting are propagated.
        """
        available = self.available[addr]

        # Try every idle comm, not just the first one: a single stale comm
        # must not force a fresh connection while live ones are pooled.
        # Closed comms are simply dropped from the pool.
        while available:
            comm = available.pop()
            if not comm.closed():
                self.occupied[addr].add(comm)
                raise gen.Return(comm)

        while self.open >= self.limit:
            self.event.clear()
            self.collect()
            yield self.event.wait()

        comm = yield connect(addr, timeout=timeout,
                             deserialize=self.deserialize,
                             connection_args=self.connection_args)
        # Look the set up again after yielding: remove(addr) may have popped
        # the previous set, and adding to that orphan would hide this comm
        # from the open/active counts and from reuse().
        self.occupied[addr].add(comm)

        if self.open >= self.limit:
            self.event.clear()

        raise gen.Return(comm)

    def reuse(self, addr, comm):
        """
        Reuse an open communication to the given address.  For internal use.

        Comms that are no longer checked out for ``addr`` (for instance
        after ``remove(addr)``) are ignored.
        """
        try:
            self.occupied[addr].remove(comm)
        except KeyError:
            pass
        else:
            if comm.closed():
                if self.open < self.limit:
                    self.event.set()
            else:
                self.available[addr].add(comm)
                # A coroutine blocked in connect() can now collect this idle
                # comm; without the wake-up it would wait for a comm to
                # close.
                self.event.set()

    def collect(self):
        """
        Collect open but unused communications, to allow opening other ones.
        """
        logger.info("Collecting unused comms.  open: %d, active: %d",
                    self.open, self.active)
        for comms in self.available.values():
            for comm in comms:
                comm.close()
            comms.clear()
        if self.open < self.limit:
            self.event.set()

    def remove(self, addr):
        """
        Remove all Comms to a given address.
        """
        logger.info("Removing comms to %s", addr)
        # Idle comms first, then checked-out ones; both are closed and
        # forgotten by the pool.
        for registry in (self.available, self.occupied):
            if addr in registry:
                for comm in registry.pop(addr):
                    comm.close()
        if self.open < self.limit:
            self.event.set()

    def close(self):
        """
        Close all communications abruptly.
        """
        for comms in self.available.values():
            for comm in comms:
                comm.abort()
        for comms in self.occupied.values():
            for comm in comms:
                comm.abort()