def run(self):
    """Select the output target and try to open the ZMQ subscription stream.

    ``self.filename`` picks the target stored on ``self.file``: the literal
    strings ``'sys.stdout'`` and ``'sys.stderr'`` select the matching
    standard stream, anything else is opened as a file in ``'a+b'`` mode.

    A SUB socket is then connected to ``self.addr`` and subscribed to every
    topic.  A failure while doing so is reported (message on stdout,
    traceback on stderr) instead of being raised, and whatever was created
    up to that point is closed.
    """
    if self.filename == 'sys.stdout':
        self.file = sys.stdout
    elif self.filename == 'sys.stderr':
        self.file = sys.stderr
    else:
        self.file = open(self.filename, 'a+b')

    ioloop.install()
    # Called for its side effect of creating the loop singleton; the
    # visible code does not use the returned object.
    ioloop.IOLoop.instance()

    # NOTE(review): the stream is neither stored nor given a receive
    # callback here -- confirm whether more of this method is expected.
    stream = None
    socket = None
    try:
        context = zmq.Context()
        socket = context.socket(zmq.SUB)
        socket.connect(self.addr)
        # Subscription prefixes are bytes; b'' subscribes to everything and
        # is accepted on both Python 2 and Python 3.
        socket.setsockopt(zmq.SUBSCRIBE, b'')
        stream = ZMQStream(socket)
    except Exception as err:
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print('%s error getting outstream: %s' % (self.name, err))
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        traceback.print_tb(exc_traceback, limit=30)
        if stream is not None and not stream.closed():
            stream.close()
        elif socket is not None:
            # Creating the stream is the last step of the try body, so on
            # failure only the raw socket can exist; close it to avoid a leak.
            socket.close()
def open(self):
    """Try to open a ZMQ SUB stream connected to ``self.addr``.

    The socket is subscribed to every topic.  A failure is reported
    (message on stdout, traceback on stderr) instead of being raised, and
    whatever was created up to that point is closed.
    """
    # NOTE(review): the stream is neither stored nor given a receive
    # callback here -- confirm whether more of this method is expected.
    stream = None
    socket = None
    try:
        context = zmq.Context()
        socket = context.socket(zmq.SUB)
        socket.connect(self.addr)
        # Subscription prefixes are bytes; b'' subscribes to everything and
        # is accepted on both Python 2 and Python 3.
        socket.setsockopt(zmq.SUBSCRIBE, b'')
        stream = ZMQStream(socket)
    except Exception as err:
        exc_type, exc_value, exc_traceback = sys.exc_info()
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print('ZMQStreamHandler ERROR getting ZMQ stream: %s' % (err,))
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        if stream is not None and not stream.closed():
            stream.close()
        elif socket is not None:
            # Creating the stream is the last step of the try body, so on
            # failure only the raw socket can exist; close it to avoid a leak.
            socket.close()
def open(self):
    """Record the open time and try to open a ZMQ SUB stream.

    ``self.time_opened`` is set to the current ``time.time()`` value.  A SUB
    socket is then connected to ``self.addr`` and subscribed to every topic.
    A failure is reported (message on stdout, traceback on stderr) instead
    of being raised, and whatever was created up to that point is closed.
    """
    self.time_opened = time.time()

    # NOTE(review): the stream is neither stored nor given a receive
    # callback here -- confirm whether more of this method is expected.
    stream = None
    socket = None
    try:
        context = zmq.Context()
        socket = context.socket(zmq.SUB)
        socket.connect(self.addr)
        # Subscription prefixes are bytes; b"" subscribes to everything and
        # is accepted on both Python 2 and Python 3.
        socket.setsockopt(zmq.SUBSCRIBE, b"")
        stream = ZMQStream(socket)
    except Exception as err:
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print("Error getting ZMQ stream: %s" % (err,))
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        traceback.print_tb(exc_traceback, limit=30)
        if stream is not None and not stream.closed():
            stream.close()
        elif socket is not None:
            # Creating the stream is the last step of the try body, so on
            # failure only the raw socket can exist; close it to avoid a leak.
            socket.close()
def open(self):
    """Record the open time and try to open a ZMQ SUB stream.

    ``self.time_opened`` is set to the current ``time.time()`` value.  A SUB
    socket is then connected to ``self.addr`` and subscribed to every topic.
    A failure is reported (message on stdout, traceback on stderr) instead
    of being raised, and whatever was created up to that point is closed.
    """
    self.time_opened = time.time()

    # NOTE(review): the stream is neither stored nor given a receive
    # callback here -- confirm whether more of this method is expected.
    stream = None
    socket = None
    try:
        context = zmq.Context()
        socket = context.socket(zmq.SUB)
        socket.connect(self.addr)
        # Subscription prefixes are bytes; b'' subscribes to everything and
        # is accepted on both Python 2 and Python 3.
        socket.setsockopt(zmq.SUBSCRIBE, b'')
        stream = ZMQStream(socket)
    except Exception as err:
        # Single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print('Error getting ZMQ stream: %s' % (err,))
        exc_type, exc_value, exc_traceback = sys.exc_info()
        traceback.print_exception(exc_type, exc_value, exc_traceback)
        traceback.print_tb(exc_traceback, limit=30)
        if stream is not None and not stream.closed():
            stream.close()
        elif socket is not None:
            # Creating the stream is the last step of the try body, so on
            # failure only the raw socket can exist; close it to avoid a leak.
            socket.close()
class MNWorker(MN_object):
    """Class for the MN worker side.

    Thin encapsulation of a zmq.DEALER socket wrapped in a ZMQStream.
    The worker announces itself with a READY message, sends a heartbeat
    every ``HB_INTERVAL`` milliseconds and, once ``HB_RETRIES`` heartbeats
    pass without any message from the broker, tears the connection down
    and schedules its re-creation.

    :param context:     the context to use for socket creation.
    :type context:      zmq.Context
    :param endpoint:    endpoint to connect to.
    :type endpoint:     str
    :param service:     the name of the service we support.
    :type service:      byte-string
    :param worker_type: sent verbatim as a frame of the READY message.
    :param address:     sent verbatim as a frame of the READY message.
    :param protocols:   sent verbatim as a frame of the READY message.
    """

    _proto_version = b'MNPW01'  # worker protocol version

    def __init__(self, context, endpoint, service, worker_type, address,
                 protocols):
        """Initialize the MNWorker and connect it to ``endpoint``.
        """
        self.context = context
        self.endpoint = endpoint
        self.service = service
        self.type = worker_type
        self.address = address
        self.protocols = protocols
        self.envelope = None
        self.HB_RETRIES = HB_RETRIES
        self.HB_INTERVAL = HB_INTERVAL
        # Defined up front so _tick never sees a missing attribute, even if
        # the READY message could not be sent.
        self.curr_retries = self.HB_RETRIES
        self._data = {}
        self.stream = None
        self._tmo = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False
        self.ticker = None
        self._delayed_cb = None
        self._create_stream()
        _LOG.info("Worker initialized and can be found at '%s'", endpoint)

    def _create_stream(self):
        """Helper to create the socket and the stream.

        Also creates the heartbeat ticker, sends READY and starts the ticker.
        """
        socket = self.context.socket(zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)
        self.ticker = PeriodicCallback(self._tick, self.HB_INTERVAL)
        self._send_ready()
        # _send_ready() shuts the worker down (clearing the ticker) when the
        # stream turns out to be closed.
        if self.ticker is not None:
            self.ticker.start()

    def _send(self, frames):
        """Send ``frames`` if the stream is usable.

        When the stream is missing or closed the worker is shut down instead.

        :param frames: the message parts to send.
        :type frames:  list of byte-strings
        :return: True if the message was handed to the stream, else False.
        :rtype: bool
        """
        if self.stream is None or self.stream.closed():
            self.shutdown()
            return False
        self.stream.send_multipart(frames)
        return True

    def _send_ready(self):
        """Helper method to prepare and send the workers READY message.

        The retry counter is reset only when the message was actually sent.
        """
        _LOG.debug("Informing broker I am ready")
        ready_msg = [
            b'', WORKER_PROTO, MSG_READY, self.service, self.type,
            self.address, self.protocols
        ]
        if self._send(ready_msg):
            self.curr_retries = self.HB_RETRIES

    def _tick(self):
        """Method called every HB_INTERVAL milliseconds.

        Sends a heartbeat and uses up one retry.  When the retries are
        exhausted, or the stream was found closed, the connection is shut
        down and its re-creation is scheduled after HB_INTERVAL milliseconds.
        """
        self.curr_retries -= 1
        self.send_hb()
        if self.stream is not None and self.curr_retries >= 0:
            return
        # connection seems to be dead
        self.shutdown()
        # try to recreate it
        self._delayed_cb = DelayedCallback(self._create_stream,
                                           self.HB_INTERVAL)
        self._delayed_cb.start()

    def send_hb(self):
        """Construct and send HB message to broker.

        If the stream is missing or closed the worker is shut down and
        nothing is sent.
        """
        _LOG.debug("Sending heartbeat")
        self._send([b'', WORKER_PROTO, MSG_HEARTBEAT])

    def shutdown(self):
        """Method to deactivate the worker connection completely.

        Stops the heartbeat ticker, closes the stream and the underlying
        socket and resets the connection state flags.  Safe to call when
        the worker is already shut down.
        """
        if self.ticker:
            self.ticker.stop()
            self.ticker = None
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None
        self.timed_out = False
        self.need_handshake = True
        self.connected = False

    def reply(self, msg):
        """Send the given message as reply to the pending request.

        The envelope stored by the last request is consumed and prepended.
        If the stream is missing or closed the worker is shut down and
        nothing is sent.

        :param msg: full message to send.
        :type msg:  can either be a byte-string or a list of byte-strings
        :raises ConnectionNotReadyError: if no message from the broker has
            been processed since the connection was (re)created.
        """
        if self.need_handshake:
            raise ConnectionNotReadyError()
        # NOTE(review): self.envelope is None unless a request is pending;
        # replying without one fails on the extend/append below.
        to_send = self.envelope
        self.envelope = None
        if isinstance(msg, list):
            to_send.extend(msg)
        else:
            to_send.append(msg)
        self._send(to_send)

    def _on_message(self, msg):
        """Helper method called on message receive.

        Messages that are too short or carry another protocol id are
        ignored and do not count as a sign of life from the broker.

        :param msg: a list w/ the message parts
        :type msg:  a list of byte-strings
        """
        _LOG.debug("Received: %s.", msg)
        if len(msg) < 3:
            _LOG.debug('ignoring message with too few parts')
            return
        # 1st part is empty, 2nd is the protocol version, 3rd the type
        proto, msg_type = msg[1], msg[2]
        msg = msg[3:]
        if proto != WORKER_PROTO:
            # ignore message from not supported protocol
            return
        # any message resets the retries counter
        self.need_handshake = False
        self.curr_retries = self.HB_RETRIES
        if msg_type == MSG_DISCONNECT:
            _LOG.info("Broker wants us to disconnect.")
            # reconnect will be triggered by hb timer
            self.curr_retries = 0
        elif msg_type == MSG_QUERY:
            # remaining parts are the user message
            _LOG.debug("Received new request: %s.", msg)
            envelope, msg = split_address(msg)
            envelope.append(b'')
            # prepared reply header, completed and sent by reply()
            self.envelope = [b'', WORKER_PROTO, MSG_REPLY] + envelope
            self.on_request(msg)
        else:
            # invalid message, ignored
            _LOG.debug('ignoring message with invalid id')

    def on_request(self, msg):
        """Public method called when a request arrived.

        :param msg: a list w/ the message parts
        :type msg:  a list of byte-strings

        Must be overloaded to provide support for various services!
        """
        pass
class Connection(object):
    """
    This is a base class describing a single connection of client from
    web browser.
    """
    # maximum auth validation requests before returning error to client
    MAX_AUTH_ATTEMPTS = 5

    # interval unit in milliseconds for back off
    BACK_OFF_INTERVAL = 100

    # maximum timeout between authorization attempts in back off
    BACK_OFF_MAX_TIMEOUT = 5000

    def close_connection(self):
        """
        General method for closing connection.
        """
        if isinstance(self, (SockJSConnection, )):
            self.close()

    def send_message(self, message):
        """
        Send message to client
        """
        if isinstance(self, SockJSConnection):
            self.send(message)

    def send_ack(self, msg_id=None, method=None, result=None, error=None):
        """
        Build an acknowledgement with `make_ack` and send it to client.
        """
        self.send_message(
            self.make_ack(
                msg_id=msg_id,
                method=method,
                result=result,
                error=error
            )
        )

    def make_ack(self, msg_id=None, method=None, result=None, error=None):
        """
        Return JSON encoded acknowledgement for a client request.
        """
        to_return = {
            'ack': True,
            'id': msg_id,
            'method': method,
            'result': result,
            'error': error
        }
        return json_encode(to_return)

    @coroutine
    def handle_auth(self, params):
        """
        Authenticate connection and prepare its state.

        Checks the client token, optionally asks project's `validate_url`
        to confirm permissions (with random exponential back-off shared per
        project), loads allowed categories and opens the zmq SUB stream
        used to receive published messages.

        Returns (via `Return`) a `(result, error)` tuple: `(True, None)` on
        success, `(None, <error>)` otherwise.
        """
        if self.is_authenticated:
            raise Return((True, None))

        token = params["token"]
        user = params["user"]
        project_id = params['project_id']
        permissions = params["permissions"]

        project, error = yield state.get_project_by_id(project_id)
        if error:
            self.close_connection()
        if not project:
            raise Return((None, "project not found"))

        secret_key = project['secret_key']

        if token != auth.get_client_token(secret_key, project_id, user):
            raise Return((None, "invalid token"))

        if user and project.get('validate_url', None):

            http_client = AsyncHTTPClient()
            request = HTTPRequest(
                project['validate_url'],
                method="POST",
                body=json_encode({'user': user, 'permissions': permissions}),
                request_timeout=1
            )

            max_auth_attempts = project.get(
                'auth_attempts'
            ) or self.MAX_AUTH_ATTEMPTS

            back_off_interval = project.get(
                'back_off_interval'
            ) or self.BACK_OFF_INTERVAL

            back_off_max_timeout = project.get(
                'back_off_max_timeout'
            ) or self.BACK_OFF_MAX_TIMEOUT

            back_off = self.application.back_off
            attempts = 0

            while attempts < max_auth_attempts:

                # get current timeout for project
                current_attempts = back_off.setdefault(project_id, 0)

                factor = random.randint(0, 2**current_attempts-1)
                timeout = min(factor*back_off_interval, back_off_max_timeout)

                # wait before next authorization request attempt
                yield sleep(float(timeout)/1000)

                try:
                    response = yield http_client.fetch(request)
                except Exception as exc:
                    # tornado reports non-2xx answers by raising an error
                    # carrying the status in `code`, so a 403 can arrive
                    # here rather than as a response object.
                    if getattr(exc, 'code', None) == 403:
                        # validation endpoint answered: reset back-off
                        back_off[project_id] = 0
                        raise Return((None, "permission denied"))
                    # otherwise let it fail and try again after some
                    # timeout until we have auth attempts
                else:
                    # reset back-off attempts
                    back_off[project_id] = 0

                    if response.code == 200:
                        self.is_authenticated = True
                        break
                    elif response.code == 403:
                        raise Return((None, "permission denied"))
                attempts += 1
                back_off[project_id] += 1
        else:
            self.is_authenticated = True

        if not self.is_authenticated:
            raise Return((None, "permission validation error"))

        categories, error = yield state.get_project_categories(project)
        if error:
            # Connection state below is not set up, so do not leave the
            # connection flagged as authenticated (clean_up relies on it).
            self.is_authenticated = False
            self.close_connection()
            raise Return((None, error))

        self.categories = {}
        for category in categories:
            if not permissions or category['name'] in permissions:
                self.categories[category['name']] = category

        self.uid = uuid.uuid4().hex
        self.project = project
        self.permissions = permissions
        self.user = user
        self.channels = {}
        self.start_heartbeat()

        # allow broadcast from client only into bidirectional categories
        self.bidirectional_categories = {}
        for category_name, category in six.iteritems(self.categories):
            if category.get('bidirectional', False):
                self.bidirectional_categories[category_name] = category

        # Share the process-wide context: a fresh zmq.Context per client
        # connection is never terminated and keeps its resources alive.
        context = zmq.Context.instance()
        subscribe_socket = context.socket(zmq.SUB)

        if self.application.zmq_pub_sub_proxy:
            subscribe_socket.connect(self.application.zmq_xpub)
        else:
            for address in self.application.zmq_sub_address:
                subscribe_socket.connect(address)

        self.sub_stream = ZMQStream(subscribe_socket)
        self.sub_stream.on_recv(self.on_message_published)

        raise Return((True, None))

    @coroutine
    def handle_subscribe(self, params):
        """
        Subscribe authenticated connection on channels.

        `params['to']` maps category names to lists of channels. Not
        allowed categories and channels are silently skipped.
        """
        subscribe_to = params.get('to')

        if not subscribe_to:
            raise Return((True, None))

        project_id = self.project['_id']

        # register connection, keeping other connections of the same user
        project_connections = self.application.connections.setdefault(
            project_id, {}
        )
        if self.user:
            project_connections.setdefault(self.user, {})[self.uid] = self

        for category_name, channels in six.iteritems(subscribe_to):

            if category_name not in self.categories:
                # attempt to subscribe on not allowed category
                continue

            if not channels or not isinstance(channels, list):
                # attempt to subscribe without channels provided
                continue

            category_id = self.categories[category_name]['_id']

            if self.permissions:
                allowed_channels = self.permissions.get(category_name)
            else:
                allowed_channels = []

            if not isinstance(allowed_channels, list):
                # malformed permissions: nothing can be subscribed here
                continue

            for channel in channels:

                if allowed_channels and channel not in allowed_channels:
                    # attempt to subscribe on not allowed channel
                    continue

                channel_to_subscribe = rpc.create_channel_name(
                    project_id,
                    category_id,
                    channel
                )
                self.sub_stream.setsockopt_string(
                    zmq.SUBSCRIBE, six.u(channel_to_subscribe)
                )

                category_channels = self.channels.setdefault(
                    category_name, {}
                )
                category_channels[channel_to_subscribe] = True

        raise Return((True, None))

    @coroutine
    def handle_unsubscribe(self, params):
        """
        Unsubscribe authenticated connection from channels.

        `params['from']` maps category names to lists of channels. Not
        allowed categories and channels are silently skipped.
        """
        unsubscribe_from = params.get('from')

        if not unsubscribe_from:
            raise Return((True, None))

        project_id = self.project['_id']

        for category_name, channels in six.iteritems(unsubscribe_from):

            if category_name not in self.categories:
                # attempt to unsubscribe from not allowed category
                continue

            if not channels or not isinstance(channels, list):
                # attempt to unsubscribe from unknown channels
                continue

            category_id = self.categories[category_name]['_id']

            if self.permissions:
                allowed_channels = self.permissions[category_name]
            else:
                allowed_channels = []

            for channel in channels:

                if allowed_channels and channel not in allowed_channels:
                    # attempt to unsubscribe from not allowed channel
                    continue

                channel_to_unsubscribe = rpc.create_channel_name(
                    project_id,
                    category_id,
                    channel
                )
                self.sub_stream.setsockopt_string(
                    zmq.UNSUBSCRIBE, six.u(channel_to_unsubscribe)
                )

                try:
                    del self.channels[category_name][channel_to_unsubscribe]
                except KeyError:
                    pass

        raise Return((True, None))

    @coroutine
    def handle_broadcast(self, params):
        """
        Broadcast message from client into a bidirectional category.
        """
        category = params.get('category')
        channel = params.get('channel')

        if category not in self.categories:
            raise Return((None, 'category does not exist or permission denied'))

        if category not in self.bidirectional_categories:
            raise Return((None, 'one-way category'))

        if self.permissions:
            allowed_channels = self.permissions.get(category)
        else:
            allowed_channels = []

        if allowed_channels and channel not in allowed_channels:
            # attempt to broadcast into not allowed channel
            raise Return((None, 'channel permission denied'))

        result, error = yield rpc.process_broadcast(
            self.application,
            self.project,
            self.bidirectional_categories,
            params
        )

        raise Return((result, error))

    @coroutine
    def on_centrifuge_connection_message(self, message):
        """
        Called when message from client received.

        Decodes and validates the request, dispatches it to the matching
        `handle_<method>` coroutine and acknowledges the outcome. Every
        rejected request is acknowledged with an error and not processed
        any further.
        """
        try:
            data = json_decode(message)
        except ValueError:
            self.send_ack(error='malformed JSON data')
            raise Return(False)

        try:
            validate(data, req_schema)
        except ValidationError as e:
            self.send_ack(error=str(e))
            # NOTE(review): True mirrors the params validation failure
            # below -- confirm what callers expect for a bad request.
            raise Return(True)

        msg_id = data.get('id', None)
        method = data.get('method')
        params = data.get('params')

        if method != 'auth' and not self.is_authenticated:
            self.send_ack(error='unauthorized')
            raise Return(True)

        func = getattr(self, 'handle_%s' % method, None)

        if not func:
            self.send_ack(
                msg_id=msg_id,
                method=method,
                error="unknown method %s" % method
            )
            # there is nothing to call, stop here
            raise Return(True)

        try:
            validate(params, client_params_schema[method])
        except ValidationError as e:
            self.send_ack(msg_id=msg_id, method=method, error=str(e))
            raise Return(True)

        result, error = yield func(params)

        self.send_ack(msg_id=msg_id, method=method, result=result, error=error)

        raise Return(True)

    def start_heartbeat(self):
        """
        In ideal case we work with websocket connections with heartbeat
        available by default. But there are lots of other transports whose
        heartbeat must be started manually. Do it here.
        """
        if isinstance(self, SockJSConnection):
            if self.session:
                if self.session.transport_name != 'rawwebsocket':
                    self.session.start_heartbeat()
            else:
                self.close_connection()

    def on_message_published(self, message):
        """
        Called when message received from one of channels client subscribed to.
        """
        actual_message = message[0]
        if six.PY3:
            actual_message = actual_message.decode()
        # everything after the first separator is forwarded to client
        self.send_message(
            actual_message.split(rpc.CHANNEL_DATA_SEPARATOR, 1)[1]
        )

    def clean_up(self):
        """
        Unsubscribe connection from channels, clean up zmq sockets.
        """
        if hasattr(self, 'sub_stream') and not self.sub_stream.closed():
            self.sub_stream.on_recv(None)
            self.sub_stream.close()

        if not self.is_authenticated:
            return

        project_id = self.project['_id']

        connections = self.application.connections

        self.channels = None

        if project_id not in connections:
            return

        project_connections = connections[project_id]

        if self.user not in project_connections:
            return

        user_connections = project_connections[self.user]
        user_connections.pop(self.uid, None)

        # clean connections
        if not user_connections:
            del project_connections[self.user]

        if not project_connections:
            del connections[project_id]

    def on_centrifuge_connection_open(self):
        logger.info('client connected')
        self.is_authenticated = False

    def on_centrifuge_connection_close(self):
        logger.info('client disconnected')
        self.clean_up()
class Connection(object):
    """
    This is a base class describing a single connection of client from
    web browser.
    """
    # maximum auth validation requests before returning error to client
    MAX_AUTH_ATTEMPTS = 5

    # interval unit in milliseconds for back off
    BACK_OFF_INTERVAL = 100

    # maximum timeout between authorization attempts in back off
    BACK_OFF_MAX_TIMEOUT = 5000

    def close_connection(self):
        """
        General method for closing connection.
        """
        if isinstance(self, (SockJSConnection, )):
            self.close()

    def send_message(self, message):
        """
        Send message to client
        """
        if isinstance(self, SockJSConnection):
            self.send(message)

    def send_ack(self, msg_id=None, method=None, result=None, error=None):
        """
        Build an acknowledgement with `make_ack` and send it to client.
        """
        self.send_message(
            self.make_ack(msg_id=msg_id,
                          method=method,
                          result=result,
                          error=error))

    def make_ack(self, msg_id=None, method=None, result=None, error=None):
        """
        Return JSON encoded acknowledgement for a client request.
        """
        to_return = {
            'ack': True,
            'id': msg_id,
            'method': method,
            'result': result,
            'error': error
        }
        return json_encode(to_return)

    @coroutine
    def handle_auth(self, params):
        """
        Authenticate connection and prepare its state.

        Checks the client token, optionally asks project's `validate_url`
        to confirm permissions (with random exponential back-off shared per
        project), loads allowed categories and opens the zmq SUB stream
        used to receive published messages.

        Returns (via `Return`) a `(result, error)` tuple: `(True, None)` on
        success, `(None, <error>)` otherwise.
        """
        if self.is_authenticated:
            raise Return((True, None))

        token = params["token"]
        user = params["user"]
        project_id = params['project_id']
        permissions = params["permissions"]

        project, error = yield state.get_project_by_id(project_id)
        if error:
            self.close_connection()
        if not project:
            raise Return((None, "project not found"))

        secret_key = project['secret_key']

        if token != auth.get_client_token(secret_key, project_id, user):
            raise Return((None, "invalid token"))

        if user and project.get('validate_url', None):

            http_client = AsyncHTTPClient()
            request = HTTPRequest(project['validate_url'],
                                  method="POST",
                                  body=json_encode({
                                      'user': user,
                                      'permissions': permissions
                                  }),
                                  request_timeout=1)

            max_auth_attempts = project.get(
                'auth_attempts') or self.MAX_AUTH_ATTEMPTS

            back_off_interval = project.get(
                'back_off_interval') or self.BACK_OFF_INTERVAL

            back_off_max_timeout = project.get(
                'back_off_max_timeout') or self.BACK_OFF_MAX_TIMEOUT

            back_off = self.application.back_off
            attempts = 0

            while attempts < max_auth_attempts:

                # get current timeout for project
                current_attempts = back_off.setdefault(project_id, 0)

                factor = random.randint(0, 2**current_attempts - 1)
                timeout = min(factor * back_off_interval,
                              back_off_max_timeout)

                # wait before next authorization request attempt
                yield sleep(float(timeout) / 1000)

                try:
                    response = yield http_client.fetch(request)
                except Exception as exc:
                    # tornado reports non-2xx answers by raising an error
                    # carrying the status in `code`, so a 403 can arrive
                    # here rather than as a response object.
                    if getattr(exc, 'code', None) == 403:
                        # validation endpoint answered: reset back-off
                        back_off[project_id] = 0
                        raise Return((None, "permission denied"))
                    # otherwise let it fail and try again after some
                    # timeout until we have auth attempts
                else:
                    # reset back-off attempts
                    back_off[project_id] = 0

                    if response.code == 200:
                        self.is_authenticated = True
                        break
                    elif response.code == 403:
                        raise Return((None, "permission denied"))
                attempts += 1
                back_off[project_id] += 1
        else:
            self.is_authenticated = True

        if not self.is_authenticated:
            raise Return((None, "permission validation error"))

        categories, error = yield state.get_project_categories(project)
        if error:
            # Connection state below is not set up, so do not leave the
            # connection flagged as authenticated (clean_up relies on it).
            self.is_authenticated = False
            self.close_connection()
            raise Return((None, error))

        self.categories = {}
        for category in categories:
            if not permissions or category['name'] in permissions:
                self.categories[category['name']] = category

        self.uid = uuid.uuid4().hex
        self.project = project
        self.permissions = permissions
        self.user = user
        self.channels = {}
        self.start_heartbeat()

        # allow broadcast from client only into bidirectional categories
        self.bidirectional_categories = {}
        for category_name, category in six.iteritems(self.categories):
            if category.get('bidirectional', False):
                self.bidirectional_categories[category_name] = category

        # Share the process-wide context: a fresh zmq.Context per client
        # connection is never terminated and keeps its resources alive.
        context = zmq.Context.instance()
        subscribe_socket = context.socket(zmq.SUB)

        if self.application.zmq_pub_sub_proxy:
            subscribe_socket.connect(self.application.zmq_xpub)
        else:
            for address in self.application.zmq_sub_address:
                subscribe_socket.connect(address)

        self.sub_stream = ZMQStream(subscribe_socket)
        self.sub_stream.on_recv(self.on_message_published)

        raise Return((True, None))

    @coroutine
    def handle_subscribe(self, params):
        """
        Subscribe authenticated connection on channels.

        `params['to']` maps category names to lists of channels. Not
        allowed categories and channels are silently skipped.
        """
        subscribe_to = params.get('to')

        if not subscribe_to:
            raise Return((True, None))

        project_id = self.project['_id']

        # register connection, keeping other connections of the same user
        project_connections = self.application.connections.setdefault(
            project_id, {})
        if self.user:
            project_connections.setdefault(self.user, {})[self.uid] = self

        for category_name, channels in six.iteritems(subscribe_to):

            if category_name not in self.categories:
                # attempt to subscribe on not allowed category
                continue

            if not channels or not isinstance(channels, list):
                # attempt to subscribe without channels provided
                continue

            category_id = self.categories[category_name]['_id']

            if self.permissions:
                allowed_channels = self.permissions.get(category_name)
            else:
                allowed_channels = []

            if not isinstance(allowed_channels, list):
                # malformed permissions: nothing can be subscribed here
                continue

            for channel in channels:

                if allowed_channels and channel not in allowed_channels:
                    # attempt to subscribe on not allowed channel
                    continue

                channel_to_subscribe = rpc.create_channel_name(
                    project_id, category_id, channel)
                self.sub_stream.setsockopt_string(
                    zmq.SUBSCRIBE, six.u(channel_to_subscribe))

                category_channels = self.channels.setdefault(
                    category_name, {})
                category_channels[channel_to_subscribe] = True

        raise Return((True, None))

    @coroutine
    def handle_unsubscribe(self, params):
        """
        Unsubscribe authenticated connection from channels.

        `params['from']` maps category names to lists of channels. Not
        allowed categories and channels are silently skipped.
        """
        unsubscribe_from = params.get('from')

        if not unsubscribe_from:
            raise Return((True, None))

        project_id = self.project['_id']

        for category_name, channels in six.iteritems(unsubscribe_from):

            if category_name not in self.categories:
                # attempt to unsubscribe from not allowed category
                continue

            if not channels or not isinstance(channels, list):
                # attempt to unsubscribe from unknown channels
                continue

            category_id = self.categories[category_name]['_id']

            if self.permissions:
                allowed_channels = self.permissions[category_name]
            else:
                allowed_channels = []

            for channel in channels:

                if allowed_channels and channel not in allowed_channels:
                    # attempt to unsubscribe from not allowed channel
                    continue

                channel_to_unsubscribe = rpc.create_channel_name(
                    project_id, category_id, channel)
                self.sub_stream.setsockopt_string(
                    zmq.UNSUBSCRIBE, six.u(channel_to_unsubscribe))

                try:
                    del self.channels[category_name][channel_to_unsubscribe]
                except KeyError:
                    pass

        raise Return((True, None))

    @coroutine
    def handle_broadcast(self, params):
        """
        Broadcast message from client into a bidirectional category.
        """
        category = params.get('category')
        channel = params.get('channel')

        if category not in self.categories:
            raise Return(
                (None, 'category does not exist or permission denied'))

        if category not in self.bidirectional_categories:
            raise Return((None, 'one-way category'))

        if self.permissions:
            allowed_channels = self.permissions.get(category)
        else:
            allowed_channels = []

        if allowed_channels and channel not in allowed_channels:
            # attempt to broadcast into not allowed channel
            raise Return((None, 'channel permission denied'))

        result, error = yield rpc.process_broadcast(
            self.application, self.project, self.bidirectional_categories,
            params)

        raise Return((result, error))

    @coroutine
    def on_centrifuge_connection_message(self, message):
        """
        Called when message from client received.

        Decodes and validates the request, dispatches it to the matching
        `handle_<method>` coroutine and acknowledges the outcome. Every
        rejected request is acknowledged with an error and not processed
        any further.
        """
        try:
            data = json_decode(message)
        except ValueError:
            self.send_ack(error='malformed JSON data')
            raise Return(False)

        try:
            validate(data, req_schema)
        except ValidationError as e:
            self.send_ack(error=str(e))
            # NOTE(review): True mirrors the params validation failure
            # below -- confirm what callers expect for a bad request.
            raise Return(True)

        msg_id = data.get('id', None)
        method = data.get('method')
        params = data.get('params')

        if method != 'auth' and not self.is_authenticated:
            self.send_ack(error='unauthorized')
            raise Return(True)

        func = getattr(self, 'handle_%s' % method, None)

        if not func:
            self.send_ack(msg_id=msg_id,
                          method=method,
                          error="unknown method %s" % method)
            # there is nothing to call, stop here
            raise Return(True)

        try:
            validate(params, client_params_schema[method])
        except ValidationError as e:
            self.send_ack(msg_id=msg_id, method=method, error=str(e))
            raise Return(True)

        result, error = yield func(params)

        self.send_ack(msg_id=msg_id,
                      method=method,
                      result=result,
                      error=error)

        raise Return(True)

    def start_heartbeat(self):
        """
        In ideal case we work with websocket connections with heartbeat
        available by default. But there are lots of other transports whose
        heartbeat must be started manually. Do it here.
        """
        if isinstance(self, SockJSConnection):
            if self.session:
                if self.session.transport_name != 'rawwebsocket':
                    self.session.start_heartbeat()
            else:
                self.close_connection()

    def on_message_published(self, message):
        """
        Called when message received from one of channels client subscribed to.
        """
        actual_message = message[0]
        if six.PY3:
            actual_message = actual_message.decode()
        # everything after the first separator is forwarded to client
        self.send_message(
            actual_message.split(rpc.CHANNEL_DATA_SEPARATOR, 1)[1])

    def clean_up(self):
        """
        Unsubscribe connection from channels, clean up zmq sockets.
        """
        if hasattr(self, 'sub_stream') and not self.sub_stream.closed():
            self.sub_stream.on_recv(None)
            self.sub_stream.close()

        if not self.is_authenticated:
            return

        project_id = self.project['_id']

        connections = self.application.connections

        self.channels = None

        if project_id not in connections:
            return

        project_connections = connections[project_id]

        if self.user not in project_connections:
            return

        user_connections = project_connections[self.user]
        user_connections.pop(self.uid, None)

        # clean connections
        if not user_connections:
            del project_connections[self.user]

        if not project_connections:
            del connections[project_id]

    def on_centrifuge_connection_open(self):
        logger.info('client connected')
        self.is_authenticated = False

    def on_centrifuge_connection_close(self):
        logger.info('client disconnected')
        self.clean_up()
class MNBroker(MN_object):
    """The Dedalus broker class.

    The broker routes messages from clients to appropriate nodes/workers
    based on the requested data. It will also allow workers to register to
    different services as well as files.

    This base class defines the overall functionality and the API.
    Subclasses are meant to implement additional features (like logging).

    .. note::

       The workers will *always* be served by the `main_ep` endpoint.
       In a two-endpoint setup clients will be handled via the `opt_ep`
       endpoint.

    :param context: the context to use for socket creation.
    :type context: zmq.Context
    :param main_ep: the primary endpoint for workers and clients.
    :type main_ep: str
    :param opt_ep: is an optional 2nd endpoint.
    :type opt_ep: str
    :param service_q: the class to be used for the service worker-queue.
    :type service_q: class
    :param data_q: the class to be used for the data-queue.
    :type data_q: class

    :Example:

    ::

        context = zmq.Context()
        broker = MNBrokerRunner(context, "tcp://127.0.0.1:5555")
        IOLoop.instance().start()
        broker.shutdown()

    .. seealso:: :class:`MNWorker`

    .. warning::

       the broker is expected to be able to serve a few thousand clients,
       no guarantees yet though

    .. note:: this is by far not a ready product
    """

    # TODO: if a worker is doing a long running task we probably shouldn't keep it in the heartbeat timer list
    # TODO: add two brokers using b-star
    # TODO: implement the titanic scheme for added reliability in case of disjoint req/rep

    def __init__(self, context, main_ep, opt_ep=None, service_q=None, data_q=None):
        """Init MNBroker instance.

        Binds the ROUTER socket(s), installs the message handlers, starts
        the two periodic timers and registers the broker itself in the
        worker-info table.
        """
        self.service_q = ServiceQueue if service_q is None else service_q
        # NOTE: the data queue also falls back to ServiceQueue.
        self.data_q = ServiceQueue if data_q is None else data_q

        socket = context.socket(zmq.ROUTER)
        # Socket options only affect subsequent bind/connect calls, so the
        # identity has to be set before binding.
        socket.setsockopt(zmq.IDENTITY, b'BROKER')
        socket.bind(main_ep)
        self.main_stream = ZMQStream(socket)
        self.main_stream.on_recv(self.on_message)
        if opt_ep:
            socket = context.socket(zmq.ROUTER)
            socket.bind(opt_ep)
            self.client_stream = ZMQStream(socket)
            self.client_stream.on_recv(self.on_message)
        else:
            self.client_stream = self.main_stream

        # TODO: merge worker_tracker and info
        self._workers = {}
        self._workers_info = {}
        # Maps service name -> (queue of idle worker ids, list of pending requests).
        self._services = {}
        # TODO: each worker must have his own request queue
        self._worker_cmds = {
            MSG_READY: self.on_ready,
            MSG_REPLY: self.on_reply,
            MSG_HEARTBEAT: self.on_heartbeat,
            MSG_DISCONNECT: self.on_disconnect,
        }
        self._local_cmds = {
            MSG_WINFO: self.get_workers_info,
        }
        self.hb_check_timer = PeriodicCallback(self.on_timer, HB_INTERVAL)
        self.hb_check_timer.start()
        self.hb_get_winfo = PeriodicCallback(self.collect_workers_info, HB_INTERVAL)
        self.hb_get_winfo.start()
        # register this instance
        self.register_worker_info(self.main_stream.getsockopt(zmq.IDENTITY))
        _LOG.info("Broker initialized and can be found at '%s'" % main_ep)

    def _send_frames(self, stream, frames):
        """Send `frames` on `stream`, or shut the broker down if it is gone.

        :param stream: the stream to send on (may be None after shutdown).
        :param frames: message parts to send.
        :type frames: list
        :returns: True if the message was handed to the stream.
        :rtype: bool
        """
        if stream is None or stream.closed():
            # Nothing can be sent any more; release everything and bail out
            # instead of dereferencing the stream that shutdown() clears.
            self.shutdown()
            return False
        stream.send_multipart(frames)
        return True

    @staticmethod
    def _close_stream(stream):
        """Detach the handler from `stream` and close it without lingering.

        Does nothing for a missing or already closed stream.
        """
        if stream is None or stream.closed():
            return
        stream.on_recv(None)
        stream.socket.setsockopt(zmq.LINGER, 0)
        stream.socket.close()
        stream.close()

    def register_worker(self, wid, service, worker_type, address, protocols):
        """Register the worker id and add it to the given service.

        Does nothing if worker is already known.

        :param wid: the worker id.
        :type wid: str
        :param service: the service name.
        :type service: str
        :param worker_type: the type of the worker.
        :type worker_type: str
        :param address: the ipv4 or ipv6 address of the worker.
        :type address: str
        :param protocols: the routing protocols reported by the worker.
        :type protocols: str

        :rtype: None
        """
        if wid in self._workers:
            return
        self._workers[wid] = WorkerTracker(WORKER_PROTO, wid, service, self.main_stream)
        # If service exists then add this worker to its workers queue, if not create it.
        if service in self._services:
            wq, wr = self._services[service]
            wq.put(wid)
        else:
            q = self.service_q()
            q.put(wid)
            self._services[service] = (q, [])
        self.register_worker_info(wid, service, WORKER_ONLINE_STATUS, worker_type, address, protocols)
        _LOG.info(
            "New worker of type: '%s' registered with id: '%s', for service: '%s' and can be found at '%s'."
            % (worker_type, bytes_to_hexstring(wid), service, address))

    def unregister_worker(self, wid):
        """Unregister the worker with the given id and stop all timers for the worker.

        If the worker id is not registered, nothing happens.

        :param wid: the worker id.
        :type wid: str

        :rtype: None
        """
        try:
            wtracker = self._workers[wid]
        except KeyError:
            # not registered, ignore
            return
        # remove this workers' data from the map
        wtracker.shutdown()
        service = wtracker.service
        if service in self._services:
            wq, wr = self._services[service]
            wq.remove(wid)
        del self._workers[wid]
        self.reset_node_info(wid)
        _LOG.info("Worker with id: '%s' was removed from the pool." % bytes_to_hexstring(wid))

    def disconnect(self, wid):
        """Send disconnect command and unregister worker.

        If the worker id is not registered, nothing happens. If the main
        stream is already closed the broker is shut down instead.

        :param wid: the worker id.
        :type wid: str

        :rtype: None
        """
        if wid not in self._workers:
            # not registered, ignore
            return
        _LOG.info("Requesting from worker with id: '%s' to disconnect." % bytes_to_hexstring(wid))
        to_send = [wid, b'', WORKER_PROTO, MSG_DISCONNECT]
        if not self._send_frames(self.main_stream, to_send):
            # shutdown() already dropped every worker.
            return
        self.unregister_worker(wid)

    def client_response(self, rp, service, cmd, wid, msg):
        """Package and send reply to client.

        The message will contain the protocol used to serve this update,
        the service used, as well as echo back the requested command id
        and worker id. If the client stream is closed the broker is shut
        down and nothing is sent.

        :param rp: return address stack
        :type rp: list of str
        :param service: name of service
        :type service: str
        :param cmd: id of the operation requested by the client
        :type cmd: str
        :param wid: id of the worker that is replying
        :type wid: str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        _LOG.debug("Replying to client %s regarding request [%s]." % (rp, cmd))
        to_send = rp[:]
        to_send.extend([b'', CLIENT_PROTO, service, cmd, str(wid)])
        to_send.extend(msg)
        self._send_frames(self.client_stream, to_send)

    def shutdown(self):
        """Shutdown broker.

        Stops the broker timers, closes the stream(s), forgets all workers
        and services and ignores all further messages. Safe to call more
        than once and when a stream is already closed.

        .. warning:: The instance MUST not be used after :func:`shutdown` has been called.

        :rtype: None
        """
        # Stop the periodic callbacks first so they cannot fire on closed streams.
        for timer in (self.hb_check_timer, self.hb_get_winfo):
            timer.stop()
        # NOTE(review): per-worker trackers are dropped without calling their
        # shutdown(); confirm whether WorkerTracker owns timers that need stopping.
        main_stream, client_stream = self.main_stream, self.client_stream
        self.main_stream = None
        self.client_stream = None
        self._close_stream(main_stream)
        if client_stream is not main_stream:
            self._close_stream(client_stream)
        self._workers = {}
        self._services = {}
        _LOG.info(
            "Shutting down! All workers unregistered, will not process more messages."
        )

    def on_timer(self):
        """Method called on timer expiry.

        Checks which workers are dead and unregisters them.

        :rtype: None
        """
        # use list to avoid size change during iteration error
        for wtracker in list(self._workers.values()):
            if not wtracker.is_alive():
                _LOG.debug("Worker with id: '%s' timed out." % bytes_to_hexstring(wtracker.id))
                self.unregister_worker(wtracker.id)

    def on_ready(self, rp, msg):
        """Process worker READY command.

        Registers the worker for a service. The message must carry, in
        order: service, worker type, address and protocols.

        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        try:
            ret_id = rp[0]
            service = msg.pop(0)
            worker_type = msg.pop(0)
            address = msg.pop(0)
            protocols = msg.pop(0)
            self.register_worker(ret_id, service, worker_type, address, protocols)
        except IndexError:
            _LOG.debug("Error while registering worker %s: %s" % (rp, msg))

    def on_reply(self, rp, msg):
        """Process worker REPLY command.

        Route the `msg` to the client given by the address(es) in front of
        `msg`. Replies addressed to ``BROKER`` answer an internal info
        request and only update the worker-info table.

        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        ret_id = rp[0]
        wtracker = self._workers.get(ret_id)
        if not wtracker:
            # worker not found, ignore message
            return
        service = wtracker.service
        # make worker available again
        try:
            wq, wr = self._services[service]
            cp, msg = split_address(msg)
            # Frames are bytes; the internal return address is sent as b'BROKER'.
            if cp[0] == b'BROKER':
                self.update_worker_info(ret_id, msg)
                return
            cmd = msg.pop(0)
            self.client_response(cp, service, cmd, bytes_to_hexstring(wtracker.id), msg)
            wq.put(wtracker.id)
            self.change_worker_status(rp[0], WORKER_ONLINE_STATUS)
            if wr:
                # Serve the oldest queued request now that a worker is idle.
                proto, rp, msg = wr.pop(0)
                self.on_client(proto, rp, msg)
        except KeyError:
            # unknown service
            _LOG.info("Worker with id: '%s' reports an unknown service." % bytes_to_hexstring(ret_id))
            self.disconnect(ret_id)

    def on_heartbeat(self, rp, msg):
        """Process worker HEARTBEAT command.

        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        ret_id = rp[0]
        try:
            worker = self._workers[ret_id]
        except KeyError:
            # ignore HB for unknown worker
            return
        if worker.is_alive():
            worker.on_heartbeat()

    def on_disconnect(self, rp, msg):
        """Process worker DISCONNECT command.

        Unregisters the worker who sent this message.

        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        wid = rp[0]
        _LOG.debug("Worker with id: '%s' wants to disconnect." % bytes_to_hexstring(wid))
        self.change_worker_status(wid, WORKER_INACTIVE_STATUS)
        self.unregister_worker(wid)

    def on_ho(self, rp, service, msg):
        """Process HO request.

        ``ho.service`` answers 200/404 depending on whether any registered
        worker serves the named service; ``ho.stat`` runs a local command
        and replies with its msgpack-encoded result; anything else gets 501.
        Requests without a body frame are ignored.

        :param rp: return address stack
        :type rp: list of str
        :param service: the protocol id sent
        :type service: str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        _LOG.debug("New HO request received for service [%s] and code [%s]." % (service, msg))
        if service in (b'ho.service', b'ho.stat') and not msg:
            # Both handlers need at least one body frame.
            _LOG.debug("Request was not formed correctly, ignoring")
            return
        if service == b'ho.service':
            s = msg[0]
            # TODO: review this
            known = any(s == wr.service for wr in self._workers.values())
            ret = b'200' if known else b'404'
            self.client_response(rp, service, '', '', [ret])
        elif service == b'ho.stat':
            cmd = msg[0]
            if cmd in self._local_cmds:
                fnc = self._local_cmds[cmd]
                ret = msgpack.packb(fnc())
            else:
                ret = 'Command not supported'
            _LOG.debug("Replying with: [%s]." % ret)
            self.client_response(rp, service, cmd, '', [ret])
        else:
            self.client_response(rp, service, '', '', [b'501'])

    def on_client(self, proto, rp, msg):
        """Method called on client message.

        Frame 0 of msg is the requested service.
        The remaining frames are the request to forward to the worker.

        .. note::

           If the service is unknown to the broker the message is
           ignored.

        .. note::

           If currently no worker is available for a known service,
           the message is queued for later delivery.

        If a worker is available for the requested service, the
        message is repackaged and sent to the worker. The worker in
        question is removed from the pool of available workers.

        If the service name starts with `ho.`, the message is passed to
        the internal HO_ handler.

        :param proto: the protocol id sent
        :type proto: str
        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        _LOG.debug("Received a new request from client: %s regarding %s." % (rp, msg[0]))
        service = msg.pop(0)
        if service.startswith(b'ho.'):
            self.on_ho(rp, service, msg)
            return
        try:
            if len(msg) not in (2, 3):
                _LOG.debug("Request was not formed correctly, ignoring")
                return
            wq, wr = self._services[service]
            cwid = msg.pop(0)
            wid = self.find_worker(hexstring_to_bytes(cwid), service)
            if not wid:
                # no worker ready: restore the frames and queue the message
                msg.insert(0, cwid)
                msg.insert(0, service)
                wr.append((proto, rp, msg))
                return
            wtracker = self._workers[wid]
            to_send = [wtracker.id, b'', WORKER_PROTO, MSG_QUERY]
            to_send.extend(rp)
            to_send.append(b'')
            to_send.extend(msg)
            self.change_worker_status(wtracker.id, WORKER_BUSY_STATUS)
            self._send_frames(self.main_stream, to_send)
        except KeyError:
            # unknown service, ignore request
            _LOG.debug('Broker has no service "%s"' % service)

    def on_worker(self, proto, rp, msg):
        """Method called on worker message.

        Frame 0 of msg is the command id.
        The remaining frames depend on the command.

        This method determines the command sent by the worker and
        calls the appropriate method. If the command is unknown the
        message is ignored and a DISCONNECT is sent.

        :param proto: the protocol id sent
        :type proto: str
        :param rp: return address stack
        :type rp: list of str
        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        _LOG.debug("Received a new reply from worker: %s." % rp)
        cmd = msg.pop(0)
        if cmd in self._worker_cmds:
            fnc = self._worker_cmds[cmd]
            fnc(rp, msg)
        else:
            # ignore unknown command and DISCONNECT worker
            _LOG.info(
                "Worker with id: '%s' is trying to use an unknown command."
                % bytes_to_hexstring(rp[0]))
            self.disconnect(rp[0])

    def on_message(self, msg):
        """Processes given message.

        Decides what kind of message it is -- client or worker -- and
        calls the appropriate method. If unknown, the message is
        ignored.

        :param msg: message parts
        :type msg: list of str

        :rtype: None
        """
        _LOG.debug("Received: %s" % msg)
        rp, msg = split_address(msg)
        # TODO: this condition should be changed to something better
        if len(msg) < 2:
            _LOG.debug("Unrecognized message.")
            return
        # dispatch on first frame after path
        t = msg.pop(0)
        if t.startswith(b'MNPW'):
            self.on_worker(t, rp, msg)
        elif t.startswith(b'MNPC'):
            self.on_client(t, rp, msg)
        else:
            _LOG.debug('Broker unknown Protocol: "%s"' % t)

    def find_worker(self, wid, service):
        """Take the worker with the given id out of the service's idle queue.

        :param wid: the worker id
        :type wid: str
        :param service: service the worker supports
        :type service: str

        :returns: `wid` if the worker was idle, otherwise None.
        :rtype: str
        :raises KeyError: if `service` is unknown.
        """
        wq, wr = self._services[service]
        if wid in wq:
            wq.remove(wid)
            _LOG.debug(
                "Worker with id: %s selected and removed from the pool." % wid)
            return wid
        return None

    # NOTE: the default for `address` is evaluated once, when the class body
    # is executed, not on every call.
    def register_worker_info(self, wid, service=SERVICE_BROKER, status=WORKER_ONLINE_STATUS,
                             worker_type=WK_TYPE_BROKER,
                             address=get_current_ip(DEFAULT_INTEFACE), protocols=None):
        """Update the worker info list.

        An existing entry with the same ip is replaced; its services are
        merged into the new entry. For every non-broker registration the
        broker's own ``protocols`` entry is narrowed to the protocols
        common to all workers seen so far.

        :param wid: the worker id.
        :type wid: str
        :param service: the service name.
        :type service: str
        :param status: the current network status of the worker.
        :type status: str
        :param worker_type: the specific worker type.
        :type worker_type: str
        :param address: the ipv4 or ipv6 of this worker.
        :type address: str
        :param protocols: the routing protocols reported by the worker.
        :type protocols: str

        :rtype: None
        """
        service_list = {service}
        existing_worker = next(
            (x for x in self._workers_info if self._workers_info[x]['ip'] == address),
            None)
        if existing_worker is not None:
            service_list.update(self._workers_info[existing_worker]['service'])
            del self._workers_info[existing_worker]
        if isinstance(protocols, str):
            protocols = protocols.split(' ')
        self._workers_info[wid] = {
            'id': bytes_to_hexstring(wid),
            'status': status,
            'ip': address,
            'type': worker_type,
            'service': list(service_list),
            'protocols': protocols,
        }
        # broker will store a list of protocols supported by all clients
        broker_id = self.main_stream.getsockopt(zmq.IDENTITY)
        if wid != broker_id:
            # NOTE(review): if a worker reports the broker's own ip, the broker
            # entry was just replaced above and this lookup raises KeyError --
            # confirm the intended behaviour for same-host workers.
            broker_info = self._workers_info[broker_id]
            if broker_info['protocols'] is None:
                broker_info['protocols'] = protocols
            else:
                broker_info['protocols'] = list(
                    set(broker_info['protocols']).intersection(set(protocols)))

    def reset_node_info(self, wid, status=WORKER_OFFLINE_STATUS):
        """Replace the info entry of `wid` with a copy carrying `status`.

        Does nothing if no info is stored for `wid`.

        :param wid: the worker id.
        :type wid: str
        :param status: the status to store, offline by default.
        :type status: str

        :rtype: None
        """
        try:
            current = self._workers_info[wid]
        except KeyError:
            return
        self._workers_info[wid] = {
            'id': bytes_to_hexstring(wid),
            'status': status,
            'ip': current['ip'],
            'type': current['type'],
            'service': current['service'],
            'protocols': current['protocols'],
        }

    def change_worker_status(self, wid, status):
        """Change the status of the worker with the given id.

        Unknown worker ids are ignored.

        :param wid: the worker id
        :type wid: str
        :param status: the new status of the worker
        :type status: str

        :rtype: None
        """
        try:
            self._workers_info[wid]['status'] = status
        except KeyError:
            pass

    def get_workers_info(self):
        """Return a list with the information of all current workers.

        :rtype: list of dict
        """
        # A real list, so the result can be serialised (see on_ho).
        return list(self._workers_info.values())

    def update_worker_info(self, wid, data):
        """Merge a msgpack-encoded info dump from worker `wid` into its entry.

        :param wid: the worker id.
        :type wid: str
        :param data: message parts; frame 0 is the command, frame 1 the payload.
        :type data: list of str

        :rtype: None
        :raises KeyError: if no info is stored for `wid`.
        """
        # for now there is only one possible command, so frame 0 is discarded
        data.pop(0)
        wdata = msgpack.unpackb(data.pop(0))
        self._workers_info[wid].update(wdata)

    def collect_workers_info(self):
        """Ask every registered worker for an info dump."""
        # A return address pointing to 'BROKER' will be interpreted as belonging to an internal 'ho' request
        self.send_to_all_workers(b'BROKER')

    def send_to_all_workers(self, rp, msg_type=MSG_WDUMP):
        """Send a query of type `msg_type` to every registered worker.

        Stops early if the main stream turns out to be closed.

        :param rp: return address placed in the query.
        :type rp: str
        :param msg_type: the query type.
        :type msg_type: str

        :rtype: None
        """
        # Iterate over a snapshot: a failed send shuts the broker down,
        # which replaces the worker table.
        for wid in list(self._workers):
            to_send = [wid, b'', WORKER_PROTO, MSG_QUERY, rp, b'', msg_type]
            if not self._send_frames(self.main_stream, to_send):
                return
class Socket:
    """Wrapper class for zmq.Socket

    This class utilizes a tornado event loop to support using ZmqStream for sending and receiving
    messages. Additionally it stages sockets with specific configurations to allow request/reply
    sockets to be more robust to timeout conditions and failure states.

    Attributes:
        logger: Logger instance for all socket activity
        loop: Tornado event loop instance
        address: Assigned address of the zmq.Socket
        port: Assigned port of the zmq.Socket
        protocol: Assigned zmq socket type
        server: True for socket types that bind (PUB, REP)
        ctx: ZMQ context instance
        stream: ZmqStream instance
        zmq_socket: Underlying zmq.Socket object
    """

    def __init__(self, loop, protocol):
        """Constructor for Socket class

        Args:
            loop: Tornado event loop
            protocol: Assigned protocol for the zmq.Socket
        """
        self.logger = logging.getLogger("Socket")
        self.loop = loop
        self.protocol = protocol
        self.server = protocol in (zmq.PUB, zmq.REP)
        # instance() is a classmethod; calling it on the class avoids
        # building a throwaway Context first.
        self.ctx = zmq.Context.instance()
        self.stream = None
        self.zmq_socket = None
        self.address = None
        self.port = None
        self.create_socket(protocol)

    def create_socket(self, protocol):
        """Helper function for creating a zmq.Socket of various types with various options

        Assumes that request sockets need extra configuration options to prevent erroneous states
        when two requests are sent before a reply is received. REQ_RELAXED will drop the first request
        and reset the underlying socket automatically allowing the second request to be processed.
        Additionally, these options ensure that an event loop can exit even if a send is pending but
        hasn't been sent yet.

        Args:
            protocol: zmq socket type
        """
        if protocol == zmq.REQ:
            # NOTE(review): these are set on the shared context, so they become
            # defaults for every socket created from it afterwards, not only
            # this REQ socket -- confirm that is intended.
            # make sure that replies back to req's are coordinated with header data
            self.ctx.setsockopt(zmq.REQ_CORRELATE, 1)
            # allow req socket to internally try to reconnect if two sends are sent in a row
            self.ctx.setsockopt(zmq.REQ_RELAXED, 1)
            # timeout for trying to send
            self.ctx.setsockopt(zmq.SNDTIMEO, 1000)
            # ensure that the socket doesn't block on close
            self.ctx.setsockopt(zmq.LINGER, 0)
        self.zmq_socket = self.ctx.socket(protocol)

    def connect(self, address, port):
        """Connect the socket to a local or remote address:port

        Also kicks off the zmqStream after connecting.

        Args:
            address: Decimal separated string (eg, 127.0.0.1) where service is bound
            port: int associated with service port

        Returns:
            (String, int): Tuple containing the address string and the port
        """
        self.zmq_socket.connect("tcp://{}:{}".format(address, port))
        self.address = address  # 127.0.0.1
        self.port = port  # 10001
        self.start_stream()
        return (self.address, self.port)

    def disconnect(self):
        """Disconnect the underlying socket and stop the zmqStream

        Best effort: a failing disconnect is logged and otherwise ignored.
        """
        # Detach the endpoint while the socket is still open: closing the
        # stream first closes the socket and makes the disconnect fail with
        # "Socket operation on non-socket".
        try:
            self.zmq_socket.disconnect("tcp://{}:{}".format(
                self.address, self.port))
        except Exception as e:
            self.logger.debug("Ignoring disconnect failure: {}".format(e))
        self.stop_stream()

    def get_local_ip(self):
        """Identifies the ip address of the local node

        Returns:
            String: Decimal separated string (eg, 127.0.0.1); falls back to
            127.0.0.1 when the address cannot be determined
        """
        # TODO(pickledgator): Check robustness of this strategy and switch to netifaces if needed
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            # Connecting a UDP socket selects the outgoing interface; its
            # local address is then read back.
            s.connect(("10.255.255.255", 1))
            ip = s.getsockname()[0]
        except Exception:
            ip = "127.0.0.1"
        finally:
            s.close()
        return ip

    def set_filter(self, filter_string=""):
        """Helper function to enable zmq.SUBSCRIBER filters

        In zmq, the filter (string or bytes) can be used to screen multi-part messages such that only
        messages with the first element of the array match an associated string. Currently, we
        configure the subscriber sockets to receive all messages from any publisher.
        TODO(pickledgator): Add support for filter->separate callbacks?

        Args:
            filter_string: Setup socket to only allow multi-part messages whose first element match
                           this string
        """
        # "" is a wildcard to accept all messages
        self.zmq_socket.setsockopt_string(zmq.SUBSCRIBE, filter_string)

    def bind(self):
        """Bind the underlying zmq socket to an ip on the local machine at a random available port

        Also kicks off the zmqStream after binding.

        Returns:
            (String, int): Tuple containing the address string and the port chosen
        """
        # TODO(pickledgator): Find specific range that has the most availability
        ip = self.get_local_ip()
        port = self.zmq_socket.bind_to_random_port("tcp://{}".format(ip),
                                                   min_port=10001,
                                                   max_port=20000,
                                                   max_tries=100)
        self.address = ip
        self.port = port
        self.start_stream()
        return (self.address, self.port)

    def unbind(self):
        """Reverse the bind of the underlying zmq socket and stop the zmqStream

        Best effort: a failing unbind is logged and otherwise ignored.
        """
        # Unbind before closing the stream, which closes the socket as well.
        try:
            self.zmq_socket.unbind("tcp://{}:{}".format(
                self.address, self.port))
        except Exception as e:
            self.logger.debug("Ignoring unbind failure: {}".format(e))
        self.stop_stream()

    def send(self, message):
        """Identifies the correct underlying zmq send method based on the type of message

        Args:
            message: Message to be sent (string or bytes)
        """
        self.logger.debug("Sending message: {}".format(message))
        if isinstance(message, str):
            self.stream.send_string(message)
        else:
            # assumes bytes
            self.stream.send(message)

    def start_stream(self):
        """Wrap the zmq socket in a ZMQStream on the event loop, if not done yet"""
        if not self.stream:
            self.stream = ZMQStream(self.zmq_socket, self.loop)

    def stop_stream(self):
        """Stop receiving on the zmqStream, close it and forget it"""
        if self.stream:
            if not self.stream.closed():
                # TODO(pickledgator): does this block if no recv?
                self.stream.stop_on_recv()
                self.stream.close()
            self.stream = None

    def cycle_socket(self):
        """Close the current zmq socket and create a fresh one of the same type

        The new socket is neither connected nor bound.
        """
        self.close()
        self.create_socket(self.protocol)
        # self.connect(self.address, self.port)

    def receive(self, handler, timeout_ms=None, timeout_callback=None):
        """Register a handler for the next incoming message(s)

        Args:
            handler: Callable invoked with the first message frame decoded as utf-8
            timeout_ms: Optional time in milliseconds after which the receive is considered
                        failed and the socket is cycled
            timeout_callback: Optional callable invoked when the timeout fires
        """

        def msg_handler(handler, timeout, message):
            # this callback receives a message list, with one element, so just pass the contents to the
            # application handler
            handler(message[0].decode("utf-8"))
            # if we received the message, then we need to cancel the watchdog timeout from
            # the last receive call
            if timeout:
                self.loop.remove_timeout(timeout)

        def handle_timeout(timeout_callback):
            if timeout_callback:
                timeout_callback()
            # the event that we hit a timeout, cycle the socket so that it doesn't
            # get stuck in a weird state
            self.cycle_socket()

        if not self.stream:
            self.logger.error("Stream is not open")
            return

        timeout = None
        if timeout_ms:
            # if we want to detect when recv fails, setup a timeout that cleans up the
            # socket (RequestClients); without one the recv waits forever (subscribers)
            timeout = self.loop.call_later(
                timeout_ms / 1000.0,
                functools.partial(handle_timeout, timeout_callback))
        # always set the handler, in case it changed
        self.stream.on_recv(functools.partial(msg_handler, handler, timeout))

    def close(self):
        """Detach from the endpoint, stop the zmqStream and close the zmq socket"""
        try:
            # Bound sockets have to be unbound, connected ones disconnected.
            if self.server:
                self.unbind()
            else:
                self.disconnect()
        except Exception as e:
            self.logger.debug("Ignoring failure while closing: {}".format(e))
        self.zmq_socket.close()
class MNClient(MN_object):
    """Class for the MN client side.

    Thin asynchronous encapsulation of a zmq.DEALER socket.
    Provides a :func:`request` method with optional timeout.

    Only one request may be outstanding at a time: after :func:`request`
    the client refuses further requests until a message has arrived.

    :param context: the ZeroMQ context to create the socket in.
    :type context: zmq.Context
    :param endpoint: the endpoint to connect to.
    :type endpoint: str
    :param service: the service the client should use
    :type service: str
    """

    _proto_version = CLIENT_PROTO

    def __init__(self, context, endpoint, service):
        """Initialize the MNClient and open its stream."""
        self.context = context
        self.service = service
        self.endpoint = endpoint
        self.can_send = True
        # Empty delimiter frame as bytes, like every other frame on the wire.
        self._proto_prefix = [b'', CLIENT_PROTO, service]
        self._tmo = None
        self.timed_out = False
        self._create_stream()

    def _create_stream(self):
        """Helper for opening a stream.

        Creates a DEALER socket with no linger, wraps it in a ZMQStream on
        the global IOLoop and connects it to the endpoint.
        """
        socket = self.context.socket(zmq.DEALER)
        ioloop = IOLoop.instance()
        self.stream = ZMQStream(socket, ioloop)
        self.stream.on_recv(self._on_message)
        self.stream.socket.setsockopt(zmq.LINGER, 0)
        self.stream.connect(self.endpoint)

    def _cancel_timeout(self):
        """Helper stopping a pending timeout timer, if any."""
        if self._tmo:
            self._tmo.stop()
            self._tmo = None

    def shutdown(self):
        """Method to deactivate the client connection completely.

        Will cancel a pending timeout and delete the stream and the
        underlying socket.

        .. warning:: The instance MUST not be used after :func:`shutdown` has been called.

        :rtype: None
        """
        # A timer left running would call on_timeout() on a dead client.
        self._cancel_timeout()
        if not self.stream:
            return
        self.stream.socket.close()
        self.stream.close()
        self.stream = None

    def request(self, msg, timeout=None):
        """Send the given message.

        :param msg: message parts to send.
        :type msg: list of str
        :param timeout: time to wait in milliseconds.
        :type timeout: int

        :rtype: None
        :raises InvalidStateError: if a previous request is still outstanding.
        """
        if not self.can_send:
            raise InvalidStateError()
        if isinstance(msg, bytes):
            msg = [msg]
        # The message is sent as given; self._proto_prefix is not prepended
        # here, so callers supply the full frame list themselves.
        to_send = msg
        if self.stream.closed():
            # TODO check this
            self._create_stream()
        self.stream.send_multipart(to_send)
        self.can_send = False
        if timeout:
            self._start_timeout(timeout)

    def _on_timeout(self):
        """Helper called after timeout.

        Marks the client as timed out and notifies :func:`on_timeout`.
        """
        self.timed_out = True
        self._tmo = None
        self.on_timeout()

    def _start_timeout(self, timeout):
        """Helper for starting the timeout.

        A timer that is still pending is stopped first so that only one
        timeout is ever armed.

        :param timeout: the time to wait in milliseconds.
        :type timeout: int
        """
        self._cancel_timeout()
        self._tmo = DelayedCallback(self._on_timeout, timeout)
        self._tmo.start()

    def _on_message(self, msg):
        """Helper method called on message receive.

        :param msg: list of message parts.
        :type msg: list of str
        """
        # disable timeout
        self._cancel_timeout()
        # setting state before invoking on_message, so we can request from there
        self.can_send = True
        self.on_message(msg)

    def on_message(self, msg):
        """Public method called when a message arrived.

        .. note:: Does nothing. Should be overloaded!
        """
        pass

    def on_timeout(self):
        """Public method called when a timeout occurred.

        .. note:: Does nothing. Should be overloaded!
        """
        pass