def next_task_from_schedule(self): if self._active_finish: logger.error(*self.lfm.crawled('CrawlerRunner', self.name, '正在执行的任务有:%s'%self._crawlers)) return if self.pause: return if self.delay_stop.active(): self.delay_stop.cancel() while self.needs_backout(): self.crawl(self.spidercls) def change_falg(_): self._active_finish = False self.slot.nextcall.schedule() return None if self._active: self._active_finish = True logger.warning('即将激活的任务:%s'%self._crawlers) d = DeferredList(self._active) d.addBoth(change_falg) yield d if self.runner_is_idle(): self.stop()
def provision(self, request): """ Provision the device with credentials from a cloud controller. """ cors.config_cors(request) body = json.loads(request.content.read().decode('utf-8')) routerId = body['routerId'] apitoken = body['apitoken'] pdserver = body['pdserver'] wampRouter = body['wampRouter'] changed = False if routerId != nexus.core.info.pdid \ or pdserver != nexus.core.info.pdserver \ or wampRouter != nexus.core.info.wampRouter: if pdserver and wampRouter: nexus.core.provision(routerId, pdserver, wampRouter) else: nexus.core.provision(routerId) changed = True if apitoken != nexus.core.getKey('apitoken'): nexus.core.saveKey(apitoken, 'apitoken') changed = True if changed: PDServerRequest.resetToken() nexus.core.jwt_valid = False def set_update_fetcher(session): session.set_update_fetcher(self.update_fetcher) @inlineCallbacks def start_polling(result): yield self.update_fetcher.start_polling() def send_response(result): response = dict() response['provisioned'] = True response['httpConnected'] = nexus.core.jwt_valid response['wampConnected'] = nexus.core.wamp_connected request.setHeader('Content-Type', 'application/json') return json.dumps(response) wampDeferred = nexus.core.connect(WampSession) wampDeferred.addCallback(set_update_fetcher) httpDeferred = sendStateReport() httpDeferred.addCallback(start_polling) identDeferred = sendNodeIdentity() dl = DeferredList([wampDeferred, httpDeferred, identDeferred], consumeErrors=True) dl.addBoth(send_response) reactor.callLater(6, dl.cancel) return dl else: return json.dumps({'success': False, 'message': 'No change on the provision parameters'})
def run(self): jobs, self._jobs = self._jobs[:], [] jobs_done = DeferredList(jobs) jobs_done.addBoth(lambda ignore: self._thread_pool.stop()) jobs_done.addBoth(lambda ignore: reactor.stop()) reactor.callWhenRunning(self._thread_pool.start) reactor.run(self._install_signal_handlers)
def stop(self): log.info("\n") log.info('end-of-execution-stopping-consumers') # Ask each of our consumers to stop. When a consumer fully stops, it # fires the deferred returned from its start() method. We saved all # those deferreds away (above, in start()) in self._consumer_d_list, # so now we'll use a DeferredList to wait for all of them... for consumer in self._consumer_list: consumer.stop() dl = DeferredList(self._consumer_d_list) # Once the consumers are all stopped, then close our client def _stop_client(result): if isinstance(result, Failure): log.error('error', result=result) else: log.info('all-consumers-stopped', client=self._client) self._client.close() return result dl.addBoth(_stop_client) # And once the client is shutdown, stop the reactor def _stop_reactor(result): reactor.stop() return result dl.addBoth(_stop_reactor)
def cleanUp(self, wasClean, code, reason): """Thorough clean-up method to cancel all remaining deferreds, and send connection metrics in""" self.ps.metrics.increment("client.socket.disconnect", tags=self.base_tags) elapsed = (ms_time() - self.ps.connected_at) / 1000.0 self.ps.metrics.timing("client.socket.lifespan", duration=elapsed, tags=self.base_tags) # Cleanup our client entry if self.ps.uaid and self.ap_settings.clients.get(self.ps.uaid) == self: del self.ap_settings.clients[self.ps.uaid] # Cancel any outstanding deferreds that weren't already called for d in self.ps._callbacks: if not d.called: d.cancel() # Attempt to deliver any notifications not originating from storage if self.ps.direct_updates: defers = [] if self.ps.use_webpush: for notifs in self.ps.direct_updates.values(): notifs = filter(lambda x: x.ttl != 0, notifs) defers.extend(map(self._save_webpush_notif, notifs)) else: for chid, version in self.ps.direct_updates.items(): defers.append(self._save_simple_notif(chid, version)) # Tag on the notifier once everything has been stored dl = DeferredList(defers) dl.addBoth(self._lookup_node) # Delete and remove remaining dicts and lists del self.ps.direct_updates del self.ps.updates_sent
def asynchroTest(requests, addresses): dl = list() for z in zip(requests, addresses): d = agent.request('POST', z[1], bodyProducer=StringProducer(z[0])) d.addCallback(printResource) dl.append(d) deferList = DeferredList(dl, consumeErrors=True) deferList.addCallback(printResource) deferList.addBoth(stop) reactor.run()
def _deferred_field(self, field, item, spider): deferreds = [ self._deferred_value(value, spider) for value in arg_to_iter(item.get(field)) ] if not deferreds: item[field] = None return defer_result(item) deferred = DeferredList(deferreds, consumeErrors=True) deferred.addBoth(self._add_value, field, item) return deferred
def main(servers): l = [] for server in servers: d = Deferred() l.append(d) s = SearchFactory(server, d) reactor.connectTCP(server, 389, s) d.addErrback(errback) dl = DeferredList(l) dl.addBoth(lambda x: reactor.stop()) reactor.run() sys.exit(exitStatus)
def process_item(self, item, spider): """ resolve IDs to labels in specified fields """ if not any(item.get(field) for field in self.fields): return item deferred = DeferredList( [self._deferred_field(field, item, spider) for field in self.fields], consumeErrors=True, ) deferred.addBoth(lambda _: item) return deferred
def rename_folder(self, folder, new_folder): """ Renames a folder within a torrent. This basically does a file rename on all of the folders children. :returns: A deferred which fires when the rename is complete :rtype: twisted.internet.defer.Deferred """ log.debug("attempting to rename folder: %s to %s", folder, new_folder) if len(new_folder) < 1: log.error( "Attempting to rename a folder with an invalid folder name: %s", new_folder) return new_folder = sanitize_filepath(new_folder, folder=True) def on_file_rename_complete(result, wait_dict, index): wait_dict.pop(index, None) wait_on_folder = {} self.waiting_on_folder_rename.append(wait_on_folder) for f in self.get_files(): if f["path"].startswith(folder): # Keep track of filerenames we're waiting on wait_on_folder[f["index"]] = Deferred().addBoth( on_file_rename_complete, wait_on_folder, f["index"]) new_path = f["path"].replace(folder, new_folder, 1) try: self.handle.rename_file(f["index"], new_path) except TypeError: self.handle.rename_file(f["index"], new_path.encode("utf-8")) def on_folder_rename_complete(result, torrent, folder, new_folder): component.get("EventManager").emit( TorrentFolderRenamedEvent(torrent.torrent_id, folder, new_folder)) # Empty folders are removed after libtorrent folder renames self.remove_empty_folders(folder) torrent.waiting_on_folder_rename = filter( None, torrent.waiting_on_folder_rename) component.get("TorrentManager").save_resume_data( (self.torrent_id, )) d = DeferredList(wait_on_folder.values()) d.addBoth(on_folder_rename_complete, self, folder, new_folder) return d
def process_ack(self, data): """Process an ack message, delete notifications from storage if needed""" updates = data.get("updates") if not updates or not isinstance(updates, list): return self.ps.metrics.increment("updates.client.ack", tags=self.base_tags) defers = filter(None, map(self.ack_update, updates)) if defers: self.transport.pauseProducing() dl = DeferredList(defers) dl.addBoth(self.check_missed_notifications, True) else: self.check_missed_notifications(None)
def assignment_stopped(_, assign_id): logger.debug("Assignment %s has stopped", assign_id) if (len(config["current_assignments"]) <= 1 and not self.agent.shutting_down): config["state"] = AgentState.ONLINE self.agent.reannounce(force=True) assignment = config["current_assignments"][assign_id] if "jobtype" in assignment: jobtype_id = assignment["jobtype"].pop("id", None) if jobtype_id: jobtype = config["jobtypes"].pop(jobtype_id, None) updates_deferred = DeferredList( jobtype.task_update_deferreds) updates_deferred.addBoth(remove_assignment, assign_id) else: config["current_assignments"].pop(assign_id)
def _evaluate_internal(self, flag, user): def check_prereq_results(result): prereq_ok = True for (success, prereq_ok) in result: if success is False or prereq_ok is False: prereq_ok = False if prereq_ok is True: index = _evaluate_index(flag, user) variation = _get_variation(flag, index) return variation return None results = DeferredList(map(partial(self._evaluate_prereq, user), flag.get('prerequisites') or [])) results.addBoth(check_prereq_results) return results
def main(dns): l = [] resolver = client.Resolver("/etc/resolv.conf") for dnString in dns: dn = DistinguishedName(stringValue=dnString) domain = dn.getDomainName() d = resolver.lookupService("_ldap._tcp.%s" % domain) l.append(d) d.addCallback(printAnswer, dnString) d.addErrback(errback) dl = DeferredList(l) dl.addBoth(lambda dummy: reactor.callLater(0, reactor.stop)) reactor.run() sys.exit(exitStatus)
def get(self): """HTTP Get Returns basic information about the version and how many clients are connected in a JSON object. """ self._healthy = True self._health_checks = dict( version=__version__, clients=len(getattr(self.application, 'clients', ())) ) dl = DeferredList([ self._check_table(self.db.router.table), self._check_table(self.db.storage.table) ]) dl.addBoth(self._finish_response)
def get(self, *args, **kwargs): """HTTP Get Returns basic information about the version and how many clients are connected in a JSON object. """ self._healthy = True self._health_checks = { "version": __version__, "clients": len(self.ap_settings.clients) } dl = DeferredList([ self._check_table(self.ap_settings.router.table), self._check_table(self.ap_settings.storage.table) ]) dl.addBoth(self._finish_response)
def get(self): """HTTP Get Returns basic information about the version and how many clients are connected in a JSON object. """ self._healthy = True self._health_checks = { "version": __version__, "clients": len(self.ap_settings.clients) } dl = DeferredList([ self._check_table(self.ap_settings.router.table), self._check_table(self.ap_settings.storage.table) ]) dl.addBoth(self._finish_response)
def render(self, request): """ This is the 'main' RPC method. This will always be called when a request arrives and it's up to this method to parse the request and dispatch it further. @type request: t.w.s.Request @param request: Request from client @rtype: some constant :-) @return: NOT_DONE_YET signalizing, that there's Deferred, that will take care about sending the response. @TODO verbose mode """ try: request_content = self._getRequestContent(request) if not request_content: raise jsonrpc.JSONRPCError('Empty request', jsonrpc.INVALID_REQUEST) except jsonrpc.JSONRPCError: self._parseError(request) return server.NOT_DONE_YET is_batch = True if not isinstance(request_content, list): request_content = [request_content] is_batch = False dl = [] for request_dict in request_content: d = succeed(request_dict) d.addCallback(jsonrpc.verifyMethodCall) d.addCallback(self._callMethod) d.addBoth(jsonrpc.prepareMethodResponse, request_dict['id'], request_dict['jsonrpc']) dl.append(d) dl = DeferredList(dl, consumeErrors=True) dl.addBoth(self._cbFinishRequest, request, is_batch) return server.NOT_DONE_YET
def run(self): """ Start SQL collection. """ deferreds = [] for plugin in self.plugins: log.debug("Running collection for plugin %s", plugin.name()) tasks = [] for table, task in plugin.prepareQueries(self.device).iteritems(): dsc = DataSourceConfig(*task) dbapiName = dsc.connectionString.split(',', 1)[0].strip('\'"') executor = self._pools.setdefault(dbapiName, adbapiExecutor()) deferred = executor.submit(dsc) deferred.addBoth(self.parseResult,plugin.name(),table,dbapiName) tasks.append(deferred) tdl = DeferredList(tasks) deferreds.append(tdl) tdl.addBoth(self.collectComplete, plugin) dl = DeferredList(deferreds) dl.addBoth(self.collectComplete, None)
def stringReceived(self, string): """ This is the 'main' RPC method. This will always be called when a request arrives and it's up to this method to parse the request and dispatch it further. @type string: str @param string: Request from client, just the 'string' itself, already stripped of the netstring stuff. @rtype: DeferredList @return: Deferred, that will fire when all methods are finished. It will already have all the callbacks and errbacks neccessary to finish and send the response. """ self._logRequest(string) try: request_content = jsonrpc.decodeRequest(string) except jsonrpc.JSONRPCError: self._parseError() return None is_batch = True if not isinstance(request_content, list): request_content = [request_content] is_batch = False dl = [] for request_dict in request_content: d = succeed(request_dict) d.addCallback(jsonrpc.verifyMethodCall) d.addCallback(self._callMethod) d.addBoth(jsonrpc.prepareMethodResponse, request_dict['id'], request_dict['jsonrpc']) dl.append(d) dl = DeferredList(dl, consumeErrors=True) dl.addBoth(self._cbFinishRequest, is_batch) return dl
def oneBatch(prev, ipList): done = 0 deferreds = [] for toDo in ipList: deferreds.append(makeRequest(toDo)) done += 1 if done == concurrentRequests: break else: print "\n%d requests ended in empty match" % prev d = getPage("%s/stats/avg" % (baseUrl, )) @d.addCallback def c2(time): print "Average matching time in server: %s" % time reactor.stop() return d = DeferredList(deferreds) @d.addCallback def c(results): return prev + len([x for x in results if x[1] in ("[]", "ok\n")]) d.addErrback(log.err) d.addBoth(oneBatch, ipList)
def render(self, request): """ This is the 'main' RPC method. This will always be called when a request arrives and it's up to this method to parse the request and dispatch it further. @type request: t.w.s.Request @param request: Request from client @rtype: some constant :-) @return: NOT_DONE_YET signalizing, that there's Deferred, that will take care about sending the response. @TODO verbose mode """ try: request_content = self._getRequestContent(request) except jsonrpc.JSONRPCError: self._parseError(request) return server.NOT_DONE_YET is_batch = True if not isinstance(request_content, list): request_content = [request_content] is_batch = False dl = [] for request_dict in request_content: d = succeed(request_dict) d.addCallback(jsonrpc.verifyMethodCall) d.addCallback(self._callMethod) d.addBoth(jsonrpc.prepareMethodResponse, request_dict['id'], request_dict['jsonrpc']) dl.append(d) dl = DeferredList(dl, consumeErrors=True) dl.addBoth(self._cbFinishRequest, request, is_batch) return server.NOT_DONE_YET
def rename_folder(self, folder, new_folder): """ Renames a folder within a torrent. This basically does a file rename on all of the folders children. :returns: A deferred which fires when the rename is complete :rtype: twisted.internet.defer.Deferred """ log.debug("attempting to rename folder: %s to %s", folder, new_folder) if len(new_folder) < 1: log.error("Attempting to rename a folder with an invalid folder name: %s", new_folder) return new_folder = sanitize_filepath(new_folder, folder=True) def on_file_rename_complete(result, wait_dict, index): wait_dict.pop(index, None) wait_on_folder = {} self.waiting_on_folder_rename.append(wait_on_folder) for f in self.get_files(): if f["path"].startswith(folder): # Keep track of filerenames we're waiting on wait_on_folder[f["index"]] = Deferred().addBoth(on_file_rename_complete, wait_on_folder, f["index"]) new_path = f["path"].replace(folder, new_folder, 1) try: self.handle.rename_file(f["index"], new_path) except TypeError: self.handle.rename_file(f["index"], new_path.encode("utf-8")) def on_folder_rename_complete(result, torrent, folder, new_folder): component.get("EventManager").emit(TorrentFolderRenamedEvent(torrent.torrent_id, folder, new_folder)) # Empty folders are removed after libtorrent folder renames self.remove_empty_folders(folder) torrent.waiting_on_folder_rename = filter(None, torrent.waiting_on_folder_rename) component.get("TorrentManager").save_resume_data((self.torrent_id,)) d = DeferredList(wait_on_folder.values()) d.addBoth(on_folder_rename_complete, self, folder, new_folder) return d
def onLeave(self, details): # when this router is shutting down, we disconnect all our # components so that they have a chance to shutdown properly # -- e.g. on a ctrl-C of the router. leaves = [] for component in self.components.values(): if component.session.is_connected(): d = maybeDeferred(component.session.leave) def done(_): self.log.info( "component '{id}' disconnected", id=component.id, ) component.session.disconnect() d.addCallback(done) leaves.append(d) dl = DeferredList(leaves, consumeErrors=True) # we want our default behavior, which disconnects this # router-worker, effectively shutting it down .. but only # *after* the components got a chance to shutdown. dl.addBoth(lambda _: super(RouterWorkerSession, self).onLeave(details))
def cleanUp(self): """Thorough clean-up method to cancel all remaining deferreds, and send connection metrics in""" self.metrics.increment("client.socket.disconnect", tags=self.base_tags) elapsed = (ms_time() - self.connected_at) / 1000.0 self.metrics.timing("client.socket.lifespan", duration=elapsed, tags=self.base_tags) # Cleanup our client entry if self.uaid and self.ap_settings.clients.get(self.uaid) == self: del self.ap_settings.clients[self.uaid] # Cancel any outstanding deferreds for d in self._callbacks: d.cancel() # Attempt to deliver any notifications not originating from storage if self.direct_updates: defers = [] for chid, version in self.direct_updates.items(): d = deferToThread( self.ap_settings.storage.save_notification, self.uaid, chid, version ) d.addErrback(self.log_err) defers.append(d) # Tag on the notifier once everything has been stored dl = DeferredList(defers) dl.addBoth(self._lookup_node) # Delete and remove remaining dicts and lists del self.direct_updates del self.updates_sent
def _openDataConnection(self, command, protocol): """ This method returns a DeferredList. """ cmd = FTPCommand(command, public=1) if self.passive: # Hack: use a mutable object to sneak a variable out of the # scope of doPassive _mutable = [None] def doPassive(response): """Connect to the port specified in the response to PASV""" host, port = decodeHostPort(response[-1]) class _Factory(ClientFactory): noisy = 0 def buildProtocol(self, ignored): self.protocol.factory = self return self.protocol def clientConnectionFailed(self, connector, reason): self.protocol.connectionFailed() f = _Factory() f.protocol = protocol _mutable[0] = reactor.connectTCP(host, port, f) pasvCmd = FTPCommand('PASV') self.queueCommand(pasvCmd) pasvCmd.deferred.addCallback(doPassive).addErrback(self.fail) results = [cmd.deferred, pasvCmd.deferred, protocol.deferred] d = DeferredList(results, fireOnOneErrback=1) # Ensure the connection is always closed def close(x, m=_mutable): m[0] and m[0].disconnect() return x d.addBoth(close) else: # We just place a marker command in the queue, and will fill in # the host and port numbers later (see generatePortCommand) portCmd = FTPCommand('PORT') # Ok, now we jump through a few hoops here. # This is the problem: a transfer is not to be trusted as complete # until we get both the "226 Transfer complete" message on the # control connection, and the data socket is closed. Thus, we use # a DeferredList to make sure we only fire the callback at the # right time. portCmd.transferDeferred = protocol.deferred portCmd.protocol = protocol portCmd.deferred.addErrback(portCmd.transferDeferred.errback) self.queueCommand(portCmd) # Create dummy functions for the next callback to call. # These will also be replaced with real functions in # generatePortCommand. portCmd.loseConnection = lambda result: result portCmd.fail = lambda error: error # Ensure that the connection always gets closed cmd.deferred.addErrback(lambda e, pc=portCmd: pc.fail(e) or e) results = [cmd.deferred, portCmd.deferred, portCmd.transferDeferred] d = DeferredList(results, fireOnOneErrback=1) self.queueCommand(cmd) return d
returnValue((yield fun_test(content))) def fun_test(content): print(content) for i in range(10): yield str(i)+":"+"test" def fun_print(content): print(type(content)) print(content) return content def end_fun(content): print("end_content",content) try: reactor.stop() except Exception as e: print(e) d = getPage(b"https://www.smzdm.com") d.addCallback(my_callbacks) d.addCallback(fun_print) #d.addCallback(end_fun) dd = DeferredList([d,]) dd.addBoth(end_fun) reactor.run()
def main(agent, URLS): diferred_list = [] for URL in URLS: diferred_list.append(get_response(agent, URL)) ll = DeferredList(diferred_list, consumeErrors=True) ll.addBoth(lambda stop: reactor.stop())
def errorCallback(error): print(response.request.absoluteURI, error) finished.addErrback(errorCallback) return finished _ = [] for url in [ b"http://www.baidu.com/s?wd=python", b"http://www.baidu.com/s?wd=itcast"]: d = agent.request( b'GET', url, Headers({'User-Agent': ['Twisted Web Client Example']}), None) d.addCallback(successCallback) _.append(d) dl = DeferredList(_) # 统一管理多个defered对象 def callbackShutdown(ignored): reactor.stop() dl.addBoth(callbackShutdown) if __name__ == '__main__': reactor.run()
filename='airquality.log', datefmt='%y-%m-%d %H:%M', filemode='a', format='%(asctime)s %(message)s') def main(twisted_agent, generated_urls): deferred_lst = [] for url in generated_urls: try: d = getResponse(twisted_agent, url) deferred_lst.append(d) except Exception, e: print(e) list_deferred = DeferredList(deferred_lst, consumeErrors=True) list_deferred.addBoth(lambda shutdown: reactor.stop()) def error(reason): logging.error(reason.value) @inlineCallbacks def getResponse(twisted_agent, url): try: response = yield twisted_agent.request( method=METHOD, uri=url, headers=Headers(HEADERS)).addErrback(error) except Exception, e: print(e) else: readResponseBody(response)
return x def timeout(): log.fatal('Timeout!') try: reactor.stop() except ReactorNotRunning: pass if opt.tmo >= 0.00001: reactor.callLater(opt.tmo, timeout) gets = [] client = CAClient() for pv in pvs: chan = CAClientChannel(pv, client) g = CAGet(chan, dbf_req, count, meta=meta_req, dbf_conv=dbf_dis) g.data.addCallback(data, pv, meta_req) g.data.addErrback(nodata, pv) gets.append(g.data) done = DeferredList(gets) done.addBoth(stop) reactor.run()
def defer_download_list(self, item, requests, callback, errback, doneback): dlist = [self.defer_download(r, callback, errback) for r in requests] dfd = DeferredList(dlist, consumeErrors=1) dfd.addBoth(doneback, item) return dfd
class KafkaClient(object): """Cluster-aware Kafka client `KafkaClient` maintains a cache of cluster metadata (brokers, topics, etc.) and routes each request to the appropriate broker connection. It must be bootstrapped with the address of at least one Kafka broker to retrieve the cluster metadata. You will typically use this class in combination with `Producer` or `Consumer` which provide higher-level behavior. When done with the client, call :meth:`.close()` to permanently dispose of it. This terminates any open connections and release resources. Do not set or mutate the attributes of `KafkaClient` objects. `KafkaClient` is not intended to be subclassed. :ivar reactor: Twisted reactor, as passed to the constructor. This must implement :class:`~twisted.internet.interfaces.IReactorTime` and :class:`~twisted.internet.interfaces.IReactorTCP`. :ivar str clientId: A short string used to identify the client to the server. This may appear in log messages on the server side. :ivar _brokers: Map of broker ID to broker metadata (host and port). This mapping is updated (mutated) whenever metadata is returned by a broker. :type _brokers: :class:`dict` mapping :class:`int` to :class:`afkak.common.BrokerMetadata` :ivar clients: Map of broker node ID to broker clients. Items are added to this map as a connection to a specific broker is needed. Once present the client's broker metadata is updated on change. Call :meth:`_get_brokerclient()` to get a broker client. This method constructs it and adds it to *clients* if it does not exist. Call :meth:`_close_brokerclients()` to close a broker client once it has been removed from *clients*. .. warning:: Despite the name, ``clients`` is a private attribute. Clients are removed when a full metadata fetch indicates that a broker no longer exists. Note that Afkak avoids doing a full metadata fetch whenever possible because it is an expensive operation, so it is possible for a broker client to remain in this map once the node is removed from the cluster. No requests will be routed to such a broker client, which will effectively leak. Afkak should be enhanced to remove such stale clients after a timeout period. :type clients: :class:`dict` mapping :class:`int` to :class:`_KafkaBrokerClient` :ivar float timeout: Client side request timeout, **in seconds**. :param float timeout: Client-side request timeout, **in milliseconds**. :param endpoint_factory: Callable which accepts *reactor*, *host* and *port* arguments. It must return a :class:`twisted.internet.interfaces.IStreamClientEndpoint`. Afkak does not apply a timeout to connection attempts because most endpoints include timeout logic. For example, the default of :class:`~twisted.internet.endpoints.HostnameEndpoint` applies a 30-second timeout. If an endpoint doesn't support timeouts you may need to wrap it to do so. :param retry_policy: Callable which accepts a count of *failures*. It returns the number of seconds (a `float`) to wait before the next attempt. This policy is used to schedule reconnection attempts to Kafka brokers. Use :func:`twisted.internet.application.backoffPolicy()` to generate such a callable. .. versionchanged:: Afkak 3.0.0 - The *endpoint_factory* argument was added. - The *retry_policy* argument was added. - *timeout* may no longer be `None`. Pass a large value instead. """ # This is the __CLIENT_SIDE__ timeout that's used when making requests # to our brokerclients. If a request doesn't return within this amount # of time, we errback() the deferred. This is _NOT_ the server-side # timeout which is passed into the send_{produce,fetch}_request methods # which have defaults set below. This one should be larger, btw :-) DEFAULT_REQUEST_TIMEOUT_MSECS = 10000 # Default timeout msec for fetch requests. This is how long the server # will wait trying to get enough bytes of messages to fulfill the fetch # request. When this times out on the server side, it sends back a # response with as many bytes of messages as it has. See the docs for # more caveats on this timeout. DEFAULT_FETCH_SERVER_WAIT_MSECS = 5000 # Default minimum amount of message bytes sent back on a fetch request DEFAULT_FETCH_MIN_BYTES = 4096 # Default number of msecs the lead-broker will wait for replics to # ack Produce requests before failing the request DEFAULT_REPLICAS_ACK_MSECS = 1000 clientId = u"afkak-client" _clientIdBytes = clientId.encode() def __init__(self, hosts, clientId=None, timeout=DEFAULT_REQUEST_TIMEOUT_MSECS, disconnect_on_timeout=False, correlation_id=0, reactor=None, endpoint_factory=HostnameEndpoint, retry_policy=_DEFAULT_RETRY_POLICY): self.timeout = float(timeout) / 1000.0 # msecs to secs if clientId is not None: self.clientId = clientId self._clientIdBytes = _coerce_client_id(clientId) # FIXME: clients should be private self.clients = {} # Broker-NodeID -> _KafkaBrokerClient instance self.topics_to_brokers = {} # TopicAndPartition -> BrokerMetadata self.partition_meta = {} # TopicAndPartition -> PartitionMetadata self.consumer_group_to_brokers = {} # consumer_group -> BrokerMetadata self.coordinator_fetches = {} # consumer_group -> deferred self.topic_partitions = {} # topic_id -> [0, 1, 2, ...] self.topic_errors = {} # topic_id -> topic_error_code self.correlation_id = correlation_id self.close_dlist = None # Deferred wait on broker client disconnects # Do we disconnect brokerclients when requests via them timeout? self._disconnect_on_timeout = disconnect_on_timeout self._brokers = {} # Broker-NodeID -> BrokerMetadata self._topics = {} # Topic-Name -> TopicMetadata self._closing = False # Are we shutting down/shutdown? self.update_cluster_hosts(hosts) # Store hosts and mark for lookup if reactor is None: from twisted.internet import reactor self.reactor = reactor self._endpoint_factory = endpoint_factory assert retry_policy(1) >= 0.0 self._retry_policy = retry_policy @property def clock(self): # TODO: Deprecate this return self.reactor def __repr__(self): """return a string representing this KafkaClient.""" return '<{} clientId={} hosts={} timeout={}>'.format( self.__class__.__name__, self.clientId, ' '.join('{}:{}'.format(h, p) for h, p in self._bootstrap_hosts), self.timeout, ) def update_cluster_hosts(self, hosts): """ Advise the client of possible changes to Kafka cluster hosts In general Afkak will keep up with changes to the cluster, but in a Docker environment where all the nodes in the cluster may change IP address at once or in quick succession Afkak may fail to track changes to the cluster. This function lets you notify the Afkak client that some or all of the brokers may have changed. The hosts given are used the next time the client needs a fresh connection to look up cluster metadata. Parameters ========== hosts: (string|[string]) Hosts as a single comma separated "host[:port][,host[:port]]+" string, or a list of strings: ["host[:port]", ...] """ self._bootstrap_hosts = _normalize_hosts(hosts) def reset_topic_metadata(self, *topics): """ Remove cached metadata for the named topics Metadata will be fetched again as required to satisfy requests. :param topics: Topic names. Provide at least one or the method call will have no effect. """ topics = tuple(_coerce_topic(t) for t in topics) log.debug("reset_topic_metadata(%s)", ', '.join(repr(t) for t in topics)) for topic in topics: try: partitions = self.topic_partitions[topic] except KeyError: pass else: for partition in partitions: try: del self.topics_to_brokers[TopicAndPartition( topic, partition)] except KeyError: pass del self.topic_partitions[topic] try: self.topic_errors.pop(topic) except KeyError: pass def reset_consumer_group_metadata(self, *groups): """Reset cache of what broker manages the offset for specified groups Remove the cache of what Kafka broker should be contacted when fetching or updating the committed offsets for a given consumer group or groups. NOTE: Does not cancel any outstanding requests for updates to the consumer group metadata for the specified groups. """ groups = tuple(_coerce_consumer_group(g) for g in groups) for group in groups: if group in self.consumer_group_to_brokers: del self.consumer_group_to_brokers[group] def reset_all_metadata(self): """Clear all cached metadata Metadata will be re-fetched as required to satisfy requests. """ self.topics_to_brokers.clear() self.topic_partitions.clear() self.topic_errors.clear() self.consumer_group_to_brokers.clear() def has_metadata_for_topic(self, topic): return _coerce_topic(topic) in self.topic_partitions def metadata_error_for_topic(self, topic): return self.topic_errors.get(_coerce_topic(topic), UnknownTopicOrPartitionError.errno) def partition_fully_replicated(self, topic_and_part): if topic_and_part not in self.partition_meta: return False part_meta = self.partition_meta[topic_and_part] return len(part_meta.replicas) == len(part_meta.isr) def topic_fully_replicated(self, topic): """ Determine if the given topic is fully replicated according to the currently known cluster metadata. .. note:: This relies on cached cluster metadata. You may call :meth:`load_metadata_for_topics()` first to refresh this cache. :param str topic: Topic name :returns: A boolean indicating that: 1. The number of partitions in the topic is non-zero. 2. For each partition, all replicas are in the in-sync replica (ISR) set. :rtype: :class:`bool` """ topic = _coerce_topic(topic) if topic not in self.topic_partitions: return False if not self.topic_partitions[topic]: # Don't consider an empty partition list 'fully replicated' return False return all( self.partition_fully_replicated(TopicAndPartition(topic, p)) for p in self.topic_partitions[topic]) def close(self): """Permanently dispose of the client - Immediately mark the client as closed, causing current operations to fail with :exc:`~afkak.common.CancelledError` and future operations to fail with :exc:`~afkak.common.ClientError`. - Clear cached metadata. - Close any connections to Kafka brokers. :returns: deferred that fires when all resources have been released """ # If we're already waiting on an/some outstanding disconnects # make sure we continue to wait for them... log.debug("%r: close", self) self._closing = True # Close down any clients we have brokerclients, self.clients = self.clients, None self._close_brokerclients(brokerclients.values()) # clean up other outstanding operations self.reset_all_metadata() return self.close_dlist or defer.succeed(None) def load_metadata_for_topics(self, *topics): """Discover topic metadata and brokers Afkak internally calls this method whenever metadata is required. :param str topics: Topic names to look up. The resulting metadata includes the list of topic partitions, brokers owning those partitions, and which partitions are in sync. Fetching metadata for a topic may trigger auto-creation if that is enabled on the Kafka broker. When no topic name is given metadata for *all* topics is fetched. This is an expensive operation, but it does not trigger topic creation. :returns: :class:`Deferred` for the completion of the metadata fetch. This will fire with ``True`` on success, ``None`` on cancellation, or fail with an exception on error. On success, topic metadata is available from the attributes of :class:`KafkaClient`: :data:`~KafkaClient.topic_partitions`, :data:`~KafkaClient.topics_to_brokers`, etc. """ topics = tuple(_coerce_topic(t) for t in topics) log.debug("%r: load_metadata_for_topics(%s)", self, ', '.join(repr(t) for t in topics)) fetch_all_metadata = not topics # create the request requestId = self._next_id() request = KafkaCodec.encode_metadata_request(self._clientIdBytes, requestId, topics) # Callbacks for the request deferred... def _handleMetadataResponse(response): # Decode the response brokers, topics = KafkaCodec.decode_metadata_response(response) log.debug("%r: got metadata brokers=%r topics=%r", self, brokers, topics) # If we fetched the metadata for all topics, then store away the # received metadata for diagnostics. if fetch_all_metadata: self._brokers = brokers self._topics = topics # Iff we were fetching for all topics, and we got at least one # broker back, then remove brokers when we update our brokers ok_to_remove = (fetch_all_metadata and len(brokers)) # Take the metadata we got back, update our self.clients, and # if needed disconnect or connect from/to old/new brokers self._update_brokers(brokers.values(), remove=ok_to_remove) # Now loop through all the topics/partitions in the response # and setup our cache/data-structures for topic, topic_metadata in topics.items(): _, topic_error, partitions = topic_metadata self.reset_topic_metadata(topic) self.topic_errors[topic] = topic_error if not partitions: log.warning('No partitions for %s, Err:%d', topic, topic_error) continue self.topic_partitions[topic] = [] for partition, meta in partitions.items(): self.topic_partitions[topic].append(partition) topic_part = TopicAndPartition(topic, partition) self.partition_meta[topic_part] = meta if meta.leader == -1: log.warning('No leader for topic %s partition %s', topic, partition) self.topics_to_brokers[topic_part] = None else: self.topics_to_brokers[topic_part] = brokers[ meta.leader] self.topic_partitions[topic] = sorted( self.topic_partitions[topic]) return True def _handleMetadataErr(err): # This should maybe do more cleanup? if err.check(t_CancelledError, CancelledError): # Eat the error # XXX Shouldn't this return False? The success branch # returns True. return None log.error("Failed to retrieve metadata:%s", err) raise KafkaUnavailableError( "Unable to load metadata from configured " "hosts: {!r}".format(err)) # Send the request, add the handlers d = self._send_broker_unaware_request(requestId, request) d.addCallbacks(_handleMetadataResponse, _handleMetadataErr) return d def load_consumer_metadata_for_group(self, group): """ Determine broker for the consumer metadata for the specified group Returns a deferred which callbacks with True if the group's coordinator could be determined, or errbacks with ConsumerCoordinatorNotAvailableError if not. Parameters ---------- group: group name as `str` """ group = _coerce_consumer_group(group) log.debug("%r: load_consumer_metadata_for_group(%r)", self, group) # If we are already loading the metadata for this group, then # just return the outstanding deferred if group in self.coordinator_fetches: d = defer.Deferred() self.coordinator_fetches[group][1].append(d) return d # No outstanding request, create a new one requestId = self._next_id() request = KafkaCodec.encode_consumermetadata_request( self._clientIdBytes, requestId, group) # Callbacks for the request deferred... def _handleConsumerMetadataResponse(response_bytes): # Decode the response (returns ConsumerMetadataResponse) response = KafkaCodec.decode_consumermetadata_response( response_bytes) log.debug("%r: load_consumer_metadata_for_group(%r) -> %r", self, group, response) if response.error: raise BrokerResponseError.errnos.get(response.error, UnknownError)(response) bm = BrokerMetadata(response.node_id, response.host, response.port) self.consumer_group_to_brokers[group] = bm self._update_brokers([bm]) return True def _handleConsumerMetadataErr(err): log.error("Failed to retrieve consumer metadata for group %r", group, exc_info=(err.type, err.value, err.getTracebackObject())) # Clear any stored value for the group's coordinator self.reset_consumer_group_metadata(group) # FIXME: This exception should chain from err. raise ConsumerCoordinatorNotAvailableError( "Coordinator for group {!r} not available".format(group), ) def _propagate(result): [_, ds] = self.coordinator_fetches.pop(group, None) for d in ds: d.callback(result) # Send the request, add the handlers request_d = self._send_broker_unaware_request(requestId, request) d = defer.Deferred() # Save the deferred under the fetches for this group self.coordinator_fetches[group] = (request_d, [d]) request_d.addCallback(_handleConsumerMetadataResponse) request_d.addErrback(_handleConsumerMetadataErr) request_d.addBoth(_propagate) return d @inlineCallbacks def send_produce_request(self, payloads=None, acks=1, timeout=DEFAULT_REPLICAS_ACK_MSECS, fail_on_error=True, callback=None): """ Encode and send some ProduceRequests ProduceRequests will be grouped by (topic, partition) and then sent to a specific broker. Output is a list of responses in the same order as the list of payloads specified Parameters ---------- payloads: list of ProduceRequest acks: How many Kafka broker replicas need to write before the leader replies with a response timeout: How long the server has to receive the acks from the replicas before returning an error. fail_on_error: boolean, should we raise an Exception if we encounter an API error? callback: function, instead of returning the ProduceResponse, first pass it through this function Return ------ a deferred which callbacks with a list of ProduceResponse Raises ------ FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ encoder = partial(KafkaCodec.encode_produce_request, acks=acks, timeout=timeout) if acks == 0: decoder = None else: decoder = KafkaCodec.decode_produce_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_fetch_request(self, payloads=None, fail_on_error=True, callback=None, max_wait_time=DEFAULT_FETCH_SERVER_WAIT_MSECS, min_bytes=DEFAULT_FETCH_MIN_BYTES): """ Encode and send a FetchRequest Payloads are grouped by topic and partition so they can be pipelined to the same brokers. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ if (max_wait_time / 1000) > (self.timeout - 0.1): raise ValueError( "%r: max_wait_time: %d must be less than client.timeout by " "at least 100 milliseconds.", self, max_wait_time) encoder = partial(KafkaCodec.encode_fetch_request, max_wait_time=max_wait_time, min_bytes=min_bytes) # resps is a list of FetchResponse() objects, each of which can hold # 1-n messages. resps = yield self._send_broker_aware_request( payloads, encoder, KafkaCodec.decode_fetch_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_request(self, payloads=None, fail_on_error=True, callback=None): resps = yield self._send_broker_aware_request( payloads, KafkaCodec.encode_offset_request, KafkaCodec.decode_offset_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_fetch_request(self, group, payloads=None, fail_on_error=True, callback=None): """ Takes a group (string) and list of OffsetFetchRequest and returns a list of OffsetFetchResponse objects """ encoder = partial(KafkaCodec.encode_offset_fetch_request, group=group) decoder = KafkaCodec.decode_offset_fetch_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder, consumer_group=group) returnValue( self._handle_responses(resps, fail_on_error, callback, group)) @inlineCallbacks def send_offset_commit_request(self, group, payloads=None, fail_on_error=True, callback=None, group_generation_id=-1, consumer_id=''): """Send a list of OffsetCommitRequests to the Kafka broker for the given consumer group. Args: group (str): The consumer group to which to commit the offsets payloads ([OffsetCommitRequest]): List of topic, partition, offsets to commit. fail_on_error (bool): Whether to raise an exception if a response from the Kafka broker indicates an error callback (callable): a function to call with each of the responses before returning the returned value to the caller. group_generation_id (int): Must currently always be -1 consumer_id (str): Must currently always be empty string Returns: [OffsetCommitResponse]: List of OffsetCommitResponse objects. Will raise KafkaError for failed requests if fail_on_error is True """ group = _coerce_consumer_group(group) encoder = partial(KafkaCodec.encode_offset_commit_request, group=group, group_generation_id=group_generation_id, consumer_id=consumer_id) decoder = KafkaCodec.decode_offset_commit_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder, consumer_group=group) returnValue( self._handle_responses(resps, fail_on_error, callback, group)) # # # Private Methods # # # def _handle_responses(self, responses, fail_on_error, callback=None, consumer_group=None): out = [] for resp in responses: try: _check_error(resp) except (UnknownTopicOrPartitionError, NotLeaderForPartitionError): log.error('Error found in response: %s', resp) self.reset_topic_metadata(resp.topic) if fail_on_error: raise except (OffsetsLoadInProgressError, NotCoordinatorForConsumerError, ConsumerCoordinatorNotAvailableError): log.error('Error found in response: %s Consumer Group: %s', resp, consumer_group) self.reset_consumer_group_metadata(consumer_group) if fail_on_error: raise if callback is not None: out.append(callback(resp)) else: out.append(resp) return out def _get_brokerclient(self, node_id): """ Get a broker client. :param int node_id: Broker node ID :raises KeyError: for an unknown node ID :returns: :class:`_KafkaBrokerClient` """ if self._closing: raise ClientError( "Cannot get broker client for node_id={}: {} has been closed". format(node_id, self)) if node_id not in self.clients: broker_metadata = self._brokers[node_id] log.debug("%r: creating client for %s", self, broker_metadata) self.clients[node_id] = _KafkaBrokerClient( self.reactor, self._endpoint_factory, broker_metadata, self.clientId, self._retry_policy, ) return self.clients[node_id] def _close_brokerclients(self, clients): """ Close the given broker clients. :param clients: Iterable of `_KafkaBrokerClient` """ def _log_close_failure(failure, brokerclient): log.debug('BrokerClient: %s close result: %s: %s', brokerclient, failure.type.__name__, failure.getErrorMessage()) def _clean_close_dlist(result, close_dlist): # If there aren't any other outstanding closings going on, then # close_dlist == self.close_dlist, and we can reset it. if close_dlist == self.close_dlist: self.close_dlist = None if not self.close_dlist: dList = [] else: log.debug("%r: _close_brokerclients has nested deferredlist: %r", self, self.close_dlist) dList = [self.close_dlist] for brokerClient in clients: log.debug("Calling close on: %r", brokerClient) d = brokerClient.close().addErrback(_log_close_failure, brokerClient) dList.append(d) self.close_dlist = DeferredList(dList) self.close_dlist.addBoth(_clean_close_dlist, self.close_dlist) def _update_brokers(self, brokers, remove=False): """ Update `self._brokers` and `self.clients` Update our self.clients based on brokers in received metadata Take the received dict of brokers and reconcile it with our current list of brokers (self.clients). If there is a new one, bring up a new connection to it, and if remove is True, and any in our current list aren't in the metadata returned, disconnect from it. :param brokers: Iterable of `BrokerMetadata`. A client will be created for every broker given if it doesn't yet exist. :param bool remove: Is this metadata for *all* brokers? If so, clients for brokers which are no longer found in the metadata will be closed. """ log.debug("%r: _update_brokers(%r, remove=%r)", self, brokers, remove) brokers_by_id = {bm.node_id: bm for bm in brokers} self._brokers.update(brokers_by_id) # Update the metadata of broker clients that already exist. for node_id, broker_meta in brokers_by_id.items(): if node_id not in self.clients: continue self.clients[node_id].updateMetadata(broker_meta) # Remove any clients for brokers which no longer exist. if remove: to_close = [ self.clients.pop(node_id) for node_id in set(self.clients) - set(brokers_by_id) ] if to_close: self._close_brokerclients(to_close) @inlineCallbacks def _get_leader_for_partition(self, topic, partition): """ Returns the leader for a partition or None if the partition exists but has no leader. PartitionUnavailableError will be raised if the topic or partition is not part of the metadata. """ key = TopicAndPartition(topic, partition) # reload metadata whether the partition is not available # or has no leader (broker is None) if self.topics_to_brokers.get(key) is None: yield self.load_metadata_for_topics(topic) if key not in self.topics_to_brokers: raise PartitionUnavailableError("%s not available" % str(key)) returnValue(self.topics_to_brokers[key]) @inlineCallbacks def _get_coordinator_for_group(self, consumer_group): """Returns the coordinator (broker) for a consumer group Returns the broker for a given consumer group or Raises ConsumerCoordinatorNotAvailableError """ if self.consumer_group_to_brokers.get(consumer_group) is None: yield self.load_consumer_metadata_for_group(consumer_group) returnValue(self.consumer_group_to_brokers.get(consumer_group)) def _next_id(self): """Generate a new correlation id.""" # modulo to keep within int32 (signed) self.correlation_id = (self.correlation_id + 1) % 2**31 return self.correlation_id def _make_request_to_broker(self, broker, requestId, request, **kwArgs): """Send a request to the specified broker.""" def _timeout_request(broker, requestId): """The time we allotted for the request expired, cancel it.""" try: # FIXME: This should be done by calling .cancel() on the Deferred # returned by the broker client. broker.cancelRequest( requestId, reason=RequestTimedOutError( 'Request: {} cancelled due to timeout'.format( requestId))) except KeyError: # pragma: no cover This should never happen... log.exception( 'ERROR: Failed to find key for timed-out ' 'request. Broker: %r Req: %d', broker, requestId) raise if self._disconnect_on_timeout: broker.disconnect() def _alert_blocked_reactor(timeout, start): """Complain if this timer didn't fire before the timeout elapsed""" now = self.reactor.seconds() if now >= (start + timeout): log.warning('Reactor was starved for %r seconds', now - start) def _cancel_timeout(result, dc): """Request completed/cancelled, cancel the timeout delayedCall.""" if dc.active(): dc.cancel() return result # Make the request to the specified broker log.debug('_mrtb: sending %s to broker %r', _ReprRequest(request), broker) d = broker.makeRequest(requestId, request, **kwArgs) # Set a delayedCall to fire if we don't get a reply in time dc = self.reactor.callLater(self.timeout, _timeout_request, broker, requestId) # Set a delayedCall to complain if the reactor has been blocked rc = self.reactor.callLater((self.timeout * 0.9), _alert_blocked_reactor, self.timeout, self.reactor.seconds()) # Setup a callback on the request deferred to cancel both callLater d.addBoth(_cancel_timeout, dc) d.addBoth(_cancel_timeout, rc) return d @inlineCallbacks def _send_broker_unaware_request(self, requestId, request): """ Attempt to send a broker-agnostic request to one of the known brokers: 1. Try each connected broker (in random order) 2. Try each known but unconnected broker (in random order) 3. Try each of the bootstrap hosts (in random order) :param bytes request: The bytes of a Kafka `RequestMessage`_ structure. It must have a unique (to this connection) correlation ID. :returns: API response message for *request* :rtype: Deferred[bytes] :raises: `KafkaUnavailableError` when making the request of all known hosts has failed. """ node_ids = list(self._brokers.keys()) # Randomly shuffle the brokers to distribute the load random.shuffle(node_ids) # Prioritize connected brokers def connected(node_id): try: return self.clients[node_id].connected() except KeyError: return False node_ids.sort(reverse=True, key=connected) for node_id in node_ids: broker = self._get_brokerclient(node_id) try: log.debug('_sbur: sending %s to broker %r', _ReprRequest(request), broker) d = self._make_request_to_broker(broker, requestId, request) resp = yield d returnValue(resp) except KafkaError as e: log.warning(("Will try next server after %s" " failed against server %s:%i. Error: %s"), _ReprRequest(request), broker.host, broker.port, e) # The request was not handled, likely because no broker metadata has # loaded yet (or all broker connections have failed). Fall back to # boostrapping. returnValue((yield self._send_bootstrap_request(request))) @inlineCallbacks def _send_bootstrap_request(self, request): """Make a request using an ephemeral broker connection This routine is used to make broker-unaware requests to get the initial cluster metadata. It cycles through the configured hosts, trying to connect and send the request to each in turn. This temporary connection is closed once a response is received. Note that most Kafka APIs require requests be sent to a specific broker. This method will only function for broker-agnostic requests like: * `Metadata <https://kafka.apache.org/protocol.html#The_Messages_Metadata>`_ * `FindCoordinator <https://kafka.apache.org/protocol.html#The_Messages_FindCoordinator>`_ :param bytes request: The bytes of a Kafka `RequestMessage`_ structure. It must have a unique (to this connection) correlation ID. :returns: API response message for *request* :rtype: Deferred[bytes] :raises: - `KafkaUnavailableError` when making the request of all known hosts has failed. - `twisted.internet.defer.TimeoutError` when connecting or making a request exceeds the timeout. """ hostports = list(self._bootstrap_hosts) random.shuffle(hostports) for host, port in hostports: ep = self._endpoint_factory(self.reactor, host, port) try: protocol = yield ep.connect(_bootstrapFactory) except Exception as e: log.debug("%s: bootstrap connect to %s:%s -> %s", self, host, port, e) continue try: response = yield protocol.request(request).addTimeout( self.timeout, self.reactor) except Exception: log.debug("%s: bootstrap %s to %s:%s failed", self, _ReprRequest(request), host, port, exc_info=True) else: returnValue(response) finally: protocol.transport.loseConnection() raise KafkaUnavailableError( "Failed to bootstrap from hosts {}".format(hostports)) @inlineCallbacks def _send_broker_aware_request(self, payloads, encoder_fn, decode_fn, consumer_group=None): """ Group a list of request payloads by topic+partition and send them to the leader broker for that partition using the supplied encode/decode functions Params ====== payloads: list of object-like entities with a topic and partition attribute. payloads must be grouped by (topic, partition) tuples. encode_fn: a method to encode the list of payloads to a request body, must accept client_id, correlation_id, and payloads as keyword arguments decode_fn: a method to decode a response body into response objects. The response objects must be object-like and have topic and partition attributes consumer_group: [string], optional. Indicates the request should be directed to the Offset Coordinator for the specified consumer_group. Return ====== deferred yielding a list of response objects in the same order as the supplied payloads, or None if decode_fn is None. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError, """ # Calling this without payloads is nonsensical if not payloads: raise ValueError("Payloads parameter is empty") # Group the requests by topic+partition original_keys = [] payloads_by_broker = collections.defaultdict(list) # Go through all the payloads, lookup the leader/coordinator for that # payload's topic/partition or consumer group. If there's no # leader/coordinator (broker), raise. For each broker, keep # a list of the payloads to be sent to it. Also, for each payload in # the list of payloads, make a corresponding list (original_keys) with # the topic/partition in the same order, so we can lookup the returned # result(s) by that topic/partition key in the set of returned results # and return them in a list the same order the payloads were supplied for payload in payloads: # get leader/coordinator, depending on consumer_group if consumer_group is None: leader = yield self._get_leader_for_partition( payload.topic, payload.partition) if leader is None: raise LeaderUnavailableError( "Leader not available for topic %s partition %s" % (payload.topic, payload.partition)) else: leader = yield self._get_coordinator_for_group(consumer_group) if leader is None: raise ConsumerCoordinatorNotAvailableError( "Coordinator not available for group: %s" % (consumer_group)) payloads_by_broker[leader].append(payload) original_keys.append((payload.topic, payload.partition)) # Accumulate the responses in a dictionary acc = {} # The kafka server doesn't send replies to produce requests # with acks=0. In that case, our decode_fn will be # None, and we need to let the brokerclient know not # to expect a reply. makeRequest() returns a deferred # regardless, but in the expectResponse=False case, it will # fire as soon as the request is sent, and it can errBack() # due to being cancelled prior to the broker being able to # send the request. expectResponse = decode_fn is not None # keep a list of payloads that were failed to be sent to brokers failed_payloads = [] # Keep track of outstanding requests in a list of deferreds inFlight = [] # and the payloads that go along with them payloadsList = [] # For each broker, send the list of request payloads, for broker_meta, payloads in payloads_by_broker.items(): broker = self._get_brokerclient(broker_meta.node_id) requestId = self._next_id() request = encoder_fn(client_id=self._clientIdBytes, correlation_id=requestId, payloads=payloads) # Make the request d = self._make_request_to_broker(broker, requestId, request, expectResponse=expectResponse) inFlight.append(d) payloadsList.append(payloads) # Wait for all the responses to come back, or the requests to fail results = yield DeferredList(inFlight, consumeErrors=True) # We now have a list of (succeeded, response/Failure) tuples. Check 'em for (success, response), payloads in zip(results, payloadsList): if not success: # The brokerclient deferred was errback()'d: # The send failed, or this request was cancelled (by timeout) log.debug("%r: request:%r to broker failed: %r", self, payloads, response) failed_payloads.extend([(p, response) for p in payloads]) continue if not expectResponse: continue # Successful request/response. Decode it and store by topic/part for response in decode_fn(response): acc[(response.topic, response.partition)] = response # Order the accumulated responses by the original key order # Note that this scheme will throw away responses which we did # not request. See test_send_fetch_request, where the response # includes an error, but for a topic/part we didn't request. # Since that topic/partition isn't in original_keys, we don't pass # it back from here and it doesn't error out. # If any of the payloads failed, fail responses = [acc[k] for k in original_keys if k in acc] if acc else [] if failed_payloads: self.reset_all_metadata() raise FailedPayloadsError(responses, failed_payloads) returnValue(responses)
def power_query_all(system_id, hostname, power_info, timeout=30): """Query every connected rack controller and get the power status from all rack controllers. :return: a tuple with the power state for the node and a list of rack controller system_id's that responded and a list of rack controller system_id's that failed to respond. """ deferreds = [] call_order = [] clients = getAllClients() for client in clients: d = client(PowerQuery, system_id=system_id, hostname=hostname, power_type=power_info.power_type, context=power_info.power_parameters) deferreds.append(d) call_order.append(client.ident) def cb_result(result): power_states = set() responded_rack_ids = set() failed_rack_ids = set() for rack_system_id, (success, response) in zip(call_order, result): if success: power_state = response["state"] if power_state == POWER_STATE.ERROR: # Rack controller cannot access this BMC. failed_rack_ids.add(rack_system_id) else: # Rack controller can access this BMC. power_states.add(response["state"]) responded_rack_ids.add(rack_system_id) else: failed_rack_ids.add(rack_system_id) return (pick_best_power_state(power_states), responded_rack_ids, failed_rack_ids) # Process all defers and build the result. dList = DeferredList(deferreds, consumeErrors=True) dList.addCallback(cb_result) def cancel(): try: dList.cancel() except: # Don't care about the error. pass # Create the canceller if timeout provided. if timeout is None: canceller = None else: canceller = reactor.callLater(timeout, cancel) def done(): if canceller is not None and canceller.active(): canceller.cancel() # Cancel the canceller once finished. dList.addBoth(callOut, done) return dList
print 'Response code:', response.code print 'Response phrase:', response.phrase print 'Response headers:' print pformat(list(response.headers.getAllRawHeaders())) finished = Deferred() response.deliverBody(BeginningPrinter(finished)) return finished def cbShutdown(ignored): reactor.stop() agent = Agent(reactor) urls = ['http://static4.x1x.com/images/title/11/26/49/001-3.jpg', 'http://static2.x1x.com/images/title/11/26/49/002-2.jpg', 'http://static5.x1x.com/images/title/11/26/49/002-1.jpg'] deffer_list = [] for url in urls: d = agent.request( 'GET', url, Headers({'User-Agent': ['Twisted Web Client Example']}), None) d.addCallback(cbRequest) deffer_list.append(d) dl = DeferredList(deffer_list) #dl.addCallback(cbRequest) dl.addBoth(cbShutdown) reactor.run()
resolver = LocationResolver(reactor) def handleResult(resolved, url): print("{0} -> {1}".format(url, resolved)) if resolved: d = resolver.resolve(resolved) d.addCallback(handleResult, resolved) d.addErrback(printError, resolved) return d def printError(failure, url): print(">>> {0}".format(url)) failure.printTraceback() dlist = [] for url in sys.argv[1:]: if not ( url.startswith('http://') or url.startswith('https://') ): url = "http://{0}".format(url) d = resolver.resolve(url) d.addCallback(handleResult, url) d.addErrback(printError, url) dlist.append(d) d = DeferredList(dlist) d.addBoth(lambda _: reactor.stop()) reactor.run()
class BackupTransaction_Host(transactions.BackupTransaction, AbstractHostTransaction): """BACKUP transaction on the Host. @ivar target_hosts: the dictionary mapping the Host object to the C{col.Counter} of how many chunks (of some size code) should be uploaded to that Host. @type target_hosts: col.Mapping @ivar __chunks_by_size_code: the mapping from the chunk size code to all the chunks of this size. @type __chunks_by_size_code: col.Mapping @ivar __all_chunks: the set of the chunks which are expected to be present in this dataset. @type __all_chunks: set @invariant: consists_of(__all_chunks, ChunkFromFilesFinal) @ivar __uploading_chunks: the list of the chunks in the dataset which were not yet uploaded at the beginning of this transaction. @type __uploading_chunks: set @invariant: consists_of(__uploading_chunks, ChunkFromFilesFinal) @ivar __uploaded_chunks: the list of the chunks in this dataset whose upload is already completed. @type __uploaded_chunks: set @invariant: consists_of(__uploaded_chunks, Chunk) @ivar __progress_notif_deferreds: the list of deferred objects, each related to one of the progress notifications being queued to send out. @type __progress_notif_deferreds: list @ivariant: consists_of(__progress_notif_deferreds, Deferred) @ivar paused: the variable for external controlling whether the backup transaction is temporarily suspended (but should be continued as soon as the variable is set back to C{False}). @type paused: bool @invariant: self.__all_chunks == self.__uploading_chunks | self.__uploaded_chunks """ __slots__ = ('paused', 'dataset', '__chunks_by_size_code', 'target_hosts', '__all_chunks', '__uploading_chunks', '__uploaded_chunks', '__progress_notif_deferreds', '__progress_notif_deferredlist', '__random', '__cryptographer', 'ack_result_code') # Key: dataset uuid; # value: the BackupTransaction_Host transaction per_dataset_transactions = WeakValueDictionary() per_dataset_transactions_lock = Lock() class State(AbstractHostTransaction.State): """The state for the BACKUP transaction on the Host.""" __slots__ = () name = 'BACKUP' def __init__(self, *args, **kwargs): """Constructor.""" super(BackupTransaction_Host, self).__init__(*args, **kwargs) self.__random = Random(42) with db.RDB() as rdbw: my_user = HostQueries.HostUsers.get_my_user(rdbw) self.__cryptographer = None self.__progress_notif_deferreds = [] self.__progress_notif_deferredlist = None self.ack_result_code = BackupMessage.ResultCodes.OK self.paused = False self.dataset = None @classmethod def __create_dataset_from_incoming_message(self, _message): """ When the incoming message is received, the dataset is created from its data; i.e. either the existing one is used, or a completely new one is generated. @rtype: DatasetOnChunks, NoneType """ _my_host = _message.dst if _message.dataset_uuid is not None: # Using the prebuilt dataset. with db.RDB() as rdbw: _dataset = \ HostQueries.HostDatasets.get_my_ds_in_progress( _my_host.uuid, _message.dataset_uuid, rdbw) if _dataset is not None: assert _dataset.uuid == _message.dataset_uuid, \ (_dataset.uuid, _message.dataset_uuid) else: raise Exception('Not supported message: {!r}'.format(_message)) return _dataset @pauses def on_begin(self): """ @todo: Add errback too. """ cls = self.__class__ _message = self.message _host = _message.dst logger.debug('Starting backup...') _dataset = self.dataset \ = cls.__create_dataset_from_incoming_message(_message) if self.manager.app.feature_set.per_group_encryption: # Read group key from the user group with db.RDB() as rdbw: _ugroup = Queries.Inhabitants.get_ugroup_by_uuid( _dataset.ugroup_uuid, rdbw) group_key = _ugroup.enc_key else: group_key = None self.__cryptographer = Cryptographer(group_key=group_key, key_generator=None) logger.debug('Created dataset %r.', _dataset) if _dataset is None: raise Exception('No dataset!') else: self.__notify_about_backup_started() self.__notify_about_backup_running() ds_uuid = _dataset.uuid with cls.per_dataset_transactions_lock: if ds_uuid in cls.per_dataset_transactions: self.ack_result_code = BackupMessage.ResultCodes \ .GENERAL_FAILURE raise Exception('The dataset {} is already being ' 'backed up'.format(ds_uuid)) else: cls.per_dataset_transactions[ds_uuid] = self # Force copying it to dict, to don't cause # race conditions during the logger message serialization. logger.debug('Added backup %r, per dataset transactions ' 'are now %r', ds_uuid, dict(cls.per_dataset_transactions)) if _dataset is None: raise Exception('The dataset {} is not found.'.format(ds_uuid)) # Initialize chunks. # Please note that these chunks may include the ones # which are actually present in the cloud already # but under a different UUID. # This will be fixed later, after NEED_INFO_ACK is received. # All chunks, including the already uploaded ones; # contains ChunkFromFilesFinal objects. # _dataset is MyDatasetOnChunks. # dataset.__chunks is list of ChunkFromFilesFinal. self.__all_chunks = set(_dataset.chunks()) assert consists_of(self.__all_chunks, ChunkFromFilesFinal), \ repr(self.__all_chunks) # Already uploaded chunks; contains Chunk objects. with db.RDB() as rdbw: self.__uploaded_chunks = \ set(HostQueries.HostChunks .get_uploaded_chunks(_dataset.uuid, rdbw=rdbw)) assert consists_of(self.__uploaded_chunks, Chunk), \ repr(self.__uploaded_chunks) # Only the pending chunks. self.__uploading_chunks = {ch for ch in self.__all_chunks if ch not in self.__uploaded_chunks} assert consists_of(self.__uploading_chunks, ChunkFromFilesFinal), \ repr(self.__uploading_chunks) # # Now create the NEED_INFO transaction. # But only if we have chunks to ask! # if self.__uploading_chunks: _query = { 'select': ('chunks.uuid', 'uuid'), 'from': 'chunks', 'where': {'["hash", "size", "uuid"]': [c for c in self.__uploading_chunks if c.hash is not None]} } nifn_tr = self.manager.create_new_transaction( name='NEED_INFO_FROM_NODE', src=_message.dst, dst=self.manager.app.primary_node, parent=self, # NEED_INFO_FROM_NODE-specific query=_query) nifn_tr.completed.addCallbacks(self._on_child_nifn_completed, partial(logger.error, 'NI issue: %r')) else: logger.debug('IMHO, no new chunks to upload. ' 'Proceeding directly.') # Go to the next step directly. self._ask_for_backup_hosts() @exceptions_logged(logger) @contract_epydoc def _on_child_nifn_completed(self, ni_state): """ This method is called after the child NEED_INFO_FROM_NODE transaction has succeeded. @type ni_state: NeedInfoFromNodeTransaction_Host.State """ _message = self.message logger.debug('Received response to NEED_INFO_FROM_NODE') # This is a dictionary mapping local chunk UUID # to the cloud chunk UUID number which should be used instead. uuids_to_fix = ni_state.ack_result logger.debug('Need to fix: %r', uuids_to_fix) _all_chunks = self.__all_chunks if uuids_to_fix: assert isinstance(uuids_to_fix, col.Mapping), repr(uuids_to_fix) # Which chunks are present under a different name? misnamed_chunk_uuids = {k for k, v in uuids_to_fix.iteritems() if k != v} assert consists_of(misnamed_chunk_uuids, UUID), \ repr(misnamed_chunk_uuids) misnamed_chunks = {c for c in self.__all_chunks if c.uuid in misnamed_chunk_uuids} # 1. These chunks should be considered already uploaded,.. self.__uploaded_chunks |= misnamed_chunks self.__uploading_chunks -= misnamed_chunks # 2. ... renamed in the database,.. HostQueries.HostChunks.mark_chunks_as_already_existing( uuids_to_fix) # 3. ... and renamed in the state (while needed). for ch in self.__all_chunks: if ch.uuid in uuids_to_fix: ch.uuid = uuids_to_fix[ch.uuid] # Now we finally know the real set of the chunks which need # to be uploaded. Go to the next step. self._ask_for_backup_hosts() def _ask_for_backup_hosts(self): """ Next step of the procedure: ask for hosts which will accept out chunks. """ _message = self.message if __debug__: logger.debug('Backing up: %i blocks, %i chunks', sum(len(ch.blocks) for ch in self.__uploading_chunks), len(self.__all_chunks)) # Select only size codes with non-empty chunk lists self.__chunks_by_size_code = {} for chunk in self.__uploading_chunks: self.__chunks_by_size_code.setdefault(chunk.maxsize_code, []) \ .append(chunk) logger.debug('%s with uploading dataset', 'Start' if self.__uploaded_chunks else 'Proceed') # Now let's start the nested PROVIDE_BACKUP_HOSTS transaction. _chunk_count_by_size_code = \ {k: len(v) for k, v in self.__chunks_by_size_code.iteritems()} pbh_tr = self.manager.create_new_transaction( name='PROVIDE_BACKUP_HOSTS', src=_message.dst, dst=_message.src, parent=self, # PROVIDE_BACKUP_HOSTS-specific chunk_count_by_size_code=_chunk_count_by_size_code) pbh_tr.completed.addCallbacks(self._on_child_pbh_completed, partial(logger.error, 'PBH issue: %r')) @exceptions_logged(logger) @contract_epydoc def _on_child_pbh_completed(self, pbh_state): """ This method is called after the child PROVIDE_BACKUP_HOSTS transaction has succeeded. @type pbh_state: ProvideBackupHostsTransaction_Host.State """ if not ProvideBackupHostsMessage.ResultCodes \ .is_good(pbh_state.ack_result_code): # Our backup request was rejected! self.ack_result_code = \ BackupMessage.ResultCodes.from_provide_backup_host_result_code( pbh_state.ack_result_code) self.__complete_backup_transaction() else: # Proceed with the backup self.target_hosts = \ _target_hosts = \ {Host(uuid=uuid, urls=per_host.urls): col.Counter(per_host.chunks_by_size) for uuid, per_host in pbh_state.ack_hosts_to_use .iteritems()} self.manager.app.known_peers.update( {host.uuid: host for host in _target_hosts.iterkeys()}) logger.debug('CHILD PBH COMPLETED (%r), using target hosts ' 'for backup: %r', pbh_state, _target_hosts.keys()) _message = self.message @exceptions_logged(logger) @contract_epydoc def _on_dataset_progress_success(p_state): """ This method is called after the PROGRESS transaction reporting to the node about the dataset has succeeded. @type p_state: ProgressTransaction_Host.State """ logger.info('Reported to the Node about the dataset %r ' 'successfully.', self.dataset) self.__notify_about_upload_progress() self.__upload_more_chunks() # Notify the node about the new dataset which started uploading. p1_tr = self.manager.create_new_transaction(name='PROGRESS', src=_message.dst, dst=_message.src, parent=self, # PROGRESS-specific dataset=self.dataset) p1_tr.completed.addCallbacks( _on_dataset_progress_success, partial(logger.error, 'Dataset reporting issue: %r')) def __upload_more_chunks(self): """ Try upload some more chunks. If no more chunks can be uploaded, unpause the transaction. """ assert not in_main_thread() logger.debug('Should we upload more chunks? ' 'Current ack_result_code is %r', self.ack_result_code) if BackupMessage.ResultCodes.is_good(self.ack_result_code): logger.debug('So far so good...') if not self.paused: self.__notify_about_backup_running() if not self.__try_upload_next_chunks(): self._no_more_chunks_but_wait_for_progresses() else: logger.debug('%r paused, retry in a second', self) callLaterInThread(1.0, self.__upload_more_chunks) else: logger.debug('Must wait for progresses') self._no_more_chunks_but_wait_for_progresses() @exceptions_logged(logger) def _no_more_chunks_but_wait_for_progresses(self): """ We are almost done with the transaction, but still probably need to wait for PROGRESS transaction before we can safely report backup as completed. @precondition: self.__progress_notif_deferredlist is None @postcondition: self.__progress_notif_deferredlist is not None """ self.__progress_notif_deferredlist = \ DeferredList(self.__progress_notif_deferreds) self.__progress_notif_deferredlist.addBoth( exceptions_logged(logger)( lambda ignore: self._no_more_chunks())) logger.debug('We have no more chunks to upload, ' 'but waiting for the progresses: %r', self.__progress_notif_deferreds) @exceptions_logged(logger) def _no_more_chunks(self): logger.debug('All nested PROGRESS messages completed.') if BackupMessage.ResultCodes.is_good(self.ack_result_code): logger.debug('No more chunks!') _message = self.message _my_host = self.manager.app.host self.dataset.time_completed = datetime.utcnow() @exceptions_logged(logger) @contract_epydoc def _on_final_dataset_progress_success(p_state): """ This method is called after the PROGRESS transaction reporting to the node about the dataset has succeeded. @type p_state: ProgressTransaction_Host.State """ logger.debug('Finalizing the %r backup.', self.dataset) with db.RDB() as rdbw: # Mark the current dataset as completed # only after the response from the node is received. Queries.Datasets.update_dataset(_my_host.uuid, self.dataset, rdbw) # self.__all_chunks is a collection of ChunkFromFilesFinal # Notify the node about the dataset which was just uploaded. p2_tr = self.manager.create_new_transaction( name='PROGRESS', src=_my_host, dst=_message.src, parent=self, # PROGRESS-specific dataset=self.dataset, chunks=self.__all_chunks) p2_tr.completed.addCallbacks( _on_final_dataset_progress_success, partial(logger.error, 'Dataset final reporting issue: %r')) @exceptions_logged(logger) def _complete_backup_on_progress_completed(ignore): self.__complete_backup_transaction() p2_tr.completed.addBoth(_complete_backup_on_progress_completed) else: self.__complete_backup_transaction() def __notify_about_backup_started(self): _dataset = self.dataset logger_status_backup.info('Backup started', extra={'status': 'started', 'result_str': None, 'ds_uuid': _dataset.uuid, 'relaunch_in_mins': None}) def __notify_about_backup_running(self): _dataset = self.dataset logger_status_backup.info('Backup is running', extra={'status': 'running', 'result_str': None, 'ds_uuid': _dataset.uuid, 'relaunch_in_mins': None}) def __notify_about_backup_end(self, is_good): _dataset = self.dataset _result_str = BackupMessage.ResultCodes.to_str(self.ack_result_code) logger_status_backup.info( 'Backup ended', extra={ 'status': 'ok' if is_good else 'fail', 'result_str': _result_str, 'ds_uuid': _dataset.uuid, 'relaunch_in_mins': None if is_good else RELAUNCH_AFTER_MINUTES_ON_ERROR }) def __notify_about_upload_progress(self): """ Report a status log about the current progress of chunks upload. """ _dataset = self.dataset _uploaded_count = len(self.__uploaded_chunks) _total_count = len(self.__all_chunks) _uploaded_size = sum(chunk.size() for chunk in self.__uploaded_chunks) _total_size = sum(chunk.size() for chunk in self.__all_chunks) logger_status_backup_ch_upl.info( '-- Upload progress: %i of %i --', _uploaded_count, _total_count, extra={'num': _uploaded_count, 'of': _total_count, 'num_bytes': _uploaded_size, 'of_bytes': _total_size, 'ds_uuid': _dataset.uuid}) def __take_next_chunks_to_upload(self, chunk_count_by_size): """ Take out the next bunch of chunks to upload from the transaction state. @param chunk_count_by_size: the mapping from chunk size code to the required number of such chunks. @type chunk_count_by_size: col.Mapping """ M = 1024 * 1024 try_next_chunks = [] add_more = True logger.debug('Take some chunks to upload: %r', chunk_count_by_size) while add_more: # What chunk size we are going to upload next? try_next_chunk_size = min(chunk_count_by_size.iterkeys()) # How much more chunks with this chunk size we may upload # to this host? chunk_size_count_with_this_chunk_size = \ chunk_count_by_size.get(try_next_chunk_size, 0) assert chunk_size_count_with_this_chunk_size > 0, \ repr(chunk_size_count_with_this_chunk_size) # Decrease the chunk size count, and remove completely if 0 chunk_size_count_with_this_chunk_size -= 1 if chunk_size_count_with_this_chunk_size: chunk_count_by_size[try_next_chunk_size] = \ chunk_size_count_with_this_chunk_size else: del chunk_count_by_size[try_next_chunk_size] assert self.__chunks_by_size_code, \ 'We still have the chunks allowed to upload, '\ "but don't have the chunks available." # What next chunk we will use? First find the list # of the chunks... chunks_of_this_size = \ self.__chunks_by_size_code.get(try_next_chunk_size, []) assert chunks_of_this_size # Then select the next chunk, remove if from the list # and wipe the list if this was the last chunk try_next_chunk = chunks_of_this_size.pop() if not chunks_of_this_size: del self.__chunks_by_size_code[try_next_chunk_size] try_next_chunks.append(try_next_chunk) # In what cases do we need to stop # forming the CHUNKS message? _current_total_size = sum(c.size() for c in try_next_chunks) add_more = (chunk_count_by_size and MERGE_CHUNKS_ON_BACKUP and _current_total_size < MAX_CHUNK_MESSAGE_SIZE * M) return try_next_chunks def __put_back_non_uploaded_chunks(self, host, chunks): """ Put back (into the transaction state) some chunks which were not successfully uploaded. @type chunks: col.Iterable """ # [1/2] Put back the host (if needed), # and increment the number of pending chunks. # [1a] Put back the hosts if host not in self.target_hosts: logger.debug('For %r, the non-uploaded counts were missing, ' 'restoring', host) self.target_hosts[host] = _target_hosts = col.Counter() # [1b] Increment the counts put_back_chunk_sizes = col.Counter(ch.maxsize_code for ch in chunks) logger.debug('Need to pend back the following chunk sizes: %r', put_back_chunk_sizes) self.target_hosts[host] += put_back_chunk_sizes # [2/2] Put back the chunks. logger.debug('Putting back %i chunk(s): %r', len(chunks), [ch.uuid for ch in chunks]) for chunk in chunks: self.__chunks_by_size_code.setdefault(chunk.maxsize_code, []) \ .append(chunk) def __try_upload_next_chunks(self): """ We received the information which hosts we should use; now find the next host and try to upload the next chunk (or maybe multiple chunks) to it. @returns: Whether any chunk was in fact uploaded. @rtype: bool @todo: If the connection fails to the url, we must delete it from the list. """ assert not in_main_thread() _dataset = self.dataset logger.debug('Trying to upload next chunk(s)...') # If we could not proceed with something on this iteration, # but there is still other information which may be used, # we just retry it. We could've just call the same function again # and again, but risk failing due to the limited stack. while True: if not self.target_hosts: if self.__chunks_by_size_code: # We still have some chunks not uploaded: # the backup transaction failed! self.ack_result_code = BackupMessage.ResultCodes \ .GENERAL_FAILURE logger.debug('Error: no target hosts, ' 'but still some chunks: %r', self.__chunks_by_size_code) return False else: logger.debug('Backup targets %r', self.target_hosts) # Select any next host, pseudo-randomly (but reproducibly) target_host = \ self.__random.choice(sorted(self.target_hosts.keys())) chunk_count_by_size = self.target_hosts[target_host] if not chunk_count_by_size: logger.debug('Good! No more chunks allowed for %r!', target_host) del self.target_hosts[target_host] # Try again, probably with the other host. logger.debug('Remaining %r', self.target_hosts) continue assert 0 not in chunk_count_by_size.itervalues(), \ repr(chunk_count_by_size) # Shall we send a single chunk or all chunks altogether? # Analyze chunk(s) to send and put it/them # into the try_next_chunks variable. logger.debug('We need to upload such chunks to %r: %r', target_host, chunk_count_by_size) try_next_chunks = self.__take_next_chunks_to_upload( chunk_count_by_size) # We've found the list of one (or maybe more) chunks to send, # let's encrypt and post them. try_next_chunks_encrypted = \ map(partial(EncryptedChunkFromFiles.from_non_encrypted, self.__cryptographer), try_next_chunks) # We collected the chunks to upload! # Start the nested CHUNKS transaction then. logger.debug('Sending %d chunk(s) in a single batch', len(try_next_chunks_encrypted)) _message = self.message assert target_host.uuid in self.manager.app.known_peers, \ u'Host {!r} is not in {!r}' \ .format(target_host, self.manager.app.known_peers.keys()) # repr(try_next_chunks) might be pretty long, # so let's '.verbose()' it. logger.verbose('b. Will upload chunks %r to %r', try_next_chunks, target_host.urls) c_tr = self.manager.create_new_transaction( name='CHUNKS', src=_message.dst, dst=target_host, parent=self, # CHUNKS-specific chunks=try_next_chunks_encrypted) # Do NOT change the following lines to "addCallbacks": # "__on_chunks_failure()" must be called even if the internals # of "__on_chunks_success" have failed! c_tr.completed.addCallback( self.__on_chunks_success, _message.dst, target_host, try_next_chunks) c_tr.completed.addErrback( self.__on_chunks_failure, target_host, try_next_chunks) c_tr.completed.addBoth( exceptions_logged(logger)( lambda ignore: self.__upload_more_chunks())) # Try again for the next chunk # only when the started transaction succeeds. return True @exceptions_logged(logger) @contract_epydoc def __on_chunks_success(self, c_state, from_host, to_host, chunks): """ This method is called after the child CHUNKS transaction has succeeded. @type c_state: ChunksTransaction_Host.State @type from_host: Host @type to_host: Host @type chunks: col.Iterable """ from_uuid, to_uuid = from_host.uuid, to_host.uuid uploaded_chunks = set(c_state.chunks) logger.debug('Uploaded the child chunks from %r to %r: %r', from_host, to_host, uploaded_chunks) @exceptions_logged(logger) def _on_chunks_progress_success(p_state): """ This method is called after the PROGRESS transaction reporting to the node about the uploaded chunks has succeeded. @type p_state: ProgressTransaction_Host.State """ logger.debug('Reported progress successfully for %i chunks!', len(uploaded_chunks)) # Just uploaded the chunk(s), so mark it/them # as uploaded locally. for _ch in uploaded_chunks: HostQueries.HostChunks.mark_chunk_uploaded(_ch.uuid) @exceptions_logged(logger) def _on_chunks_progress_failure(failure): logger.error('Chunks reporting issue: %r', failure) # Unfortunately, we've already reported that # some chunks were uploaded; but the Node doesn't know that. # Let's "go back" in the progress logs. self.__uploaded_chunks -= uploaded_chunks self.__notify_about_upload_progress() # We've uploaded some more chunks. # They cannot be fully considered "safe" though # until the Node confirms it received the proper Progress message. # But for more actual progress report, we can generate the info log # right now. # In case if Progress fails later, we'll just rollback # some chunk counts (see _on_chunks_progress_failure()). self.__uploaded_chunks |= uploaded_chunks self.__notify_about_upload_progress() pr_def = c_state.out_progress_deferred if pr_def is None: assert not c_state.outgoing_success, repr(c_state) logger.debug('Outgoing CHUNKS transaction has actually failed ' 'on the receiver side; we fail here manually') # Emulate the error failure = Failure(exc_value=Exception('CHUNKS failed on ' 'the receiver side')) self.__on_chunks_failure(failure, target_host=to_host, failed_chunks=chunks) else: # if pr_def is not None assert c_state.outgoing_success, repr(c_state) # Do NOT change to addCallbacks! pr_def.addCallback(_on_chunks_progress_success) pr_def.addErrback(_on_chunks_progress_failure) self.__progress_notif_deferreds.append(pr_def) logger.debug('Appending deferred for chunks %r: %r', uploaded_chunks, pr_def) @exceptions_logged(logger) def __on_chunks_failure(self, failure, target_host, failed_chunks): """This method is called after the child CHUNKS transaction has failed. @type failure: Failure @param target_host: to what host the chunks were uploaded. @type target_host: Host """ logger.error('Failed to upload chunks to %r (due to %r: %s), ' 'retrying them again: %r', target_host, failure, failure.getErrorMessage(), [ch.uuid for ch in failed_chunks]) self.__put_back_non_uploaded_chunks(target_host, failed_chunks) @unpauses def __complete_backup_transaction(self): """ Just complete the backup transaction, no matter of success or error. """ logger.debug('Completing the backup!') def on_end(self): """ @note: In error case, .dataset may be None. """ cls = self.__class__ if self.dataset is not None: _ds_uuid = self.dataset.uuid with cls.per_dataset_transactions_lock: assert _ds_uuid in cls.per_dataset_transactions, \ (_ds_uuid, cls.per_dataset_transactions) del cls.per_dataset_transactions[_ds_uuid] logger.debug('Removed backup %r, per dataset transactions ' 'are now %r', _ds_uuid, cls.per_dataset_transactions) self.message_ack = self.message.reply() self.message_ack.ack_result_code = self.ack_result_code self.manager.post_message(self.message_ack) _dataset = self.dataset if _dataset is not None: is_result_good = BackupMessage.ResultCodes.is_good( self.ack_result_code) self.__notify_about_backup_end(is_good=is_result_good) if not is_result_good: # Specially to be trackable by the user. logger.error('Backup failed: %r', self) self.manager.app \ .relaunch_backup(self.dataset.uuid, RELAUNCH_AFTER_MINUTES_ON_ERROR)
class KafkaClient(object): """Cluster-aware Kafka client `KafkaClient` maintains a cache of cluster metadata (brokers, topics, etc.) and routes each request to the appropriate broker connection. It must be bootstrapped with the address of at least one Kafka broker to retrieve the cluster metadata. You will typically use this class in combination with `Producer` or `Consumer` which provide higher-level behavior. When done with the client, call :meth:`.close()` to permanently dispose of it. This terminates any open connections and release resources. Do not set or mutate the attributes of `KafkaClient` objects. `KafkaClient` is not intended to be subclassed. :ivar reactor: Twisted reactor, as passed to the constructor. This must implement :class:`~twisted.internet.interfaces.IReactorTime` and :class:`~twisted.internet.interfaces.IReactorTCP`. :ivar str clientId: A short string used to identify the client to the server. This may appear in log messages on the server side. :ivar _brokers: Map of broker ID to broker metadata (host and port). This mapping is updated (mutated) whenever metadata is returned by a broker. :type _brokers: :class:`dict` mapping :class:`int` to :class:`afkak.common.BrokerMetadata` :ivar clients: Map of broker node ID to broker clients. Items are added to this map as a connection to a specific broker is needed. Once present the client's broker metadata is updated on change. Call :meth:`_get_brokerclient()` to get a broker client. This method constructs it and adds it to *clients* if it does not exist. Call :meth:`_close_brokerclients()` to close a broker client once it has been removed from *clients*. .. warning:: Despite the name, ``clients`` is a private attribute. Clients are removed when a full metadata fetch indicates that a broker no longer exists. Note that Afkak avoids doing a full metadata fetch whenever possible because it is an expensive operation, so it is possible for a broker client to remain in this map once the node is removed from the cluster. No requests will be routed to such a broker client, which will effectively leak. Afkak should be enhanced to remove such stale clients after a timeout period. :type clients: :class:`dict` mapping :class:`int` to :class:`_KafkaBrokerClient` :ivar float timeout: Client side request timeout, **in seconds**. :param float timeout: Client-side request timeout, **in milliseconds**. :param endpoint_factory: Callable which accepts *reactor*, *host* and *port* arguments. It must return a :class:`twisted.internet.interfaces.IStreamClientEndpoint`. Afkak does not apply a timeout to connection attempts because most endpoints include timeout logic. For example, the default of :class:`~twisted.internet.endpoints.HostnameEndpoint` applies a 30-second timeout. If an endpoint doesn't support timeouts you may need to wrap it to do so. :param retry_policy: Callable which accepts a count of *failures*. It returns the number of seconds (a `float`) to wait before the next attempt. This policy is used to schedule reconnection attempts to Kafka brokers. Use :func:`twisted.internet.application.backoffPolicy()` to generate such a callable. .. versionchanged:: Afkak 3.0.0 - The *endpoint_factory* argument was added. - The *retry_policy* argument was added. - *timeout* may no longer be `None`. Pass a large value instead. """ # This is the __CLIENT_SIDE__ timeout that's used when making requests # to our brokerclients. If a request doesn't return within this amount # of time, we errback() the deferred. This is _NOT_ the server-side # timeout which is passed into the send_{produce,fetch}_request methods # which have defaults set below. This one should be larger, btw :-) DEFAULT_REQUEST_TIMEOUT_MSECS = 10000 # Default timeout msec for fetch requests. This is how long the server # will wait trying to get enough bytes of messages to fulfill the fetch # request. When this times out on the server side, it sends back a # response with as many bytes of messages as it has. See the docs for # more caveats on this timeout. DEFAULT_FETCH_SERVER_WAIT_MSECS = 5000 # Default minimum amount of message bytes sent back on a fetch request DEFAULT_FETCH_MIN_BYTES = 4096 # Default number of msecs the lead-broker will wait for replics to # ack Produce requests before failing the request DEFAULT_REPLICAS_ACK_MSECS = 1000 clientId = u"afkak-client" _clientIdBytes = clientId.encode() def __init__(self, hosts, clientId=None, timeout=DEFAULT_REQUEST_TIMEOUT_MSECS, disconnect_on_timeout=False, correlation_id=0, reactor=None, endpoint_factory=HostnameEndpoint, retry_policy=_DEFAULT_RETRY_POLICY): self.timeout = float(timeout) / 1000.0 # msecs to secs if clientId is not None: self.clientId = clientId self._clientIdBytes = _coerce_client_id(clientId) # FIXME: clients should be private self.clients = {} # Broker-NodeID -> _KafkaBrokerClient instance self.topics_to_brokers = {} # TopicAndPartition -> BrokerMetadata self.partition_meta = {} # TopicAndPartition -> PartitionMetadata self.consumer_group_to_brokers = {} # consumer_group -> BrokerMetadata self.coordinator_fetches = {} # consumer_group -> deferred self.topic_partitions = {} # topic_id -> [0, 1, 2, ...] self.topic_errors = {} # topic_id -> topic_error_code self.correlation_id = correlation_id self.close_dlist = None # Deferred wait on broker client disconnects # Do we disconnect brokerclients when requests via them timeout? self._disconnect_on_timeout = disconnect_on_timeout self._brokers = {} # Broker-NodeID -> BrokerMetadata self._topics = {} # Topic-Name -> TopicMetadata self._closing = False # Are we shutting down/shutdown? self.update_cluster_hosts(hosts) # Store hosts and mark for lookup if reactor is None: from twisted.internet import reactor self.reactor = reactor self._endpoint_factory = endpoint_factory assert retry_policy(1) >= 0.0 self._retry_policy = retry_policy @property def clock(self): # TODO: Deprecate this return self.reactor def __repr__(self): """return a string representing this KafkaClient.""" return '<{} clientId={} hosts={} timeout={}>'.format( self.__class__.__name__, self.clientId, ' '.join('{}:{}'.format(h, p) for h, p in self._bootstrap_hosts), self.timeout, ) def update_cluster_hosts(self, hosts): """ Advise the client of possible changes to Kafka cluster hosts In general Afkak will keep up with changes to the cluster, but in a Docker environment where all the nodes in the cluster may change IP address at once or in quick succession Afkak may fail to track changes to the cluster. This function lets you notify the Afkak client that some or all of the brokers may have changed. The hosts given are used the next time the client needs a fresh connection to look up cluster metadata. Parameters ========== hosts: (string|[string]) Hosts as a single comma separated "host[:port][,host[:port]]+" string, or a list of strings: ["host[:port]", ...] """ self._bootstrap_hosts = _normalize_hosts(hosts) def reset_topic_metadata(self, *topics): topics = tuple(_coerce_topic(t) for t in topics) for topic in topics: try: partitions = self.topic_partitions[topic] except KeyError: continue for partition in partitions: self.topics_to_brokers.pop( TopicAndPartition(topic, partition), None) del self.topic_partitions[topic] if topic in self.topic_errors: del self.topic_errors[topic] def reset_consumer_group_metadata(self, *groups): """Reset cache of what broker manages the offset for specified groups Remove the cache of what Kafka broker should be contacted when fetching or updating the committed offsets for a given consumer group or groups. NOTE: Does not cancel any outstanding requests for updates to the consumer group metadata for the specified groups. """ groups = tuple(_coerce_consumer_group(g) for g in groups) for group in groups: if group in self.consumer_group_to_brokers: del self.consumer_group_to_brokers[group] def reset_all_metadata(self): """Clear all cached metadata Metadata will be re-fetched as required to satisfy requests. """ self.topics_to_brokers.clear() self.topic_partitions.clear() self.topic_errors.clear() self.consumer_group_to_brokers.clear() def has_metadata_for_topic(self, topic): return _coerce_topic(topic) in self.topic_partitions def metadata_error_for_topic(self, topic): return self.topic_errors.get( _coerce_topic(topic), UnknownTopicOrPartitionError.errno) def partition_fully_replicated(self, topic_and_part): if topic_and_part not in self.partition_meta: return False part_meta = self.partition_meta[topic_and_part] return len(part_meta.replicas) == len(part_meta.isr) def topic_fully_replicated(self, topic): """ Determine if the given topic is fully replicated according to the currently known cluster metadata. .. note:: This relies on cached cluster metadata. You may call :meth:`load_metadata_for_topics()` first to refresh this cache. :param str topic: Topic name :returns: A boolean indicating that: 1. The number of partitions in the topic is non-zero. 2. For each partition, all replicas are in the in-sync replica (ISR) set. :rtype: :class:`bool` """ topic = _coerce_topic(topic) if topic not in self.topic_partitions: return False if not self.topic_partitions[topic]: # Don't consider an empty partition list 'fully replicated' return False return all( self.partition_fully_replicated(TopicAndPartition(topic, p)) for p in self.topic_partitions[topic] ) def close(self): """Permanently dispose of the client - Immediately mark the client as closed, causing current operations to fail with :exc:`~afkak.common.CancelledError` and future operations to fail with :exc:`~afkak.common.ClientError`. - Clear cached metadata. - Close any connections to Kafka brokers. :returns: deferred that fires when all resources have been released """ # If we're already waiting on an/some outstanding disconnects # make sure we continue to wait for them... log.debug("%r: close", self) self._closing = True # Close down any clients we have brokerclients, self.clients = self.clients, None self._close_brokerclients(brokerclients.values()) # clean up other outstanding operations self.reset_all_metadata() return self.close_dlist or defer.succeed(None) def load_metadata_for_topics(self, *topics): """Discover topic metadata and brokers Afkak internally calls this method whenever metadata is required. :param str topics: Topic names to look up. The resulting metadata includes the list of topic partitions, brokers owning those partitions, and which partitions are in sync. Fetching metadata for a topic may trigger auto-creation if that is enabled on the Kafka broker. When no topic name is given metadata for *all* topics is fetched. This is an expensive operation, but it does not trigger topic creation. :returns: :class:`Deferred` for the completion of the metadata fetch. This will fire with ``True`` on success, ``None`` on cancellation, or fail with an exception on error. On success, topic metadata is available from the attributes of :class:`KafkaClient`: :data:`~KafkaClient.topic_partitions`, :data:`~KafkaClient.topics_to_brokers`, etc. """ topics = tuple(_coerce_topic(t) for t in topics) log.debug("%r: load_metadata_for_topics(%s)", self, ', '.join(repr(t) for t in topics)) fetch_all_metadata = not topics # create the request requestId = self._next_id() request = KafkaCodec.encode_metadata_request(self._clientIdBytes, requestId, topics) # Callbacks for the request deferred... def _handleMetadataResponse(response): # Decode the response brokers, topics = KafkaCodec.decode_metadata_response(response) log.debug("%r: got metadata brokers=%r topics=%r", self, brokers, topics) # If we fetched the metadata for all topics, then store away the # received metadata for diagnostics. if fetch_all_metadata: self._brokers = brokers self._topics = topics # Iff we were fetching for all topics, and we got at least one # broker back, then remove brokers when we update our brokers ok_to_remove = (fetch_all_metadata and len(brokers)) # Take the metadata we got back, update our self.clients, and # if needed disconnect or connect from/to old/new brokers self._update_brokers(brokers.values(), remove=ok_to_remove) # Now loop through all the topics/partitions in the response # and setup our cache/data-structures for topic, topic_metadata in topics.items(): _, topic_error, partitions = topic_metadata self.reset_topic_metadata(topic) self.topic_errors[topic] = topic_error if not partitions: log.warning('No partitions for %s, Err:%d', topic, topic_error) continue self.topic_partitions[topic] = [] for partition, meta in partitions.items(): self.topic_partitions[topic].append(partition) topic_part = TopicAndPartition(topic, partition) self.partition_meta[topic_part] = meta if meta.leader == -1: log.warning('No leader for topic %s partition %s', topic, partition) self.topics_to_brokers[topic_part] = None else: self.topics_to_brokers[ topic_part] = brokers[meta.leader] self.topic_partitions[topic] = sorted( self.topic_partitions[topic]) return True def _handleMetadataErr(err): # This should maybe do more cleanup? if err.check(t_CancelledError, CancelledError): # Eat the error # XXX Shouldn't this return False? The success branch # returns True. return None log.error("Failed to retrieve metadata:%s", err) raise KafkaUnavailableError( "Unable to load metadata from configured " "hosts: {!r}".format(err)) # Send the request, add the handlers d = self._send_broker_unaware_request(requestId, request) d.addCallbacks(_handleMetadataResponse, _handleMetadataErr) return d def load_consumer_metadata_for_group(self, group): """ Determine broker for the consumer metadata for the specified group Returns a deferred which callbacks with True if the group's coordinator could be determined, or errbacks with ConsumerCoordinatorNotAvailableError if not. Parameters ---------- group: group name as `str` """ group = _coerce_consumer_group(group) log.debug("%r: load_consumer_metadata_for_group(%r)", self, group) # If we are already loading the metadata for this group, then # just return the outstanding deferred if group in self.coordinator_fetches: d = defer.Deferred() self.coordinator_fetches[group][1].append(d) return d # No outstanding request, create a new one requestId = self._next_id() request = KafkaCodec.encode_consumermetadata_request( self._clientIdBytes, requestId, group) # Callbacks for the request deferred... def _handleConsumerMetadataResponse(response_bytes): # Decode the response (returns ConsumerMetadataResponse) response = KafkaCodec.decode_consumermetadata_response(response_bytes) log.debug("%r: load_consumer_metadata_for_group(%r) -> %r", self, group, response) if response.error: raise BrokerResponseError.errnos.get(response.error, UnknownError)(response) bm = BrokerMetadata(response.node_id, response.host, response.port) self.consumer_group_to_brokers[group] = bm self._update_brokers([bm]) return True def _handleConsumerMetadataErr(err): log.error("Failed to retrieve consumer metadata for group %r", group, exc_info=(err.type, err.value, err.getTracebackObject())) # Clear any stored value for the group's coordinator self.reset_consumer_group_metadata(group) # FIXME: This exception should chain from err. raise ConsumerCoordinatorNotAvailableError( "Coordinator for group {!r} not available".format(group), ) def _propagate(result): [_, ds] = self.coordinator_fetches.pop(group, None) for d in ds: d.callback(result) # Send the request, add the handlers request_d = self._send_broker_unaware_request(requestId, request) d = defer.Deferred() # Save the deferred under the fetches for this group self.coordinator_fetches[group] = (request_d, [d]) request_d.addCallback(_handleConsumerMetadataResponse) request_d.addErrback(_handleConsumerMetadataErr) request_d.addBoth(_propagate) return d @inlineCallbacks def send_produce_request(self, payloads=None, acks=1, timeout=DEFAULT_REPLICAS_ACK_MSECS, fail_on_error=True, callback=None): """ Encode and send some ProduceRequests ProduceRequests will be grouped by (topic, partition) and then sent to a specific broker. Output is a list of responses in the same order as the list of payloads specified Parameters ---------- payloads: list of ProduceRequest acks: How many Kafka broker replicas need to write before the leader replies with a response timeout: How long the server has to receive the acks from the replicas before returning an error. fail_on_error: boolean, should we raise an Exception if we encounter an API error? callback: function, instead of returning the ProduceResponse, first pass it through this function Return ------ a deferred which callbacks with a list of ProduceResponse Raises ------ FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ encoder = partial( KafkaCodec.encode_produce_request, acks=acks, timeout=timeout) if acks == 0: decoder = None else: decoder = KafkaCodec.decode_produce_response resps = yield self._send_broker_aware_request( payloads, encoder, decoder) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_fetch_request(self, payloads=None, fail_on_error=True, callback=None, max_wait_time=DEFAULT_FETCH_SERVER_WAIT_MSECS, min_bytes=DEFAULT_FETCH_MIN_BYTES): """ Encode and send a FetchRequest Payloads are grouped by topic and partition so they can be pipelined to the same brokers. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ if (max_wait_time / 1000) > (self.timeout - 0.1): raise ValueError( "%r: max_wait_time: %d must be less than client.timeout by " "at least 100 milliseconds.", self, max_wait_time) encoder = partial(KafkaCodec.encode_fetch_request, max_wait_time=max_wait_time, min_bytes=min_bytes) # resps is a list of FetchResponse() objects, each of which can hold # 1-n messages. resps = yield self._send_broker_aware_request( payloads, encoder, KafkaCodec.decode_fetch_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_request(self, payloads=None, fail_on_error=True, callback=None): resps = yield self._send_broker_aware_request( payloads, KafkaCodec.encode_offset_request, KafkaCodec.decode_offset_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_fetch_request(self, group, payloads=None, fail_on_error=True, callback=None): """ Takes a group (string) and list of OffsetFetchRequest and returns a list of OffsetFetchResponse objects """ encoder = partial(KafkaCodec.encode_offset_fetch_request, group=group) decoder = KafkaCodec.decode_offset_fetch_response resps = yield self._send_broker_aware_request( payloads, encoder, decoder, consumer_group=group) returnValue(self._handle_responses( resps, fail_on_error, callback, group)) @inlineCallbacks def send_offset_commit_request(self, group, payloads=None, fail_on_error=True, callback=None, group_generation_id=-1, consumer_id=''): """Send a list of OffsetCommitRequests to the Kafka broker for the given consumer group. Args: group (str): The consumer group to which to commit the offsets payloads ([OffsetCommitRequest]): List of topic, partition, offsets to commit. fail_on_error (bool): Whether to raise an exception if a response from the Kafka broker indicates an error callback (callable): a function to call with each of the responses before returning the returned value to the caller. group_generation_id (int): Must currently always be -1 consumer_id (str): Must currently always be empty string Returns: [OffsetCommitResponse]: List of OffsetCommitResponse objects. Will raise KafkaError for failed requests if fail_on_error is True """ group = _coerce_consumer_group(group) encoder = partial(KafkaCodec.encode_offset_commit_request, group=group, group_generation_id=group_generation_id, consumer_id=consumer_id) decoder = KafkaCodec.decode_offset_commit_response resps = yield self._send_broker_aware_request( payloads, encoder, decoder, consumer_group=group) returnValue(self._handle_responses( resps, fail_on_error, callback, group)) # # # Private Methods # # # def _handle_responses(self, responses, fail_on_error, callback=None, consumer_group=None): out = [] for resp in responses: try: _check_error(resp) except (UnknownTopicOrPartitionError, NotLeaderForPartitionError): log.error('Error found in response: %s', resp) self.reset_topic_metadata(resp.topic) if fail_on_error: raise except (OffsetsLoadInProgressError, NotCoordinatorForConsumerError, ConsumerCoordinatorNotAvailableError): log.error('Error found in response: %s Consumer Group: %s', resp, consumer_group) self.reset_consumer_group_metadata(consumer_group) if fail_on_error: raise if callback is not None: out.append(callback(resp)) else: out.append(resp) return out def _get_brokerclient(self, node_id): """ Get a broker client. :param int node_id: Broker node ID :raises KeyError: for an unknown node ID :returns: :class:`_KafkaBrokerClient` """ if self._closing: raise ClientError("Cannot get broker client for node_id={}: {} has been closed".format(node_id, self)) if node_id not in self.clients: broker_metadata = self._brokers[node_id] log.debug("%r: creating client for %s", self, broker_metadata) self.clients[node_id] = _KafkaBrokerClient( self.reactor, self._endpoint_factory, broker_metadata, self.clientId, self._retry_policy, ) return self.clients[node_id] def _close_brokerclients(self, clients): """ Close the given broker clients. :param clients: Iterable of `_KafkaBrokerClient` """ def _log_close_failure(failure, brokerclient): log.debug( 'BrokerClient: %s close result: %s: %s', brokerclient, failure.type.__name__, failure.getErrorMessage()) def _clean_close_dlist(result, close_dlist): # If there aren't any other outstanding closings going on, then # close_dlist == self.close_dlist, and we can reset it. if close_dlist == self.close_dlist: self.close_dlist = None if not self.close_dlist: dList = [] else: log.debug("%r: _close_brokerclients has nested deferredlist: %r", self, self.close_dlist) dList = [self.close_dlist] for brokerClient in clients: log.debug("Calling close on: %r", brokerClient) d = brokerClient.close().addErrback(_log_close_failure, brokerClient) dList.append(d) self.close_dlist = DeferredList(dList) self.close_dlist.addBoth(_clean_close_dlist, self.close_dlist) def _update_brokers(self, brokers, remove=False): """ Update `self._brokers` and `self.clients` Update our self.clients based on brokers in received metadata Take the received dict of brokers and reconcile it with our current list of brokers (self.clients). If there is a new one, bring up a new connection to it, and if remove is True, and any in our current list aren't in the metadata returned, disconnect from it. :param brokers: Iterable of `BrokerMetadata`. A client will be created for every broker given if it doesn't yet exist. :param bool remove: Is this metadata for *all* brokers? If so, clients for brokers which are no longer found in the metadata will be closed. """ log.debug("%r: _update_brokers(%r, remove=%r)", self, brokers, remove) brokers_by_id = {bm.node_id: bm for bm in brokers} self._brokers.update(brokers_by_id) # Update the metadata of broker clients that already exist. for node_id, broker_meta in brokers_by_id.items(): if node_id not in self.clients: continue self.clients[node_id].updateMetadata(broker_meta) # Remove any clients for brokers which no longer exist. if remove: to_close = [ self.clients.pop(node_id) for node_id in set(self.clients) - set(brokers_by_id) ] if to_close: self._close_brokerclients(to_close) @inlineCallbacks def _get_leader_for_partition(self, topic, partition): """ Returns the leader for a partition or None if the partition exists but has no leader. PartitionUnavailableError will be raised if the topic or partition is not part of the metadata. """ key = TopicAndPartition(topic, partition) # reload metadata whether the partition is not available # or has no leader (broker is None) if self.topics_to_brokers.get(key) is None: yield self.load_metadata_for_topics(topic) if key not in self.topics_to_brokers: raise PartitionUnavailableError("%s not available" % str(key)) returnValue(self.topics_to_brokers[key]) @inlineCallbacks def _get_coordinator_for_group(self, consumer_group): """Returns the coordinator (broker) for a consumer group Returns the broker for a given consumer group or Raises ConsumerCoordinatorNotAvailableError """ if self.consumer_group_to_brokers.get(consumer_group) is None: yield self.load_consumer_metadata_for_group(consumer_group) returnValue(self.consumer_group_to_brokers.get(consumer_group)) def _next_id(self): """Generate a new correlation id.""" # modulo to keep within int32 (signed) self.correlation_id = (self.correlation_id + 1) % 2**31 return self.correlation_id def _make_request_to_broker(self, broker, requestId, request, **kwArgs): """Send a request to the specified broker.""" def _timeout_request(broker, requestId): """The time we allotted for the request expired, cancel it.""" try: # FIXME: This should be done by calling .cancel() on the Deferred # returned by the broker client. broker.cancelRequest(requestId, reason=RequestTimedOutError( 'Request: {} cancelled due to timeout'.format(requestId))) except KeyError: # pragma: no cover This should never happen... log.exception('ERROR: Failed to find key for timed-out ' 'request. Broker: %r Req: %d', broker, requestId) raise if self._disconnect_on_timeout: broker.disconnect() def _alert_blocked_reactor(timeout, start): """Complain if this timer didn't fire before the timeout elapsed""" now = self.reactor.seconds() if now >= (start + timeout): log.warning('Reactor was starved for %r seconds', now - start) def _cancel_timeout(result, dc): """Request completed/cancelled, cancel the timeout delayedCall.""" if dc.active(): dc.cancel() return result # Make the request to the specified broker log.debug('_mrtb: sending request: %d to broker: %r', requestId, broker) d = broker.makeRequest(requestId, request, **kwArgs) # Set a delayedCall to fire if we don't get a reply in time dc = self.reactor.callLater( self.timeout, _timeout_request, broker, requestId) # Set a delayedCall to complain if the reactor has been blocked rc = self.reactor.callLater( (self.timeout * 0.9), _alert_blocked_reactor, self.timeout, self.reactor.seconds()) # Setup a callback on the request deferred to cancel both callLater d.addBoth(_cancel_timeout, dc) d.addBoth(_cancel_timeout, rc) return d @inlineCallbacks def _send_broker_unaware_request(self, requestId, request): """ Attempt to send a broker-agnostic request to one of the known brokers: 1. Try each connected broker (in random order) 2. Try each known but unconnected broker (in random order) 3. Try each of the bootstrap hosts (in random order) :param bytes request: The bytes of a Kafka `RequestMessage`_ structure. It must have a unique (to this connection) correlation ID. :returns: API response message for *request* :rtype: Deferred[bytes] :raises: `KafkaUnavailableError` when making the request of all known hosts has failed. """ node_ids = list(self._brokers.keys()) # Randomly shuffle the brokers to distribute the load random.shuffle(node_ids) # Prioritize connected brokers def connected(node_id): try: return self.clients[node_id].connected() except KeyError: return False node_ids.sort(reverse=True, key=connected) for node_id in node_ids: broker = self._get_brokerclient(node_id) try: log.debug('_sbur: sending request %d to broker %r', requestId, broker) d = self._make_request_to_broker(broker, requestId, request) resp = yield d returnValue(resp) except KafkaError as e: log.warning(( "Will try next server after request with correlationId=%d" " failed against server %s:%i. Error: %s" ), requestId, broker.host, broker.port, e) # The request was not handled, likely because no broker metadata has # loaded yet (or all broker connections have failed). Fall back to # boostrapping. returnValue((yield self._send_bootstrap_request(request))) @inlineCallbacks def _send_bootstrap_request(self, request): """Make a request using an ephemeral broker connection This routine is used to make broker-unaware requests to get the initial cluster metadata. It cycles through the configured hosts, trying to connect and send the request to each in turn. This temporary connection is closed once a response is received. Note that most Kafka APIs require requests be sent to a specific broker. This method will only function for broker-agnostic requests like: * `Metadata <https://kafka.apache.org/protocol.html#The_Messages_Metadata>`_ * `FindCoordinator <https://kafka.apache.org/protocol.html#The_Messages_FindCoordinator>`_ :param bytes request: The bytes of a Kafka `RequestMessage`_ structure. It must have a unique (to this connection) correlation ID. :returns: API response message for *request* :rtype: Deferred[bytes] :raises: - `KafkaUnavailableError` when making the request of all known hosts has failed. - `twisted.internet.defer.TimeoutError` when connecting or making a request exceeds the timeout. """ hostports = list(self._bootstrap_hosts) random.shuffle(hostports) for host, port in hostports: ep = self._endpoint_factory(self.reactor, host, port) try: protocol = yield ep.connect(_bootstrapFactory) except Exception as e: log.debug("%s: bootstrap connect to %s:%s -> %s", self, host, port, e) continue try: response = yield protocol.request(request).addTimeout(self.timeout, self.reactor) except Exception: log.debug("%s: bootstrap request to %s:%s failed", self, host, port, exc_info=True) else: returnValue(response) finally: protocol.transport.loseConnection() raise KafkaUnavailableError("Failed to bootstrap from hosts {}".format(hostports)) @inlineCallbacks def _send_broker_aware_request(self, payloads, encoder_fn, decode_fn, consumer_group=None): """ Group a list of request payloads by topic+partition and send them to the leader broker for that partition using the supplied encode/decode functions Params ====== payloads: list of object-like entities with a topic and partition attribute. payloads must be grouped by (topic, partition) tuples. encode_fn: a method to encode the list of payloads to a request body, must accept client_id, correlation_id, and payloads as keyword arguments decode_fn: a method to decode a response body into response objects. The response objects must be object-like and have topic and partition attributes consumer_group: [string], optional. Indicates the request should be directed to the Offset Coordinator for the specified consumer_group. Return ====== deferred yielding a list of response objects in the same order as the supplied payloads, or None if decode_fn is None. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError, """ # Calling this without payloads is nonsensical if not payloads: raise ValueError("Payloads parameter is empty") # Group the requests by topic+partition original_keys = [] payloads_by_broker = collections.defaultdict(list) # Go through all the payloads, lookup the leader/coordinator for that # payload's topic/partition or consumer group. If there's no # leader/coordinator (broker), raise. For each broker, keep # a list of the payloads to be sent to it. Also, for each payload in # the list of payloads, make a corresponding list (original_keys) with # the topic/partition in the same order, so we can lookup the returned # result(s) by that topic/partition key in the set of returned results # and return them in a list the same order the payloads were supplied for payload in payloads: # get leader/coordinator, depending on consumer_group if consumer_group is None: leader = yield self._get_leader_for_partition( payload.topic, payload.partition) if leader is None: raise LeaderUnavailableError( "Leader not available for topic %s partition %s" % (payload.topic, payload.partition)) else: leader = yield self._get_coordinator_for_group(consumer_group) if leader is None: raise ConsumerCoordinatorNotAvailableError( "Coordinator not available for group: %s" % (consumer_group)) payloads_by_broker[leader].append(payload) original_keys.append((payload.topic, payload.partition)) # Accumulate the responses in a dictionary acc = {} # The kafka server doesn't send replies to produce requests # with acks=0. In that case, our decode_fn will be # None, and we need to let the brokerclient know not # to expect a reply. makeRequest() returns a deferred # regardless, but in the expectResponse=False case, it will # fire as soon as the request is sent, and it can errBack() # due to being cancelled prior to the broker being able to # send the request. expectResponse = decode_fn is not None # keep a list of payloads that were failed to be sent to brokers failed_payloads = [] # Keep track of outstanding requests in a list of deferreds inFlight = [] # and the payloads that go along with them payloadsList = [] # For each broker, send the list of request payloads, for broker_meta, payloads in payloads_by_broker.items(): broker = self._get_brokerclient(broker_meta.node_id) requestId = self._next_id() request = encoder_fn(client_id=self._clientIdBytes, correlation_id=requestId, payloads=payloads) # Make the request d = self._make_request_to_broker(broker, requestId, request, expectResponse=expectResponse) inFlight.append(d) payloadsList.append(payloads) # Wait for all the responses to come back, or the requests to fail results = yield DeferredList(inFlight, consumeErrors=True) # We now have a list of (succeeded, response/Failure) tuples. Check 'em for (success, response), payloads in zip(results, payloadsList): if not success: # The brokerclient deferred was errback()'d: # The send failed, or this request was cancelled (by timeout) log.debug("%r: request:%r to broker failed: %r", self, payloads, response) failed_payloads.extend([(p, response) for p in payloads]) continue if not expectResponse: continue # Successful request/response. Decode it and store by topic/part for response in decode_fn(response): acc[(response.topic, response.partition)] = response # Order the accumulated responses by the original key order # Note that this scheme will throw away responses which we did # not request. See test_send_fetch_request, where the response # includes an error, but for a topic/part we didn't request. # Since that topic/partition isn't in original_keys, we don't pass # it back from here and it doesn't error out. # If any of the payloads failed, fail responses = [acc[k] for k in original_keys if k in acc] if acc else [] if failed_payloads: self.reset_all_metadata() raise FailedPayloadsError(responses, failed_payloads) returnValue(responses)
class KafkaClient(object): """Cluster-aware Kafka client. This is the high-level client which most clients should use. It maintains a collection of :class:`~afkak.brokerclient._KafkaBrokerClient` objects, one each to the various hosts in the Kafka cluster and auto selects the proper one based on the topic and partition of the request. It maintains a map of topics/partitions to brokers. A KafkaClient object maintains connections (reconnected as needed) to the various brokers. It must be bootstrapped with at least one host to retrieve the cluster metadata. :ivar reactor: Twisted reactor, as passed to the constructor. This must implement :class:`~twisted.internet.interfaces.IReactorTime` and :class:`~twisted.internet.interfaces.IReactorTCP`. :ivar str clientId: A short string used to identify the client to the server. This may appear in log messages on the server side. :ivar clients: Map of (host, port) tuples to :class:`_KafkaBrokerClient` instances. :type clients: :class:`dict` of (:class:`str`, :class:`int`) to :class:`_KafkaBrokerClient` """ # This is the __CLIENT_SIDE__ timeout that's used when making requests # to our brokerclients. If a request doesn't return within this amount # of time, we errback() the deferred. This is _NOT_ the server-side # timeout which is passed into the send_{produce,fetch}_request methods # which have defaults set below. This one should be larger, btw :-) DEFAULT_REQUEST_TIMEOUT_MSECS = 10000 # Default timeout msec for fetch requests. This is how long the server # will wait trying to get enough bytes of messages to fulfill the fetch # request. When this times out on the server side, it sends back a # response with as many bytes of messages as it has. See the docs for # more caveats on this timeout. DEFAULT_FETCH_SERVER_WAIT_MSECS = 5000 # Default minimum amount of message bytes sent back on a fetch request DEFAULT_FETCH_MIN_BYTES = 4096 # Default number of msecs the lead-broker will wait for replics to # ack Produce requests before failing the request DEFAULT_REPLICAS_ACK_MSECS = 1000 clientId = u"afkak-client" _clientIdBytes = clientId.encode() def __init__(self, hosts, clientId=None, timeout=DEFAULT_REQUEST_TIMEOUT_MSECS, disconnect_on_timeout=False, correlation_id=0, reactor=None): if timeout is not None: if not isinstance(timeout, Real): raise TypeError( "Timeout value: {!r} of type: {!s} is invalid. Must be " "None or Real.".format(timeout, type(timeout))) timeout /= 1000.0 # msecs to secs self.timeout = timeout if clientId is not None: self.clientId = clientId self._clientIdBytes = _coerce_client_id(clientId) # Setup all our initial attributes self.clients = {} # (host,port) -> _KafkaBrokerClient instance self.topics_to_brokers = {} # TopicAndPartition -> BrokerMetadata self.partition_meta = {} # TopicAndPartition -> PartitionMetadata self.consumer_group_to_brokers = {} # consumer_group -> BrokerMetadata self.coordinator_fetches = {} # consumer_group -> deferred self.topic_partitions = {} # topic_id -> [0, 1, 2, ...] self.topic_errors = {} # topic_id -> topic_error_code self.correlation_id = correlation_id self.close_dlist = None # Deferred wait on broker client disconnects # Do we disconnect brokerclients when requests via them timeout? self._disconnect_on_timeout = disconnect_on_timeout self._brokers = {} # Broker-NodeID -> BrokerMetadata self._topics = {} # Topic-Name -> TopicMetadata self._closing = False # Are we shutting down/shutdown? self.update_cluster_hosts(hosts) # Store hosts and mark for lookup # clock/reactor for testing... if reactor is None: from twisted.internet import reactor self.reactor = reactor @property def clock(self): # TODO: Deprecate this return self.reactor def __repr__(self): """return a string representing this KafkaClient.""" return '<KafkaClient clientId={0} brokers={1} timeout={2}>'.format( self.clientId, sorted(self.clients.keys()), self.timeout, ) def update_cluster_hosts(self, hosts): """Advise the Afkak client of possible changes to Kafka cluster hosts In general Afkak will keep up with changes to the cluster, but in a Docker environment where all the nodes in the cluster may change IP address at once or in quick succession Afkak may lose connections to all of the brokers. This function lets you notify the Afkak client that some or all of the brokers may have changed. Afkak will compare the new list to the old and make new connections as needed. Parameters ========== hosts: (string|[string]) Hosts as a single comma separated "host[:port][,host[:port]]+" string, or a list of strings: ["host[:port]", ...] Return ====== None """ self._hosts = hosts self._collect_hosts_d = True def reset_topic_metadata(self, *topics): topics = tuple(_coerce_topic(t) for t in topics) for topic in topics: try: partitions = self.topic_partitions[topic] except KeyError: continue for partition in partitions: self.topics_to_brokers.pop(TopicAndPartition(topic, partition), None) del self.topic_partitions[topic] if topic in self.topic_errors: del self.topic_errors[topic] def reset_consumer_group_metadata(self, *groups): """Reset cache of what broker manages the offset for specified groups Remove the cache of what Kafka broker should be contacted when fetching or updating the committed offsets for a given consumer group or groups. NOTE: Does not cancel any outstanding requests for updates to the consumer group metadata for the specified groups. """ groups = tuple(_coerce_consumer_group(g) for g in groups) for group in groups: if group in self.consumer_group_to_brokers: del self.consumer_group_to_brokers[group] def reset_all_metadata(self): self.topics_to_brokers.clear() self.topic_partitions.clear() self.topic_errors.clear() self.consumer_group_to_brokers.clear() def has_metadata_for_topic(self, topic): return _coerce_topic(topic) in self.topic_partitions def metadata_error_for_topic(self, topic): return self.topic_errors.get(_coerce_topic(topic), UnknownTopicOrPartitionError.errno) def partition_fully_replicated(self, topic_and_part): if topic_and_part not in self.partition_meta: return False part_meta = self.partition_meta[topic_and_part] return len(part_meta.replicas) == len(part_meta.isr) def topic_fully_replicated(self, topic): """ Determine if the given topic is fully replicated according to the currently known cluster metadata. .. note:: This relies on cached cluster metadata. You may call :meth:`load_metadata_for_topics()` first to refresh this cache. :param str topic: Topic name :returns: A boolean indicating that: 1. The number of partitions in the topic is non-zero. 2. For each partition, all replicas are in the in-sync replica (ISR) set. :rtype: :class:`bool` """ topic = _coerce_topic(topic) if topic not in self.topic_partitions: return False if not self.topic_partitions[topic]: # Don't consider an empty partition list 'fully replicated' return False return all( self.partition_fully_replicated(TopicAndPartition(topic, p)) for p in self.topic_partitions[topic]) def close(self): # If we're already waiting on an/some outstanding disconnects # make sure we continue to wait for them... log.debug("%r: close", self) self._closing = True # Close down any clients we have self._close_brokerclients(self.clients.keys()) # clean up other outstanding operations self.reset_all_metadata() return self.close_dlist def load_metadata_for_topics(self, *topics): """ Discover brokers and metadata for a set of topics. This function is called lazily whenever metadata is unavailable. :param topics: The topics for which to fetch metadata (topic name as :class:`str`). Metadata for *all* topics is fetched when no topic is specified. :returns: :class:`Deferred` for the completion of the metadata fetch. This will resolve with ``True`` on success, ``None`` on cancellation, or fail with an exception on error. On success, topic metadata is available from the attributes of :class:`KafkaClient`: :data:`~KafkaClient.topic_partitions`, :data:`~KafkaClient.topics_to_brokers`, etc. """ topics = tuple(_coerce_topic(t) for t in topics) log.debug("%r: load_metadata_for_topics: %r", self, topics) fetch_all_metadata = not topics # create the request requestId = self._next_id() request = KafkaCodec.encode_metadata_request(self._clientIdBytes, requestId, topics) # Callbacks for the request deferred... def _handleMetadataResponse(response): # Decode the response (brokers, topics) = \ KafkaCodec.decode_metadata_response(response) log.debug("%r: Broker/Topic metadata: %r/%r", self, brokers, topics) # If we fetched the metadata for all topics, then store away the # received metadata for diagnostics. if fetch_all_metadata: self._brokers = brokers self._topics = topics # Iff we were fetching for all topics, and we got at least one # broker back, then remove brokers when we update our brokers ok_to_remove = (fetch_all_metadata and len(brokers)) # Take the metadata we got back, update our self.clients, and # if needed disconnect or connect from/to old/new brokers self._update_brokers( [(nativeString(b.host), b.port) for b in brokers.values()], remove=ok_to_remove, ) # Now loop through all the topics/partitions in the response # and setup our cache/data-structures for topic, topic_metadata in topics.items(): _, topic_error, partitions = topic_metadata self.reset_topic_metadata(topic) self.topic_errors[topic] = topic_error if not partitions: log.warning('No partitions for %s, Err:%d', topic, topic_error) continue self.topic_partitions[topic] = [] for partition, meta in partitions.items(): self.topic_partitions[topic].append(partition) topic_part = TopicAndPartition(topic, partition) self.partition_meta[topic_part] = meta if meta.leader == -1: log.warning('No leader for topic %s partition %s', topic, partition) self.topics_to_brokers[topic_part] = None else: self.topics_to_brokers[topic_part] = brokers[ meta.leader] self.topic_partitions[topic] = sorted( self.topic_partitions[topic]) return True def _handleMetadataErr(err): # This should maybe do more cleanup? if err.check(t_CancelledError, CancelledError): # Eat the error return None log.error("Failed to retrieve metadata:%s", err) raise KafkaUnavailableError( "Unable to load metadata from configured " "hosts: {!r}".format(err)) # Send the request, add the handlers d = self._send_broker_unaware_request(requestId, request) d.addCallbacks(_handleMetadataResponse, _handleMetadataErr) return d def load_consumer_metadata_for_group(self, group): """ Determine broker for the consumer metadata for the specified group Returns a deferred which callbacks with True if the group's coordinator could be determined, or errbacks with ConsumerCoordinatorNotAvailableError if not. Parameters ---------- group: group name as `str` """ group = _coerce_consumer_group(group) log.debug("%r: load_consumer_metadata_for_group: %r", self, group) # If we are already loading the metadata for this group, then # just return the outstanding deferred if group in self.coordinator_fetches: return self.coordinator_fetches[group] # No outstanding request, create a new one requestId = self._next_id() request = KafkaCodec.encode_consumermetadata_request( self._clientIdBytes, requestId, group) # Callbacks for the request deferred... def _handleConsumerMetadataResponse(response, group): # Clear the outstanding fetch self.coordinator_fetches.pop(group, None) # Decode the response (returns ConsumerMetadataResponse) c_m_resp = KafkaCodec.decode_consumermetadata_response(response) log.debug("%r: c_m_resp: %r", self, c_m_resp) if c_m_resp.error: # Raise the appropriate error resp_err = BrokerResponseError.errnos.get( c_m_resp.error, UnknownError)(c_m_resp) raise resp_err self.consumer_group_to_brokers[group] = \ BrokerMetadata(c_m_resp.node_id, c_m_resp.host, c_m_resp.port) return True def _handleConsumerMetadataErr(err, group): # Clear the outstanding fetch self.coordinator_fetches.pop(group, None) log.error( "Failed to retrieve consumer metadata " "for group: %s Error:%r", group, err) # Clear any stored value for the group's coordinator self.reset_consumer_group_metadata(group) raise ConsumerCoordinatorNotAvailableError( "Coordinator for group: %s not available" % (group)) # Send the request, add the handlers d = self._send_broker_unaware_request(requestId, request) # Save the deferred under the fetches for this group self.coordinator_fetches[group] = d d.addCallback(_handleConsumerMetadataResponse, group) d.addErrback(_handleConsumerMetadataErr, group) return d @inlineCallbacks def send_produce_request(self, payloads=None, acks=1, timeout=DEFAULT_REPLICAS_ACK_MSECS, fail_on_error=True, callback=None): """ Encode and send some ProduceRequests ProduceRequests will be grouped by (topic, partition) and then sent to a specific broker. Output is a list of responses in the same order as the list of payloads specified Parameters ---------- payloads: list of ProduceRequest acks: How many Kafka broker replicas need to write before the leader replies with a response timeout: How long the server has to receive the acks from the replicas before returning an error. fail_on_error: boolean, should we raise an Exception if we encounter an API error? callback: function, instead of returning the ProduceResponse, first pass it through this function Return ------ a deferred which callbacks with a list of ProduceResponse Raises ------ FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ encoder = partial(KafkaCodec.encode_produce_request, acks=acks, timeout=timeout) if acks == 0: decoder = None else: decoder = KafkaCodec.decode_produce_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_fetch_request(self, payloads=None, fail_on_error=True, callback=None, max_wait_time=DEFAULT_FETCH_SERVER_WAIT_MSECS, min_bytes=DEFAULT_FETCH_MIN_BYTES): """ Encode and send a FetchRequest Payloads are grouped by topic and partition so they can be pipelined to the same brokers. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError """ if self.timeout is not None and (max_wait_time / 1000) > (self.timeout - 0.1): raise ValueError( "%r: max_wait_time: %d must be less than client.timeout by " "at least 100 milliseconds.", self, max_wait_time) encoder = partial(KafkaCodec.encode_fetch_request, max_wait_time=max_wait_time, min_bytes=min_bytes) # resps is a list of FetchResponse() objects, each of which can hold # 1-n messages. resps = yield self._send_broker_aware_request( payloads, encoder, KafkaCodec.decode_fetch_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_request(self, payloads=None, fail_on_error=True, callback=None): resps = yield self._send_broker_aware_request( payloads, KafkaCodec.encode_offset_request, KafkaCodec.decode_offset_response) returnValue(self._handle_responses(resps, fail_on_error, callback)) @inlineCallbacks def send_offset_fetch_request(self, group, payloads=None, fail_on_error=True, callback=None): """ Takes a group (string) and list of OffsetFetchRequest and returns a list of OffsetFetchResponse objects """ encoder = partial(KafkaCodec.encode_offset_fetch_request, group=group) decoder = KafkaCodec.decode_offset_fetch_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder, consumer_group=group) returnValue( self._handle_responses(resps, fail_on_error, callback, group)) @inlineCallbacks def send_offset_commit_request(self, group, payloads=None, fail_on_error=True, callback=None, group_generation_id=-1, consumer_id=''): """Send a list of OffsetCommitRequests to the Kafka broker for the given consumer group. Args: group (str): The consumer group to which to commit the offsets payloads ([OffsetCommitRequest]): List of topic, partition, offsets to commit. fail_on_error (bool): Whether to raise an exception if a response from the Kafka broker indicates an error callback (callable): a function to call with each of the responses before returning the returned value to the caller. group_generation_id (int): Must currently always be -1 consumer_id (str): Must currently always be empty string Returns: [OffsetCommitResponse]: List of OffsetCommitResponse objects. Will raise KafkaError for failed requests if fail_on_error is True """ group = _coerce_consumer_group(group) encoder = partial(KafkaCodec.encode_offset_commit_request, group=group, group_generation_id=group_generation_id, consumer_id=consumer_id) decoder = KafkaCodec.decode_offset_commit_response resps = yield self._send_broker_aware_request(payloads, encoder, decoder, consumer_group=group) returnValue( self._handle_responses(resps, fail_on_error, callback, group)) # # # Private Methods # # # def _handle_responses(self, responses, fail_on_error, callback=None, consumer_group=None): out = [] for resp in responses: try: _check_error(resp) except (UnknownTopicOrPartitionError, NotLeaderForPartitionError): log.error('Error found in response: %s', resp) self.reset_topic_metadata(resp.topic) if fail_on_error: raise except (OffsetsLoadInProgressError, NotCoordinatorForConsumerError, ConsumerCoordinatorNotAvailableError): log.error('Error found in response: %s Consumer Group: %s', resp, consumer_group) self.reset_consumer_group_metadata(consumer_group) if fail_on_error: raise if callback is not None: out.append(callback(resp)) else: out.append(resp) return out def _get_brokerclient(self, host, port): """ Get or create a connection to a broker using host and port. Returns the broker immediately, but the broker may have just been created and be in an unconnected state. The broker will connect on an as-needed basis when processing a request. """ host_key = (nativeString(host), port) if host_key not in self.clients: # We don't have a brokerclient for that host/port, create one, # ask it to connect log.debug("%r: creating client for %s:%d", self, host, port) self.clients[host_key] = _KafkaBrokerClient( self.reactor, host, port, self.clientId, subscriber=self._update_broker_state, ) return self.clients[host_key] def _update_broker_state(self, broker, connected, reason): """ Handle updates of a broker's connection state. If we get an update with a state other than 'connected', reset our metadata, as it indicates that a connection to one of our brokers ended, or failed to come up correctly """ def _md_load_on_disconnect_failure(result): log.debug( 'Attempt to fetch Kafka metadata after ' 'disconnect failed with: %r', result) state = "Connected" if connected else "Disconnected" log.debug("Broker:%r state changed:%s for reason:%r", broker, state, reason) # If one of our broker clients disconnected, there may be a metadata # change. Make sure we check... if not connected: self.reset_all_metadata() if not self._closing: # If we're not shutting down, and we're not already doing a # lookup, then mark ourselves as needing to re-resolve, and # then start a metadata lookup, which will do the lookup as # needed... if self._collect_hosts_d is None: self._collect_hosts_d = True d = self.load_metadata_for_topics() d.addErrback(_md_load_on_disconnect_failure) def _close_brokerclients(self, brokers): """ Pop each of the supplied brokers from self.clients Close that broker, and manage the completion of those operations """ def _log_close_failure(failure, brokerclient): log.debug('BrokerClient: %s close result: %s: %s', brokerclient, failure.type.__name__, failure.getErrorMessage()) def _clean_close_dlist(result, close_dlist): # If there aren't any other outstanding closings going on, then # close_dlist == self.close_dlist, and we can reset it. if close_dlist == self.close_dlist: self.close_dlist = None if not self.close_dlist: dList = [] else: log.debug("%r: _update_brokers has nested deferredlist: %r", self, self.close_dlist) dList = [self.close_dlist] for broker in list(brokers): # broker better be in self.clients if not, weirdness brokerClient = self.clients.pop(broker) log.debug("Calling close on: %r", brokerClient) dList.append(brokerClient.close().addErrback( _log_close_failure, brokerClient)) self.close_dlist = DeferredList(dList) self.close_dlist.addBoth(_clean_close_dlist, self.close_dlist) def _update_brokers(self, new_brokers, remove=False): """ Update our self.clients based on brokers in received metadata Take the received dict of brokers and reconcile it with our current list of brokers (self.clients). If there is a new one, bring up a new connection to it, and if remove is True, and any in our current list aren't in the metadata returned, disconnect from it. """ log.debug("%r: _update_brokers: %r remove: %r", self, new_brokers, remove) # Work with the brokers as sets new_brokers = set(new_brokers) current_brokers = set(self.clients.keys()) # set of added added_brokers = new_brokers - current_brokers # removed removed_brokers = current_brokers - new_brokers # Create any new brokers based on the new metadata for broker in added_brokers: self._get_brokerclient(*broker) # Disconnect and remove from self.clients any removed brokerclients if remove and removed_brokers: self._close_brokerclients(removed_brokers) @inlineCallbacks def _get_leader_for_partition(self, topic, partition): """ Returns the leader for a partition or None if the partition exists but has no leader. PartitionUnavailableError will be raised if the topic or partition is not part of the metadata. """ key = TopicAndPartition(topic, partition) # reload metadata whether the partition is not available # or has no leader (broker is None) if self.topics_to_brokers.get(key) is None: yield self.load_metadata_for_topics(topic) if key not in self.topics_to_brokers: raise PartitionUnavailableError("%s not available" % str(key)) returnValue(self.topics_to_brokers[key]) @inlineCallbacks def _get_coordinator_for_group(self, consumer_group): """Returns the coordinator (broker) for a consumer group Returns the broker for a given consumer group or Raises ConsumerCoordinatorNotAvailableError """ if self.consumer_group_to_brokers.get(consumer_group) is None: yield self.load_consumer_metadata_for_group(consumer_group) returnValue(self.consumer_group_to_brokers.get(consumer_group)) def _next_id(self): """Generate a new correlation id.""" # modulo to keep within int32 (signed) self.correlation_id = (self.correlation_id + 1) % 2**31 return self.correlation_id def _make_request_to_broker(self, broker, requestId, request, **kwArgs): """Send a request to the specified broker.""" def _timeout_request(broker, requestId): """The time we allotted for the request expired, cancel it.""" try: broker.cancelRequest( requestId, reason=RequestTimedOutError( 'Request: {} cancelled due to timeout'.format( requestId))) except KeyError: # pragma: no cover This should never happen... log.exception( 'ERROR: Failed to find key for timed-out ' 'request. Broker: %r Req: %d', broker, requestId) raise if self._disconnect_on_timeout: broker.disconnect() def _alert_blocked_reactor(timeout, start): """Complain if this timer didn't fire before the timeout elapsed""" now = self.reactor.seconds() if now >= (start + timeout): log.error('Reactor was starved for %f seconds during request.', now - start) def _cancel_timeout(_, dc): """Request completed/cancelled, cancel the timeout delayedCall.""" if dc.active(): dc.cancel() return _ # Make the request to the specified broker log.debug('_mrtb: sending request: %d to broker: %r', requestId, broker) d = broker.makeRequest(requestId, request, **kwArgs) if self.timeout is not None: # Set a delayedCall to fire if we don't get a reply in time dc = self.reactor.callLater(self.timeout, _timeout_request, broker, requestId) # Set a delayedCall to complain if the reactor has been blocked rc = self.reactor.callLater((self.timeout * 0.9), _alert_blocked_reactor, self.timeout, self.reactor.seconds()) # Setup a callback on the request deferred to cancel both callLater d.addBoth(_cancel_timeout, dc) d.addBoth(_cancel_timeout, rc) return d @inlineCallbacks def _send_broker_unaware_request(self, requestId, request): """ Attempt to send a broker-agnostic request to one of the available brokers. Keep trying until you succeed, or run out of hosts to try """ # Check if we've had a condition which indicates we might need to # re-resolve the IPs of our hosts if self._collect_hosts_d: if self._collect_hosts_d is True: # Lookup needed, but not yet started. Start it. self._collect_hosts_d = _collect_hosts(self._hosts) broker_list = yield self._collect_hosts_d self._collect_hosts_d = None if broker_list: self._update_brokers(broker_list, remove=True) else: # Lookup of all hosts returned no IPs. Log an error, setup # to retry lookup, and try to continue with the brokers we # already have... log.error('Failed to resolve hosts: %r', self._hosts) self._collect_hosts_d = True brokers = list(self.clients.values()) # Randomly shuffle the brokers to distribute the load, but random.shuffle(brokers) # Prioritize connected brokers brokers.sort(reverse=True, key=lambda broker: broker.connected()) for broker in brokers: try: log.debug('_sbur: sending request: %d to broker: %r', requestId, broker) d = self._make_request_to_broker(broker, requestId, request) resp = yield d returnValue(resp) except KafkaError as e: log.warning( "Could not makeRequest id:%d [%r] to server %s:%i, " "trying next server. Err: %r", requestId, request, broker.host, broker.port, e) # Anytime we fail a request to every broker, setup for a re-resolve self._collect_hosts_d = True raise KafkaUnavailableError( "All servers (%r) failed to process request" % brokers) @inlineCallbacks def _send_broker_aware_request(self, payloads, encoder_fn, decode_fn, consumer_group=None): """ Group a list of request payloads by topic+partition and send them to the leader broker for that partition using the supplied encode/decode functions Params ====== payloads: list of object-like entities with a topic and partition attribute. payloads must be grouped by (topic, partition) tuples. encode_fn: a method to encode the list of payloads to a request body, must accept client_id, correlation_id, and payloads as keyword arguments decode_fn: a method to decode a response body into response objects. The response objects must be object-like and have topic and partition attributes consumer_group: [string], optional. Indicates the request should be directed to the Offset Coordinator for the specified consumer_group. Return ====== deferred yielding a list of response objects in the same order as the supplied payloads, or None if decode_fn is None. Raises ====== FailedPayloadsError, LeaderUnavailableError, PartitionUnavailableError, """ # Calling this without payloads is nonsensical if not payloads: raise ValueError("Payloads parameter is empty") # Group the requests by topic+partition original_keys = [] payloads_by_broker = collections.defaultdict(list) # Go through all the payloads, lookup the leader/coordinator for that # payload's topic/partition or consumer group. If there's no # leader/coordinator (broker), raise. For each broker, keep # a list of the payloads to be sent to it. Also, for each payload in # the list of payloads, make a corresponding list (original_keys) with # the topic/partition in the same order, so we can lookup the returned # result(s) by that topic/partition key in the set of returned results # and return them in a list the same order the payloads were supplied for payload in payloads: # get leader/coordinator, depending on consumer_group if consumer_group is None: leader = yield self._get_leader_for_partition( payload.topic, payload.partition) if leader is None: raise LeaderUnavailableError( "Leader not available for topic %s partition %s" % (payload.topic, payload.partition)) else: leader = yield self._get_coordinator_for_group(consumer_group) if leader is None: raise ConsumerCoordinatorNotAvailableError( "Coordinator not available for group: %s" % (consumer_group)) payloads_by_broker[leader].append(payload) original_keys.append((payload.topic, payload.partition)) # Accumulate the responses in a dictionary acc = {} # The kafka server doesn't send replies to produce requests # with acks=0. In that case, our decode_fn will be # None, and we need to let the brokerclient know not # to expect a reply. makeRequest() returns a deferred # regardless, but in the expectResponse=False case, it will # fire as soon as the request is sent, and it can errBack() # due to being cancelled prior to the broker being able to # send the request. expectResponse = decode_fn is not None # keep a list of payloads that were failed to be sent to brokers failed_payloads = [] # Keep track of outstanding requests in a list of deferreds inFlight = [] # and the payloads that go along with them payloadsList = [] # For each broker, send the list of request payloads, for broker_meta, payloads in payloads_by_broker.items(): broker = self._get_brokerclient(broker_meta.host, broker_meta.port) requestId = self._next_id() request = encoder_fn(client_id=self._clientIdBytes, correlation_id=requestId, payloads=payloads) # Make the request d = self._make_request_to_broker(broker, requestId, request, expectResponse=expectResponse) inFlight.append(d) payloadsList.append(payloads) # Wait for all the responses to come back, or the requests to fail results = yield DeferredList(inFlight, consumeErrors=True) # We now have a list of (succeeded, response/Failure) tuples. Check 'em for (success, response), payloads in zip(results, payloadsList): if not success: # The brokerclient deferred was errback()'d: # The send failed, or this request was cancelled (by timeout) log.debug("%r: request:%r to broker failed: %r", self, payloads, response) failed_payloads.extend([(p, response) for p in payloads]) continue if not expectResponse: continue # Successful request/response. Decode it and store by topic/part for response in decode_fn(response): acc[(response.topic, response.partition)] = response # Order the accumulated responses by the original key order # Note that this scheme will throw away responses which we did # not request. See test_send_fetch_request, where the response # includes an error, but for a topic/part we didn't request. # Since that topic/partition isn't in original_keys, we don't pass # it back from here and it doesn't error out. # If any of the payloads failed, fail responses = [acc[k] for k in original_keys if k in acc] if acc else [] if failed_payloads: self.reset_all_metadata() raise FailedPayloadsError(responses, failed_payloads) returnValue(responses)