def errback_broken_link(self, failure: Failure) -> FoundLink:  # Failure may not be the right typehint
    """
    Errback for requests that ended in a Twisted failure - which covers most
    of the broken links this spider hopes to find.

    This is a generator. DNS lookup and TCP timeout failures are turned into
    a placeholder 404 response and everything ``parse_broken_link`` produces
    for that response is yielded. Client timeouts are only logged.

    :param failure: A Twisted failure raised by the Retry middleware
    :return: generator over whatever ``parse_broken_link`` yields
    """
    # Structure of this function heavily inspired by:
    # https://docs.scrapy.org/en/latest/topics/request-response.html#topics-request-response-ref-errbacks

    if failure.check(DNSLookupError, TCPTimedOutError):
        # TCP or DNS error: short-circuit to the pipeline
        self.logger.info(f'Handled DNS/TCP related error. {failure.request.url}')
        failed_request = failure.request
        placeholder = Response(
            url=failed_request.url,
            status=404,  # Kind of a lie
            request=failed_request,
        )
        yield from self.parse_broken_link(placeholder)
        return

    if failure.check(TimeoutError):
        # The client timed out: report that
        self.logger.info(f'Client timeout. {failure.request.url}')
        self.logger.error(repr(failure))
def deferredDHCPRequestErrback(self, failure: Failure) -> Optional[Failure]:
    """Errback for the DHCP probe requests and the list that wraps them.

    A ``FirstError`` cancels every request in ``self.deferredDHCPRequests``
    and is swallowed (``None`` is returned). Any other failure is logged as
    appropriate and returned so that it keeps propagating.
    """
    if failure.check(FirstError):
        # If an error occurred, cancel any other pending requests.
        # (The error is likely to occur for those requests, too.)
        # Unfortunately we can't cancel using the DeferredList, since
        # the DeferredList considers itself "called" the moment the first
        # errback is invoked.
        self.cancelAll(self.deferredDHCPRequests)
        # Suppress further error handling. The original Deferred's errback
        # has already been called.
        return None

    if failure.check(DHCPProbeException):
        log.msg("DHCP probe failed. %s" % failure.getErrorMessage())
    elif not failure.check(CancelledError):
        # Intentional cancellations are skipped so they don't spam the log;
        # everything else is unexpected.
        log.err(
            failure,
            "DHCP probe on '%s' failed with an unknown error." % self.ifname,
        )

    # Make sure the error is propagated to the DeferredList.
    # We need this so that the DeferredList knows to call us with
    # FirstError, which is our indicator to cancel the remaining calls.
    # (It's set to consumeErrors, so it won't spam the log.)
    return failure
def return_html_error(
    f: failure.Failure,
    request: Request,
    error_template: Union[str, jinja2.Template],
) -> None:
    """Render the given failure as an HTML error page and send it.

    RedirectException and other CodeMessageExceptions (such as SynapseError)
    supply their own code and message. Cancellations and every other failure
    get a fixed code and message.

    Args:
        f: the error to report
        request: the failing request
        error_template: the HTML template. Can be either a string (with
            `{code}`, `{msg}` placeholders), or a jinja2 template
    """
    if f.check(CodeMessageException):
        # mypy doesn't understand that f.check asserts the type.
        cme: CodeMessageException = f.value  # type: ignore
        code, msg = cme.code, cme.msg

        if isinstance(cme, RedirectException):
            logger.info("%s redirect to %s", request, cme.location)
            request.setHeader(b"location", cme.location)
            request.cookies.extend(cme.cookies)
        elif isinstance(cme, SynapseError):
            logger.info("%s SynapseError: %s - %s", request, code, msg)
        else:
            exc_info = (f.type, f.value, f.getTracebackObject())
            logger.error(
                "Failed handle request %r",
                request,
                exc_info=exc_info,  # type: ignore[arg-type]
            )
    elif f.check(CancelledError):
        code = HTTP_STATUS_REQUEST_CANCELLED
        msg = "Request cancelled"

        # A cancellation is only logged when the client has not disconnected.
        if not request._disconnected:
            exc_info = (f.type, f.value, f.getTracebackObject())
            logger.error(
                "Got cancellation before client disconnection when handling request %r",
                request,
                exc_info=exc_info,  # type: ignore[arg-type]
            )
    else:
        code = HTTPStatus.INTERNAL_SERVER_ERROR
        msg = "Internal server error"

        exc_info = (f.type, f.value, f.getTracebackObject())
        logger.error(
            "Failed handle request %r",
            request,
            exc_info=exc_info,  # type: ignore[arg-type]
        )

    # Plain-string templates get the message HTML-escaped by hand; jinja2
    # templates receive it as-is.
    body = (
        error_template.format(code=code, msg=html.escape(msg))
        if isinstance(error_template, str)
        else error_template.render(code=code, msg=msg)
    )

    respond_with_html(request, code, body)
def connectionLost(self, reason: Failure) -> None:
    """Fire ``self.deferred`` once the connection has gone away.

    ``ResponseDone`` and ``PotentialDataLoss`` both count as success and
    deliver ``self.length``; any other reason is passed to the errback.
    """
    # PotentialDataLoss is treated like a normal completion; stolen from
    # https://github.com/twisted/treq/pull/49/files
    # http://twistedmatrix.com/trac/ticket/4840
    if reason.check(ResponseDone, PotentialDataLoss):
        self.deferred.callback(self.length)
        return

    self.deferred.errback(reason)
def _timeout_to_request_timed_out_error(f: Failure):
    """Translate timeout failures into ``RequestTimedOutError``.

    Raises RequestTimedOutError for connection-level and request-level
    timeouts; any other failure is returned untouched.
    """
    # The TCP connection has its own timeout (set by the 'connectTimeout' param
    # on the Agent), which raises twisted_error.TimeoutError exception.
    connect_timeouts = (
        twisted_error.TimeoutError,
        twisted_error.ConnectingCancelledError,
    )
    if f.check(*connect_timeouts):
        raise RequestTimedOutError("Timeout connecting to remote server")

    # this one means that we hit our overall timeout on the request
    response_timeouts = (defer.TimeoutError, ResponseNeverReceived)
    if f.check(*response_timeouts):
        raise RequestTimedOutError("Timeout waiting for response from remote server")

    return f
def return_json_error(f: failure.Failure, request: SynapseRequest) -> None:
    """Report a failure to the client as a JSON error response.

    The status code and body depend on the kind of failure (SynapseError,
    cancellation, or anything else). If the response has already started
    being written, the connection is aborted instead.
    """
    if f.check(SynapseError):
        # mypy doesn't understand that f.check asserts the type.
        exc: SynapseError = f.value  # type: ignore
        error_code = exc.code
        error_dict = exc.error_dict()

        logger.info("%s SynapseError: %s - %s", request, error_code, exc.msg)
    elif f.check(CancelledError):
        error_code = HTTP_STATUS_REQUEST_CANCELLED
        error_dict = {"error": "Request cancelled", "errcode": Codes.UNKNOWN}

        # A cancellation is only logged when the client has not disconnected.
        if not request._disconnected:
            exc_info = (f.type, f.value, f.getTracebackObject())
            logger.error(
                "Got cancellation before client disconnection from %r: %r",
                request.request_metrics.name,
                request,
                exc_info=exc_info,  # type: ignore[arg-type]
            )
    else:
        error_code = 500
        error_dict = {"error": "Internal server error", "errcode": Codes.UNKNOWN}

        exc_info = (f.type, f.value, f.getTracebackObject())
        logger.error(
            "Failed handle request via %r: %r",
            request.request_metrics.name,
            request,
            exc_info=exc_info,  # type: ignore[arg-type]
        )

    # Only respond with an error response if we haven't already started writing,
    # otherwise lets just kill the connection
    if not request.startedWriting:
        respond_with_json(
            request,
            error_code,
            error_dict,
            send_cors=True,
        )
        return

    if request.transport:
        try:
            request.transport.abortConnection()
        except Exception:
            # abortConnection throws if the connection is already closed
            pass
def connectionLost(self, reason: Failure) -> None:
    """Fire ``self.deferred`` once the connection has gone away.

    Does nothing if the deferred has already fired. ``ResponseDone`` and
    ``PotentialDataLoss`` deliver ``self.length``; any other reason is
    passed to the errback.
    """
    finished = self.deferred

    # If the maximum size was already exceeded, there's nothing to do.
    if finished.called:
        return

    # PotentialDataLoss is treated like a normal completion; stolen from
    # https://github.com/twisted/treq/pull/49/files
    # http://twistedmatrix.com/trac/ticket/4840
    if reason.check(ResponseDone, PotentialDataLoss):
        finished.callback(self.length)
    else:
        finished.errback(reason)
def _log_download_errors(self, spider_failure: Failure, download_failure: Failure,
                         request: Request, spider: Spider) -> Union[Failure, None]:
    """Log and silence errors that come from the engine (typically download
    errors that got propagated thru here).

    spider_failure: the value passed into the errback of self.call_spider()
    download_failure: the value passed into _scrape2() from
    ExecutionEngine._handle_downloader_output() as "result"

    Returns ``spider_failure`` when it is a different object from
    ``download_failure``, otherwise ``None``.
    """
    ignored = download_failure.check(IgnoreRequest)

    if not ignored and download_failure.frames:
        # Failures that carry frames are logged together with their exc_info.
        logkws = self.logformatter.download_error(download_failure, request, spider)
        logger.log(
            *logformatter_adapter(logkws),
            extra={'spider': spider},
            exc_info=failure_to_exc_info(download_failure),
        )
    elif not ignored:
        # No frames: log only if there is an error message to show.
        errmsg = download_failure.getErrorMessage()
        if errmsg:
            logkws = self.logformatter.download_error(
                download_failure, request, spider, errmsg)
            logger.log(
                *logformatter_adapter(logkws),
                extra={'spider': spider},
            )

    return spider_failure if spider_failure is not download_failure else None
def __call__(self, argstr):
    """Run the exposed method named by the first word of ``argstr``.

    This is a generator written in the ``defer.waitForDeferred`` style; the
    last value it yields is the result.

    With an empty ``argstr`` the result is a usage dict holding this object's
    docstring and, per exposed method, its args and doc. Otherwise the first
    word selects the method, the remaining words are its arguments, and the
    result is the outcome of ``self.invoke``. Failures are converted into a
    result context rather than raised.
    """
    args = argstr.split()
    resultContext = None
    if not args:
        #return command usage
        methods = {}
        # distinct loop names so the parsed ``args`` list is not shadowed
        for name, method_args, doc in self.exposedMethodInfo:
            methods[name] = {'args': method_args, 'doc': doc}
        resultContext = dict(description=self.__doc__, methods=methods)
        yield resultContext
    else:
        method = args.pop(0)
        try:
            wfd = defer.waitForDeferred(self.invoke(method, args))
            yield wfd
            resultContext = wfd.getResult()
        except Exception:
            # Deliberately not a bare ``except``: GeneratorExit raised at the
            # ``yield`` above (and KeyboardInterrupt/SystemExit) must
            # propagate instead of being turned into a result and yielded.
            failure = Failure()
            if failure.check(DroneCommandFailed):
                resultContext = failure.value.resultContext
            else:
                #be nice and return something to the end user
                template = "[%(application)s] "
                template += "%s: %s" % (getException(failure),
                                        failure.getErrorMessage())
                context = {
                    'error': True,
                    'code': -2,
                    'stacktrace': failure.getTraceback(),
                }
                resultContext = self.resultContext(template, None, **context)

        yield resultContext
def _start_stop_common(self, label, action):
    """Call ``self.model.<action>(label)`` and yield the outcome.

    This is a generator written in the ``defer.waitForDeferred`` style; the
    last value it yields is the result. A dict result is also pushed into the
    instance via ``updateInfo``. On error the result is a ``Failure`` wrapping
    ``DroneCommandFailed``. The result is yielded after a one second delay.

    @param label - passed to the model action and to ``getInstance``
    @param action (str) - name of the attribute looked up on ``self.model``
    """
    result = {}
    try:
        function = getattr(self.model, action)
        d = defer.maybeDeferred(function, label)
        wfd = defer.waitForDeferred(d)
        yield wfd
        result = wfd.getResult()
        #take this time to update the instance
        if isinstance(result, dict):
            thisInst = self.model.getInstance(label)
            thisInst.updateInfo(result)
    except Exception:
        # Deliberately not a bare ``except``: GeneratorExit raised at the
        # ``yield`` above (and KeyboardInterrupt/SystemExit) must propagate
        # instead of being wrapped and followed by further yields.
        failure = Failure()
        if failure.check(DroneCommandFailed):
            result = failure.value.resultContext
        else:
            #log the error, allowing for debugging
            self.debugReport()
            #be nice and return something to the end user
            template = "%s: %s" % (getException(failure),
                                   failure.getErrorMessage())
            context = {'error': failure, 'code': -2}
            result = self.resultContext(template, None, **context)
        #finally wrap the failure into a known type
        result = Failure(DroneCommandFailed(result))

    #AppInstances need a moment to be updated
    d = defer.Deferred()
    config.reactor.callLater(1.0, d.callback, None)
    wfd = defer.waitForDeferred(d)
    yield wfd
    wfd.getResult()

    yield result
def return_json_error(f: failure.Failure, request: SynapseRequest) -> None:
    """Report a failure to the client as a JSON error response.

    A SynapseError supplies its own code and error dict; anything else
    becomes a 500. If the response has already started being written, the
    connection is aborted instead.
    """
    if not f.check(SynapseError):
        error_code = 500
        error_dict = {"error": "Internal server error", "errcode": Codes.UNKNOWN}

        exc_info = (f.type, f.value, f.getTracebackObject())
        logger.error(
            "Failed handle request via %r: %r",
            request.request_metrics.name,
            request,
            exc_info=exc_info,
        )
    else:
        synapse_error = f.value
        error_code = synapse_error.code
        error_dict = synapse_error.error_dict()

        logger.info(
            "%s SynapseError: %s - %s", request, error_code, synapse_error.msg
        )

    # Only respond with an error response if we haven't already started writing,
    # otherwise lets just kill the connection
    if not request.startedWriting:
        respond_with_json(
            request,
            error_code,
            error_dict,
            send_cors=True,
        )
        return

    if request.transport:
        try:
            request.transport.abortConnection()
        except Exception:
            # abortConnection throws if the connection is already closed
            pass
def __call__(self, argstr):
    """Run the exposed method named by the first word of ``argstr``.

    This is a generator written in the ``defer.waitForDeferred`` style; the
    last value it yields is the result.

    With an empty ``argstr`` the result is a usage dict holding this object's
    docstring and, per exposed method, its args and doc. Otherwise the first
    word selects the method, the remaining words are its arguments, and the
    result is the outcome of ``self.invoke``. Failures are converted into a
    result context rather than raised.
    """
    args = argstr.split()
    resultContext = None
    if not args:
        #return command usage
        methods = {}
        # distinct loop names so the parsed ``args`` list is not shadowed
        for name, method_args, doc in self.exposedMethodInfo:
            methods[name] = {'args': method_args, 'doc': doc}
        resultContext = dict(description=self.__doc__, methods=methods)
        yield resultContext
    else:
        method = args.pop(0)
        try:
            wfd = defer.waitForDeferred(self.invoke(method, args))
            yield wfd
            resultContext = wfd.getResult()
        except Exception:
            # Deliberately not a bare ``except``: GeneratorExit raised at the
            # ``yield`` above (and KeyboardInterrupt/SystemExit) must
            # propagate instead of being turned into a result and yielded.
            failure = Failure()
            if failure.check(DroneCommandFailed):
                resultContext = failure.value.resultContext
            else:
                #be nice and return something to the end user
                template = "[%(application)s] "
                template += "%s: %s" % (getException(failure),
                                        failure.getErrorMessage())
                context = {
                    'error': True,
                    'code': -2,
                    'stacktrace': failure.getTraceback(),
                }
                resultContext = self.resultContext(template, None, **context)

        yield resultContext
def connectionLost(self, reason: Failure = connectionDone) -> None:
    """Called by Twisted when the transport connection is lost.

    No need to write anything to transport here.

    Cancels the timeout, fires ``self._conn_lost_deferred`` with the
    collected connection-lost errors, closes every open stream and clears
    the stream bookkeeping.
    """
    # Cancel the timeout if not done yet
    self.setTimeout(None)

    # Notify the connection pool instance such that no new requests are
    # sent over current connection.
    #
    # ``connectionDone`` is a Failure *instance* (it is this method's default
    # argument), while Failure.check() matches exception classes or their
    # qualified names. Passing the instance never matches, which recorded
    # even a clean close as an error. Compare against the wrapped class.
    if not reason.check(connectionDone.type):
        self._conn_lost_errors.append(reason)

    self._conn_lost_deferred.callback(self._conn_lost_errors)

    for stream in self.streams.values():
        # Streams whose request was already sent are reported as lost;
        # the rest are reported as inactive.
        if stream.metadata['request_sent']:
            close_reason = StreamCloseReason.CONNECTION_LOST
        else:
            close_reason = StreamCloseReason.INACTIVE
        stream.close(close_reason, self._conn_lost_errors, from_protocol=True)

    self.metadata['active_streams'] -= len(self.streams)
    self.streams.clear()
    self._pending_request_stream_pool.clear()
    self.conn.close_connection()
def assert_invalid_hostname(failure: Failure):
    """Assert that ``failure`` is an InvalidHostname whose message mentions
    'localhost', '127.0.0.1' and the request."""
    from scrapy.core.http2.stream import InvalidHostname

    matched = failure.check(InvalidHostname)
    self.assertIsNotNone(matched)

    message = str(failure.value)
    for expected in ('localhost', '127.0.0.1', str(request)):
        self.assertIn(expected, message)
def _start_stop_common(self, label, action):
    """Call ``self.model.<action>(label)`` and yield the outcome.

    This is a generator written in the ``defer.waitForDeferred`` style; the
    last value it yields is the result. A dict result is also pushed into the
    instance via ``updateInfo``. On error the result is a ``Failure`` wrapping
    ``DroneCommandFailed``. The result is yielded after a one second delay.

    @param label - passed to the model action and to ``getInstance``
    @param action (str) - name of the attribute looked up on ``self.model``
    """
    result = {}
    try:
        function = getattr(self.model, action)
        d = defer.maybeDeferred(function, label)
        wfd = defer.waitForDeferred(d)
        yield wfd
        result = wfd.getResult()
        #take this time to update the instance
        if isinstance(result, dict):
            thisInst = self.model.getInstance(label)
            thisInst.updateInfo(result)
    except Exception:
        # Deliberately not a bare ``except``: GeneratorExit raised at the
        # ``yield`` above (and KeyboardInterrupt/SystemExit) must propagate
        # instead of being wrapped and followed by further yields.
        failure = Failure()
        if failure.check(DroneCommandFailed):
            result = failure.value.resultContext
        else:
            #log the error, allowing for debugging
            self.debugReport()
            #be nice and return something to the end user
            template = "%s: %s" % (getException(failure),
                                   failure.getErrorMessage())
            context = {'error': failure, 'code': -2}
            result = self.resultContext(template, None, **context)
        #finally wrap the failure into a known type
        result = Failure(DroneCommandFailed(result))

    #AppInstances need a moment to be updated
    d = defer.Deferred()
    reactor.callLater(1.0, d.callback, None)
    wfd = defer.waitForDeferred(d)
    yield wfd
    wfd.getResult()

    yield result
def convert_cancelled(value: failure.Failure):
    """Turn a CancelledError into a TimeoutError when our timeout has fired.

    If the original deferred was cancelled and the timeout has fired, the
    timeout was the reason for the cancellation. Any other failure is
    returned unchanged.
    """
    if not timed_out[0]:
        return value
    if not value.check(CancelledError):
        return value
    raise defer.TimeoutError("Timed out after %gs" % (timeout,))
def processing_failed(
    failure: Failure, error_handlers: "ErrorHandlers"
) -> Deferred:
    """Handle ``failure`` with the first matching registered error handler.

    Handlers are tried in order by recursing over the remaining ones. When
    none are left, an HTTPException is rendered to the request and anything
    else is handed to ``request.processingFailed``.
    """
    # The failure processor writes to the request.  If the
    # request is already finished we should suppress failure
    # processing.  We don't return failure here because there
    # is no way to surface this failure to the user if the
    # request is finished.
    if request_finished[0]:
        if not failure.check(defer.CancelledError):
            log.err(failure, "Unhandled Error Processing Request.")
        return

    # If there are no more registered handlers, apply some defaults
    if not error_handlers:
        if not failure.check(HTTPException):
            request.processingFailed(failure)
            return

        http_error = failure.value
        request.setResponseCode(http_error.code)
        rendered = http_error.get_response({})

        for header_name, header_value in rendered.headers:
            request.setHeader(
                ensure_utf8_bytes(header_name), ensure_utf8_bytes(header_value)
            )

        return ensure_utf8_bytes(b"".join(rendered.iter_encoded()))

    # Each error handler is a tuple of
    # (list_of_exception_types, handler_fn)
    candidate = error_handlers[0]
    remaining = error_handlers[1:]

    if not failure.check(*candidate[0]):
        return processing_failed(failure, remaining)

    d = maybeDeferred(
        self._app.execute_error_handler,
        candidate[1],
        request,
        failure,
    )
    d.addCallback(process)
    return d.addErrback(processing_failed, remaining)
def handle_errors(self, failure: Failure) -> None:
    """Log a failure from the operator bonded check and re-raise it if fatal.

    Every failure is logged as a warning with its cleaned traceback. A
    failure wrapping ``self.OperatorNoLongerBonded`` is then re-raised.

    :param failure: the Twisted failure to handle
    :raises: the wrapped exception when it is ``self.OperatorNoLongerBonded``
    """
    cleaned_traceback = self.clean_traceback(failure)
    self.log.warn(
        f"Unhandled error during operator bonded check: {cleaned_traceback}"
    )
    # Failure.check() takes exception classes as separate positional
    # arguments. Wrapping the class in a list never matches, so the
    # exception was silently swallowed instead of propagated.
    if failure.check(self.OperatorNoLongerBonded):
        # this type of exception we want to propagate because we will shut down
        failure.raiseException()
def fail(failure: Failure) -> None:
    """Errback for a failed connection attempt: print the traceback and
    reconnect, unless the attempt was cancelled while stopping."""
    # If the Deferred was cancelled (e.g. during shutdown) do not try to
    # reconnect (this will cause an infinite loop of errors).
    cancelled_while_stopping = failure.check(CancelledError) and self._stopping

    if not cancelled_while_stopping:
        # For a different error, print the traceback and re-connect.
        failure.printTraceback(file=sys.__stderr__)
        self._connection_waiter = None
        self._connect()
def newfunc(*args, **kwargs):
    """Call ``func``. An error that already is an ``exc`` is re-raised as-is;
    any other error is re-raised as a new ``exc`` carrying the original in
    ``inner_exception``."""
    try:
        outcome = func(*args, **kwargs)
    except:
        captured = Failure()
        original = captured.value
        message = captured.getErrorMessage()
        if not captured.check(exc):
            wrapped = exc(message)
            wrapped.inner_exception = original
            raise wrapped
        raise original  #1
    return outcome
def invoke(self, name, args):
    """Invoke Exposed Methods

    The method's return value is normalised into a Deferred. Unknown
    methods, raised errors and badly formatted results all come back as a
    failed Deferred.

    @param name (str) - name of method to invoke
    @param args (tuple) - arguments to pass to invoked method

    @return (defer.Deferred)
    """
    if name not in self.exposedMethods:
        unknown = self.resultContext(
            "[%(application)s] Unknown method '%(method)s'",
            method=name,
            error='unknown method')
        return defer.fail(DroneCommandFailed(unknown))

    try:
        #our own form of maybeDeferred
        outcome = self.exposedMethods[name](*args)

        if isinstance(outcome, defer.Deferred):
            description = ' '.join(map(str, (self.action, name) + tuple(args)))
            return Action(description, outcome).deferred
        if isinstance(outcome, DroneCommandFailed):
            return defer.fail(outcome)
        if isinstance(outcome, dict) or outcome is None:
            #passing None through just feels dirty
            return defer.succeed(outcome)
        if isinstance(outcome, Failure):
            #sigh: re-raise so the handler below converts it
            outcome.raiseException()
        #probably from a triggerred Event callback
        elif type(outcome) == types.InstanceType:
            return defer.succeed(None)

        return defer.fail(FormatError(
            "Result is not formatted correctly you "
            "must return self.resultContext or DroneCommandFailed."
            "\nResult: <%s>" % (str(outcome),)))
    except:
        failure = Failure()
        if not failure.check(DroneCommandFailed):
            template = "[%(application)s] " + "%s: %s" % (
                getException(failure), failure.getErrorMessage())
            context = {
                'error': True,
                'code': -2,
                'stacktrace': failure.getTraceback(),
            }
        else:
            template = "[%(application)s] %(description)s"
            context = failure.value.resultContext
            if 'description' not in context:
                context['description'] = failure.getErrorMessage()
        wrapped = self.resultContext(template, None, **context)
        return defer.fail(DroneCommandFailed(wrapped))
def newfunc(*args, **kwargs):
    """Call ``func``. An error that already is an ``exc`` is re-raised as-is;
    any other error is re-raised as a new ``exc`` carrying the original in
    ``inner_exception``."""
    try:
        outcome = func(*args, **kwargs)
    except:
        captured = Failure()
        original = captured.value
        message = captured.getErrorMessage()
        if not captured.check(exc):
            wrapped = exc(message)
            wrapped.inner_exception = original
            raise wrapped
        raise original  #1
    return outcome
def error_callback(self, failure: Failure) -> Optional[Union[Failure, Iterable[Request]]]:
    """Errback for link requests.

    Non-HTTP failures are returned unchanged. For an HttpError: excluded URLs
    return ``None``, a 405 on a HEAD request is retried via
    ``retry_request_with_get``, and anything else is logged and returned.
    """
    if not failure.check(HttpError):
        return failure

    response = failure.value.response
    if self.exclude_error(response.url):
        return None

    head_rejected = response.status == 405 and response.request.method == 'HEAD'
    if head_rejected:
        # Method 'HEAD' not allowed, repeat request with 'GET'
        return self.retry_request_with_get(response.request)

    self.logger.error("Please check link: %s", response)
    return failure
def return_html_error(
    f: failure.Failure,
    request: Request,
    error_template: Union[str, jinja2.Template],
) -> None:
    """Render the given failure as an HTML error page and write it out.

    RedirectException and other CodeMessageExceptions (such as SynapseError)
    supply their own code and message; every other failure becomes an
    internal server error.

    Args:
        f: the error to report
        request: the failing request
        error_template: the HTML template. Can be either a string (with
            `{code}`, `{msg}` placeholders), or a jinja2 template
    """
    if not f.check(CodeMessageException):
        code = http.HTTPStatus.INTERNAL_SERVER_ERROR
        msg = "Internal server error"

        exc_info = (f.type, f.value, f.getTracebackObject())
        logger.error(
            "Failed handle request %r",
            request,
            exc_info=exc_info,
        )
    else:
        cme = f.value
        code, msg = cme.code, cme.msg

        if isinstance(cme, RedirectException):
            logger.info("%s redirect to %s", request, cme.location)
            request.setHeader(b"location", cme.location)
            request.cookies.extend(cme.cookies)
        elif isinstance(cme, SynapseError):
            logger.info("%s SynapseError: %s - %s", request, code, msg)
        else:
            exc_info = (f.type, f.value, f.getTracebackObject())
            logger.error(
                "Failed handle request %r",
                request,
                exc_info=exc_info,
            )

    # Plain-string templates get the message HTML-escaped by hand; jinja2
    # templates receive it as-is.
    body = (
        error_template.format(code=code, msg=html.escape(msg))
        if isinstance(error_template, str)
        else error_template.render(code=code, msg=msg)
    )

    payload = body.encode("utf-8")
    request.setResponseCode(code)
    request.setHeader(b"Content-Type", b"text/html; charset=utf-8")
    request.setHeader(b"Content-Length", b"%i" % (len(payload),))
    request.write(payload)
    finish_request(request)
def handleDeferreds(labels):
    """Remember last yield is the return value, don't use return

    Calls ``func`` once per label and collects the per-label results, the sum
    of the absolute result codes and the joined descriptions into one result
    context, which is the last value yielded.

    @param labels (sequence) - labels to run ``func`` against
    """
    results = {}
    descriptions = []
    ret = {}
    code = 0
    for label in labels:
        try:
            d = defer.maybeDeferred(func, label, *args[1:], **kwargs)
            wfd = defer.waitForDeferred(d)
            yield wfd
            ret = wfd.getResult()
        except Exception:
            # Deliberately not a bare ``except``: GeneratorExit raised at the
            # ``yield`` above must propagate rather than be recorded as a
            # command failure.
            failure = Failure()
            des = "%s: %s" % \
                (getException(failure), failure.getErrorMessage())
            if failure.check(DroneCommandFailed):
                # The failure's context becomes this label's result. This
                # used to be written to an undefined ``result`` dict, which
                # raised NameError.
                ret = failure.value.resultContext
                if 'description' not in ret:
                    ret['description'] = des
                ret['stacktrace'] = failure.getTraceback()
                ret['error'] = True
                if 'code' not in ret:
                    ret['code'] = 1
            else:
                ret = {
                    'description': des,
                    'code': 1,
                    'error': True,
                    'stacktrace': failure.getTraceback()
                }
        if not ret:  #NoneType detection
            ret = {'description': str(ret), 'code': 0}
        if 'code' in ret:
            code += abs(ret['code'])
        results[label] = ret
        try:
            descriptions.append(results[label]['description'])
        except Exception:
            # result without a usable 'description'; report and carry on
            self.debugReport()
    results['code'] = code
    try:
        results['description'] = '\n'.join(descriptions)
    except Exception:
        results['description'] = None

    # Only a single-label call has one label to report. This used to test
    # ``len(labels) == 0`` and then index ``labels[0]``, which raised
    # IndexError for empty input and never set the label otherwise.
    if len(labels) == 1:
        Label = labels[0]
    else:
        Label = None
    ret = self.resultContext('%(description)s', label=Label, **results)
    yield ret
def handleDeferreds(labels):
    """Remember last yield is the return value, don't use return

    Calls ``func`` once per label and collects the per-label results, the sum
    of the absolute result codes and the joined descriptions into one result
    context, which is the last value yielded.

    @param labels (sequence) - labels to run ``func`` against
    """
    results = {}
    descriptions = []
    ret = {}
    code = 0
    for label in labels:
        try:
            d = defer.maybeDeferred(func, label, *args[1:], **kwargs)
            wfd = defer.waitForDeferred(d)
            yield wfd
            ret = wfd.getResult()
        except Exception:
            # Deliberately not a bare ``except``: GeneratorExit raised at the
            # ``yield`` above must propagate rather than be recorded as a
            # command failure.
            failure = Failure()
            des = "%s: %s" % \
                (getException(failure), failure.getErrorMessage())
            if failure.check(DroneCommandFailed):
                # The failure's context becomes this label's result. This
                # used to be written to an undefined ``result`` dict, which
                # raised NameError.
                ret = failure.value.resultContext
                if 'description' not in ret:
                    ret['description'] = des
                ret['stacktrace'] = failure.getTraceback()
                ret['error'] = True
                if 'code' not in ret:
                    ret['code'] = 1
            else:
                ret = {
                    'description': des,
                    'code': 1,
                    'error': True,
                    'stacktrace': failure.getTraceback()
                }
        if not ret:  #NoneType detection
            ret = {'description': str(ret), 'code': 0}
        if 'code' in ret:
            code += abs(ret['code'])
        results[label] = ret
        try:
            descriptions.append(results[label]['description'])
        except Exception:
            # result without a usable 'description'; report and carry on
            self.debugReport()
    results['code'] = code
    try:
        results['description'] = '\n'.join(descriptions)
    except Exception:
        results['description'] = None

    # Only a single-label call has one label to report. This used to test
    # ``len(labels) == 0`` and then index ``labels[0]``, which raised
    # IndexError for empty input and never set the label otherwise.
    if len(labels) == 1:
        Label = labels[0]
    else:
        Label = None
    ret = self.resultContext('%(description)s', label=Label, **results)
    yield ret
def _handlePrFailure(self, f: Failure, tupleSelector):
    """Log a failure, distinguishing timeouts (no response received) from
    unexpected errors."""
    selector_json = tupleSelector.toJsonStr()

    if f.check(TimeoutError):
        logger.error(
            "Received no response from\nobservable %s\ntuple selector %s",
            self._filt,
            selector_json,
        )
        return

    logger.error(
        "Unexpected error, %s\nobservable %s\ntuple selector %s",
        f,
        self._filt,
        selector_json,
    )
def invoke(self, name, args):
    """Invoke Exposed Methods

    The method's return value is normalised into a Deferred. Unknown
    methods, raised errors and badly formatted results all come back as a
    failed Deferred.

    @param name (str) - name of method to invoke
    @param args (tuple) - arguments to pass to invoked method

    @return (defer.Deferred)
    """
    if name not in self.exposedMethods:
        unknown = self.resultContext(
            "[%(application)s] Unknown method '%(method)s'",
            method=name,
            error='unknown method')
        return defer.fail(DroneCommandFailed(unknown))

    try:
        #our own form of maybeDeferred
        outcome = self.exposedMethods[name](*args)

        if isinstance(outcome, defer.Deferred):
            description = ' '.join(map(str, (self.action, name) + tuple(args)))
            return Action(description, outcome).deferred
        if isinstance(outcome, DroneCommandFailed):
            return defer.fail(outcome)
        if isinstance(outcome, dict) or outcome is None:
            #passing None through just feels dirty
            return defer.succeed(outcome)
        if isinstance(outcome, Failure):
            #sigh: re-raise so the handler below converts it
            outcome.raiseException()
        #probably from a triggerred Event callback
        elif type(outcome) == types.InstanceType:
            return defer.succeed(None)

        return defer.fail(FormatError(
            "Result is not formatted correctly you "
            "must return self.resultContext or DroneCommandFailed."
            "\nResult: <%s>" % (str(outcome),)))
    except:
        failure = Failure()
        if not failure.check(DroneCommandFailed):
            template = "[%(application)s] " + "%s: %s" % (
                getException(failure), failure.getErrorMessage())
            context = {
                'error': True,
                'code': -2,
                'stacktrace': failure.getTraceback(),
            }
        else:
            template = "[%(application)s] %(description)s"
            context = failure.value.resultContext
            if 'description' not in context:
                context['description'] = failure.getErrorMessage()
        wrapped = self.resultContext(template, None, **context)
        return defer.fail(DroneCommandFailed(wrapped))
def startService(self):
    """Start All AppManager Services

    Creates an AppManager for every key in ``config.APPLICATIONS`` and starts
    it if it is not already running. A failure while loading one application
    does not stop the others from being loaded. Afterwards the parent service
    is started, the 'allapps' and 'applist' actions are registered in
    ``drone.builtins``, and ``self._start_all_tasks`` is scheduled after
    ``SERVICECONFIG.initial_delay``.
    """
    if self.scanning.called:  #need to pre-populate values
        self.scanning = defer.maybeDeferred(self._first_scan)
    self.first_run = True
    self._task = task.LoopingCall(self.scan_app_instances)
    #plugins will be created and loaded when needed
    for shortname in config.APPLICATIONS.keys():
        # stays None if the failure happens before the manager is created
        manager = None
        try:
            applog = logWithContext(type=shortname, route=SERVICENAME)
            applog('Loading Application Plugin')
            applog('Creating Application Manager')
            manager = AppManager(shortname)
            manager.parentService = self
            #check and see if the model is bound
            if not AppManager(shortname).running:
                applog('Starting Application Manager')
                manager.start()
        except:
            failure = Failure()
            #bad plugin, not adaptable
            failures = (InvalidPlugin, TypeError)
            if failure.check(*failures) and manager:
                # tear down what was registered for the invalid plugin
                log('plugin for %s is invalid' % (manager.name, ))
                manager.action.__class__.delete(manager.action)
                try:
                    pluginFactory.delete_plugin(manager.model)
                except:
                    pass  #silence
                AppManager.delete(manager)
                if not config.EXCESSIVE_LOGGING:
                    continue  #avoid extra logging
            # re-raise the captured failure so that crashReport runs inside
            # an active exception handler
            try:
                failure.raiseException()
            except:
                crashReport('ApplicationLoader', self)
    Service.startService(self)
    Event('instance-started').subscribe(self.reset_tracking)
    #wire allapps action into the server
    drone.builtins.update({
        'allapps': self.allapps_action,
        'applist': self.applist_action,
    })
    #delay scanning by some interval
    config.reactor.callLater(SERVICECONFIG.initial_delay, self._start_all_tasks)
def __handlePrFailure(self, f: Failure, payloadEnvelope: PayloadEnvelope,
                      sendResponse: SendVortexMsgResponseCallable):
    """Log a failure and send an error response for it.

    Generator that yields the Deferreds it waits on. The response envelope
    reuses the incoming ``filt`` and carries ``str(f.value)`` as its result.
    """
    payload = yield payloadEnvelope.decodePayloadDefer()
    action = payload.tuples[0]

    timed_out = f.check(TimeoutError)
    if not timed_out:
        logger.error("Unexpected error, %s\nprocessor %s\naction %s",
                     f, self._filt, action)
    else:
        logger.error("Received no response from\nprocessor %s\naction %s",
                     self._filt, action)

    vortexLogFailure(f, logger)

    errorEnvelope = PayloadEnvelope(filt=payloadEnvelope.filt,
                                    result=str(f.value))
    vortexMsg = yield errorEnvelope.toVortexMsgDefer()

    sendResponse(vortexMsg)
def droneblast(conversation, command):
    """Run a droned ``command`` on the conversation's server and report back.

    This is a generator written in the ``defer.waitForDeferred`` style; the
    last value it yields is the result. The result is the command result on
    success, a ``Failure`` if reporting itself blew up, and ``None`` when no
    server is set in the conversation context or the command failed (in
    which case the user has already been told).

    @param conversation - supplies ``context`` (a dict) and ``say()``
    @param command - passed to ``server.manager.run``
    """
    server = conversation.context.get('server')
    # Initialised up front so the final ``yield result`` cannot hit an
    # unbound local on the assertion and command-failure paths.
    result = None
    d = None
    try:
        if not isinstance(server, Server):
            conversation.say('On what <b>server</b>?')
            raise AssertionError('incomplete converation context')

        options = {}
        if 'timeout' in conversation.context:
            options['timeout'] = conversation.context['timeout']

        try:
            conversation.say("Running droned command...")
            d = server.manager.run(command, **options)
            # track the pending command in the conversation context
            deferreds = conversation.context.get('deferreds', [])
            deferreds.append(d)
            conversation.context.update({'deferreds': deferreds})
            wfd = defer.waitForDeferred(d)
            yield wfd
            result = wfd.getResult()
        except Exception:
            # Deliberately not a bare ``except``: GeneratorExit raised at the
            # ``yield`` above must propagate.
            failure = Failure()
            if failure.check(DroneCommandFailed):
                rc = failure.value.resultContext
                conversation.say(rc.get('description') or str(rc), useHTML=False)
            else:
                conversation.say(failure.getTraceback(), useHTML=False)
        else:
            if isinstance(result, dict):
                # list() keeps this working where dict.values() is a view
                output = list(result.values())[0].get('description', str(result))
            else:
                output = str(result)
            deferreds = conversation.context.get('deferreds', [])
            try:
                deferreds.remove(d)
            except Exception:
                # best effort: the entry may already be gone
                pass
            conversation.context.update({'deferreds': deferreds})
            conversation.say("Command completed\n%s" % output, useHTML=False)
    except AssertionError:
        # the user has already been asked for the missing context
        pass
    except Exception:
        result = Failure()
    yield result
def startService(self):
    """Start All AppManager Services

    Creates an AppManager for every key in ``config.APPLICATIONS`` and starts
    it if it is not already running. A failure while loading one application
    does not stop the others from being loaded. Afterwards the parent service
    is started, the 'allapps' and 'applist' actions are registered in
    ``drone.builtins``, and ``self._start_all_tasks`` is scheduled after
    ``SERVICECONFIG.initial_delay``.
    """
    if self.scanning.called:  #need to pre-populate values
        self.scanning = defer.maybeDeferred(self._first_scan)
    self.first_run = True
    self._task = task.LoopingCall(self.scan_app_instances)
    #plugins will be created and loaded when needed
    for shortname in config.APPLICATIONS.keys():
        # stays None if the failure happens before the manager is created
        manager = None
        try:
            applog = logWithContext(type=shortname,route=SERVICENAME)
            applog('Loading Application Plugin')
            applog('Creating Application Manager')
            manager = AppManager(shortname)
            manager.parentService = self
            #check and see if the model is bound
            if not AppManager(shortname).running:
                applog('Starting Application Manager')
                manager.start()
        except:
            failure = Failure()
            #bad plugin, not adaptable
            failures = (InvalidPlugin, TypeError)
            if failure.check(*failures) and manager:
                # tear down what was registered for the invalid plugin
                log('plugin for %s is invalid' % (manager.name,))
                manager.action.__class__.delete(manager.action)
                try:
                    pluginFactory.delete_plugin(manager.model)
                except:
                    pass  #silence
                AppManager.delete(manager)
                if not config.EXCESSIVE_LOGGING:
                    continue  #avoid extra logging
            # re-raise the captured failure so that crashReport runs inside
            # an active exception handler
            try:
                failure.raiseException()
            except:
                crashReport('ApplicationLoader', self)
    Service.startService(self)
    Event('instance-started').subscribe(self.reset_tracking)
    #wire allapps action into the server
    drone.builtins.update({
        'allapps': self.allapps_action,
        'applist': self.applist_action,
    })
    #delay scanning by some interval
    config.reactor.callLater(SERVICECONFIG.initial_delay, self._start_all_tasks)
class TubConnector(object):
    """I am used to make an outbound connection. I am given a target TubID
    and a list of locationHints, and I try all of them until I establish a
    Broker connected to the target. I will consider redirections returned
    along the way. The first hint that yields a connected Broker will stop
    the search.

    This is a single-use object. The connection attempt begins as soon as my
    connect() method is called.

    I live until all but one of the TCP connections I initiated have finished
    closing down. This means that connection establishment attempts in
    progress are cancelled, and established connections (the ones which did
    *not* complete negotiation before the winning connection) have called
    their connectionLost() methods.
    """

    # first (or most significant) Failure seen; reported by failed()
    failureReason = None
    # seconds allowed for the whole attempt, unless the Tub's test options
    # supply 'connect_timeout'
    CONNECTION_TIMEOUT = 60
    # the pending reactor.callLater() handle for that timeout, or None
    timer = None

    def __init__(self, parent, tubref, connectionPlugins):
        """
        @param parent: the Tub on whose behalf we connect
        @param tubref: reference to the target; supplies getTubID() and
                       getLocations()
        @param connectionPlugins: passed through to get_endpoint()
        """
        self._logparent = log.msg(format="TubConnector created from "
                                  "%(fromtubid)s to %(totubid)s",
                                  fromtubid=parent.tubID,
                                  totubid=tubref.getTubID(),
                                  level=OPERATIONAL,
                                  facility="foolscap.connection",
                                  umid="pH4QDA")
        self.tub = parent
        self.target = tubref
        self.connectionPlugins = connectionPlugins
        self._connectionInfo = ConnectionInfo()
        self.remainingLocations = list(self.target.getLocations())
        # attemptedLocations keeps track of where we've already tried to
        # connect, so we don't try them twice, even if they appear in the
        # hints multiple times. this isn't too clever: slight variations of
        # the same hint will fool it, but it should be enough to avoid
        # infinite redirection loops.
        self.attemptedLocations = []

        # validHints tracks which hints were successfully turned into
        # endpoints. If we don't recognize any hint type in a FURL,
        # validHints will be empty when we're done, and we'll signal
        # NoLocationHintsError
        self.validHints = []

        # pendingConnections contains a Deferred for each endpoint.connect()
        # that has started (but not yet established) a connection. We keep
        # track of these so we can shut them down (using d.cancel()) when we
        # stop connecting (either because one of the other connections
        # succeeded, or because someone told us to give up).
        self.pendingConnections = set()

        # self.pendingNegotiations maps Negotiation instances (connected but
        # not finished negotiation) to the hint that got us the connection.
        # We track these so we can abandon the negotiation.
        self.pendingNegotiations = {}

    def __repr__(self):
        # reuse the default "<... object at 0x...>" text, minus its closing
        # bracket, and append the two tub ids
        s = object.__repr__(self)
        s = s[:-1]
        s += " from %s to %s>" % (self.tub.tubID, self.target.getTubID())
        return s

    def log(self, *args, **kwargs):
        """log.msg() wrapper that defaults 'parent' and 'facility'."""
        kwargs['parent'] = kwargs.get('parent') or self._logparent
        kwargs['facility'] = kwargs.get('facility') or "foolscap.connection"
        return log.msg(*args, **kwargs)

    def getConnectionInfo(self):
        """Return the ConnectionInfo that records per-hint status."""
        return self._connectionInfo

    def connect(self):
        """Begin the connection process. This should only be called once.
        This will either result in the successful Negotiation object invoking
        the parent Tub's brokerAttached() method, or us calling the Tub's
        connectionFailed() method."""
        self.tub.connectorStarted(self)
        timeout = self.tub._test_options.get('connect_timeout',
                                             self.CONNECTION_TIMEOUT)
        self.timer = reactor.callLater(timeout, self.connectionTimedOut)
        self.active = True
        self.connectToAll()

    def stopConnectionTimer(self):
        """Cancel the overall timeout, if one is still pending."""
        if self.timer:
            self.timer.cancel()
            self.timer = None

    def shutdown(self):
        """Give up: drop untried hints and cancel everything in progress."""
        self.active = False
        self.remainingLocations = []
        self.stopConnectionTimer()
        self.cancelRemainingConnections()

    def cancelRemainingConnections(self):
        """Cancel connection attempts and abandon unfinished negotiations."""
        for d in list(self.pendingConnections):
            d.cancel()
            # this will trigger self._connectionFailed(), via the errback,
            # with a ConnectingCancelledError
        # Iterate over a snapshot: loseConnection() may synchronously reach
        # connectorNegotiationFailed(), which pops from pendingNegotiations,
        # and mutating a dict while iterating it is an error.
        for n in list(self.pendingNegotiations):
            n.transport.loseConnection()
            # triggers n.connectionLost(), then
            # self.connectorNegotiationFailed()

    def connectToAll(self):
        """Start a connection attempt for every hint not yet tried."""
        while self.remainingLocations:
            location = self.remainingLocations.pop()
            if location in self.attemptedLocations:
                continue
            self.attemptedLocations.append(location)
            lp = self.log("considering hint: %s" % (location,))
            d = get_endpoint(location, self.connectionPlugins,
                             self._connectionInfo)
            # no handler for this hint?: InvalidHintError thrown here

            # 'location' and 'lp' are bound as defaults so a callback that
            # fires after the loop has moved on still sees its own values
            def _good_hint(res, location=location, lp=lp):
                self._connectionInfo._set_connection_status(location,
                                                            "connecting")
                self.validHints.append(location)
                (ep, host) = res
                self.log("connecting to hint: %s" % (location,),
                         parent=lp, umid="9iX0eg")
                return ep.connect(TubConnectorFactory(self, host,
                                                      location, lp))
            d.addCallback(_good_hint)

            self.pendingConnections.add(d)

            def _remove(res, d=d):
                self.pendingConnections.remove(d)
                return res
            d.addBoth(_remove)

            d.addCallback(self._connectionSuccess, location, lp)
            d.addErrback(self._connectionFailed, location, lp)
            if self.tub._test_options.get("debug_stall_second_connection"):
                # for unit tests, hold off on making the second connection
                # for a moment. This allows the first connection to get to a
                # known state.
                reactor.callLater(0.1, self.connectToAll)
                return
        self.checkForFailure()

    def connectionTimedOut(self):
        # this timer is for the overall connection attempt, not each
        # individual endpoint/TCP connector
        self.timer = None  # it has fired; there is nothing left to cancel
        why = "no connection established within client timeout"
        self.failureReason = Failure(NegotiationError(why))
        self.shutdown()
        self.failed()

    def _connectionFailed(self, reason, hint, lp):
        # this is called if some individual TCP connection cannot be
        # established
        if reason.check(error.ConnectionRefusedError):
            description = "connection refused"
            self.log("connection refused for %s" % hint, level=OPERATIONAL,
                     parent=lp, umid="rSrUxQ")
        elif reason.check(error.ConnectingCancelledError,
                          defer.CancelledError):
            description = "abandoned"
            self.log("abandoned attempt to %s" % hint, level=OPERATIONAL,
                     parent=lp, umid="CC8vwg")
        elif reason.check(InvalidHintError):
            description = "bad hint: %s" % str(reason.value)
            self.log("unable to use hint: %s: %s" % (hint, reason.value),
                     level=UNUSUAL, parent=lp, umid="z62ctA")
        else:
            # some errors, like txsocksx.errors.ServerFailure, extend
            # Exception without defining a __str__, so when one is
            # constructed without arguments, their str() is empty, which is
            # not very useful. Their repr() at least includes the exception
            # name. In general, str() is better than repr(), since it lets
            # the exception designer build a human-meaningful string, so
            # we'll prefer str() unless it's empty.
            why = str(reason.value) or repr(reason.value)
            description = "failed to connect: %s" % why
            log.err(reason, "failed to connect to %s" % hint, level=CURIOUS,
                    parent=lp, facility="foolscap.connection",
                    umid="2PEowg")
        suffix = getattr(reason.value,
                         "foolscap_connection_handler_error_suffix", None)
        if suffix:
            description += suffix
        self._connectionInfo._set_connection_status(hint, description)
        if not self.failureReason:
            self.failureReason = reason
        self.checkForFailure()
        self.checkForIdle()

    def _connectionSuccess(self, p, hint, lp):
        # fires with the Negotiation protocol instance, after
        # p.makeConnection(transport) returns, which is after
        # p.connectionMade() returns
        self.log("connected to %s, beginning negotiation" % hint,
                 level=OPERATIONAL, parent=lp, umid="VN0XGQ")
        self.pendingNegotiations[p] = hint
        self._connectionInfo._set_connection_status(hint, "negotiating")

    def redirectReceived(self, newLocation):
        # the redirected connection will disconnect soon, which will trigger
        # connectorNegotiationFailed(), so nothing is torn down here: just
        # queue the new location and start connecting to it
        self.remainingLocations.append(newLocation)
        self.connectToAll()

    def connectorNegotiationFailed(self, n, location, reason):
        assert isinstance(n, self.tub.negotiationClass)
        assert isinstance(reason, Failure), \
            "Hey, %s isn't a Failure" % (reason,)
        # this is called if protocol negotiation cannot be established, or if
        # the connection is closed for any reason prior to switching to the
        # Banana protocol

        # abandoned connections will not have hit _connectionSuccess, so they
        # won't have been added to pendingNegotiations
        self.pendingNegotiations.pop(n, None)
        description = "negotiation failed: %s" % str(reason.value)
        self._connectionInfo._set_connection_status(location, description)
        # 'reason' is a Failure, so the wrapped exception type has to be
        # tested with check(); isinstance() on the Failure never matches
        if not self.failureReason or reason.check(NegotiationError):
            # don't let mundane things like ConnectionFailed override the
            # actually significant ones like NegotiationError
            self.failureReason = reason
        self.checkForFailure()
        self.checkForIdle()

    def connectorNegotiationComplete(self, n, location):
        assert isinstance(n, self.tub.negotiationClass)
        # 'factory' has just completed negotiation, so abandon all the other
        # connection attempts
        self.log("connectorNegotiationComplete, %s won" % n)
        self.pendingNegotiations.pop(n, None)  # this one succeeded
        self._connectionInfo._set_connection_status(location, "successful")
        self._connectionInfo._set_winning_hint(location)
        self._connectionInfo._set_established_at(time.time())
        self.active = False
        self.stopConnectionTimer()
        self.cancelRemainingConnections()  # abandon the others
        self.checkForIdle()

    def checkForFailure(self):
        """Signal failure once no hint is left untried or in progress."""
        if not self.active:
            return
        if (self.remainingLocations or
                self.pendingConnections or self.pendingNegotiations):
            return
        if not self.validHints:
            self.failureReason = Failure(NoLocationHintsError())
        # we have no more options, so the connection attempt will fail. The
        # getBrokerForTubRef may have succeeded, however, if the other side
        # tried to connect to us at exactly the same time, they were the
        # master, they established their connection first (but the final
        # decision is still in flight), and they hung up on our connection
        # because they felt it was a duplicate. So, if self.failureReason
        # indicates a duplicate connection, do not signal a failure here. We
        # leave the connection timer in place in case they lied about having
        # a duplicate connection ready to go.
        if (self.failureReason.check(RemoteNegotiationError) and
                isSubstring(self.failureReason.value.args[0],
                            "Duplicate connection")):
            self.log("TubConnector.checkForFailure: connection attempt "
                     "failed because the other end decided ours was a "
                     "duplicate connection, so we won't signal the "
                     "failure here")
            return
        self.failed()

    def failed(self):
        """Report failureReason to the Tub and mark this connector done."""
        self.stopConnectionTimer()
        self.active = False
        if self.failureReason:
            # let the recipient of the Failure see the per-hint statuses
            self.failureReason._connectionInfo = self._connectionInfo
        self.tub.connectionFailed(self.target, self.failureReason)
        self.tub.connectorFinished(self)

    def checkForIdle(self):
        # When one connection finishes negotiation, the others are cancelled
        # to hurry them along their way towards disconnection. The last one
        # to resolve finally causes us to notify our parent Tub.
        if (self.remainingLocations or
                self.pendingConnections or self.pendingNegotiations):
            return
        # we have no more outstanding connections (either in progress or in
        # negotiation), so this connector is finished.
        self.log("connectorFinished (%s)" % self)
        self.tub.connectorFinished(self)
else: # there's no regex in cmdinfo to extract info log.msg("%s: no callback registered" % self.state) self.notify_success(self.waitbuf) self.waitbuf = self.waitbuf.replace(match.group(), '', 1) self.transition_to_idle() else: # there is no end of response detected, so we have either an error # or a split command (like send_sms, save_sms, etc.) match = E.extract_error(self.waitbuf) if match: exception, error, m = match e = exception(error) f = Failure(e) if not f.check(*self.cmd.nolog): log.err(e, "waiting") # send the failure back self.notify_failure(f) # remove the exception string from the waitbuf self.waitbuf = self.waitbuf.replace(m.group(), '', 1) self.transition_to_idle() else: match = SPLIT_PROMPT.search(data) if match: log.msg("waiting: split command prompt detected") self.send_splitcmd() self.waitbuf = self.waitbuf.replace(match.group(), '', 1) else: log.msg("waiting: unmatched data %r" % data)
def _handle_send_response(self, result, payloadsByTopicPart,
                          deferredsByTopicPart):
    """Handle the response from our client to our send_produce_request

    This is a bit complex. Failures can happen in a few ways:

      1. The client sent an empty list, False, None or some similar thing
         as the result, but we were expecting real responses.
      2. The client had a failure before it even tried sending any requests
         to any brokers.

            a. Kafka error: See if we can retry the whole request
            b. Non-kafka: Figure it's a programming error, fail all
               deferreds

      3. The client sent all the requests (it's all or none) to the brokers
         but one or more request failed (timed out before receiving a
         response, or the brokerclient threw some sort of exception on
         send). In this case, the client throws FailedPayloadsError, and
         attaches the responses (NOTE: some can have errors!), and the
         payloads where the send itself failed to the exception.
      4. The client sent all the requests, all responses were received, but
         the Kafka broker indicated an error with servicing the request on
         some of the responses.

    :param result: what the send Deferred fired with: a falsy value, a
        ``Failure``, or an iterable of responses carrying ``topic`` and
        ``partition``.
    :param payloadsByTopicPart: mapping of TopicAndPartition to the payload
        sent for it.
    :param deferredsByTopicPart: mapping of TopicAndPartition to the
        (possibly nested) list of Deferreds waiting on that payload.
    :returns: the retry Deferred when a retry was scheduled, else ``None``.
    """

    def _deliver_result(d_list, result=None):
        """Possibly callback each deferred in a list with single result"""
        for d in d_list:
            if not isinstance(d, Deferred):
                # nested list...
                _deliver_result(d, result)
            else:
                # We check d.called since the request could have been
                # cancelled while we waited for the response
                if not d.called:
                    d.callback(result)

    def _do_retry(payloads):
        # We use 'fail_on_error=False' because we want our client to
        # process every response that comes back from the brokers so
        # we can determine which requests were successful, and which
        # failed for retry
        d = self.client.send_produce_request(
            payloads, acks=self.req_acks, timeout=self.ack_timeout,
            fail_on_error=False)
        self._req_attempts += 1
        # add our handlers
        d.addBoth(self._handle_send_response, payloadsByTopicPart,
                  deferredsByTopicPart)
        return d

    def _cancel_retry(failure, dc):
        # Cancel the retry callLater and pass-thru the failure
        dc.cancel()
        # cancel all the top-level deferreds associated with the request
        _deliver_result(deferredsByTopicPart.values(), failure)
        return failure

    def _check_retry_payloads(failed_payloads_with_errs):
        """Check our retry count and retry after a delay or errback

        If we have more retries to try, create a deferred that will fire
        with the result of delayed retry. If not, errback the remaining
        deferreds with failure

        Params:
        failed_payloads_with_errs - list of (payload, failure) tuples
        """
        # Do we have retries left?
        if self._req_attempts >= self._max_attempts:
            # No, no retries left, fail each failed_payload with its
            # associated failure
            for p, f in failed_payloads_with_errs:
                t_and_p = TopicAndPartition(p.topic, p.partition)
                _deliver_result(deferredsByTopicPart[t_and_p], f)
            return
        # Retries remain!  Schedule one...
        d = Deferred()
        dc = self.client.reactor.callLater(
            self._retry_interval, d.callback,
            [p for p, f in failed_payloads_with_errs])
        self._retry_interval *= self.RETRY_INTERVAL_FACTOR
        # Cancel the callLater when request is cancelled before it fires
        d.addErrback(_cancel_retry, dc)

        # Reset the topic metadata for all topics which had failed_requests
        # where the failures were of the kind UnknownTopicOrPartitionError
        # or NotLeaderForPartitionError, since those indicate our client's
        # metadata is out of date.
        reset_topics = set()
        for payload, err in failed_payloads_with_errs:
            # On the total-failure path the error is the Failure wrapper,
            # so look at the exception it carries.
            if isinstance(err, Failure):
                err = err.value
            if isinstance(err, (NotLeaderForPartitionError,
                                UnknownTopicOrPartitionError)):
                reset_topics.add(payload.topic)
        if reset_topics:
            self.client.reset_topic_metadata(*reset_topics)

        d.addCallback(_do_retry)
        return d

    # The payloads we need to retry, if we still can..
    failed_payloads = []
    # In the case we are sending requests without requiring acks, the
    # brokerclient will immediately callback() the deferred upon send with
    # None. In that case, we just iterate over all the deferreds in
    # deferredsByTopicPart and callback them with None
    # If we are expecting responses/acks, and we get an empty result, we
    # callback with a Failure of NoResponseError
    if not result:
        # Success, but no results, is that what we're expecting?
        if self.req_acks == PRODUCER_ACK_NOT_REQUIRED:
            result = None
        else:
            # We got no result, but we were expecting one? Fail everything!
            result = Failure(NoResponseError())
        _deliver_result(deferredsByTopicPart.values(), result)
        return
    elif isinstance(result, Failure):
        # Failure!  Was it total, or partial?
        if not result.check(FailedPayloadsError):
            # Total failure of some sort!
            # The client was unable to send the request at all. If it's
            # a KafkaError (probably Leader/Partition unavailable), retry
            if result.check(KafkaError):
                # Yep, a kafka error. Set failed_payloads, and we'll retry
                # them all below. Set failure for errback to callers if we
                # are all out of retries
                failure, result = result, []  # no successful results, retry
                failed_payloads = [(p, failure)
                                   for p in payloadsByTopicPart.values()]
            else:
                # Was the request cancelled?
                if not result.check(tid_CancelledError):
                    # Uh Oh, programming error? Log it!
                    log.error("Unexpected failure: %r in "
                              "_handle_send_response", result)
                # Cancelled, or programming error, we fail the requests
                _deliver_result(deferredsByTopicPart.values(), result)
                return
        else:
            # FailedPayloadsError: This means that some/all of the
            # requests to a/some brokerclients failed to send.
            # Pull the successful responses and the failed_payloads off
            # the exception and handle them below. Preserve the
            # FailedPayloadsError as 'failure'
            failure = result
            result = failure.value.args[0]
            failed_payloads = failure.value.args[1]

    # Do we have results? Iterate over them and if the response indicates
    # success, then callback the associated deferred. If the response
    # indicates an error, then setup that request for retry.
    # NOTE: In this case, each failed_payload gets its own error...
    for res in result:
        t_and_p = TopicAndPartition(res.topic, res.partition)
        t_and_p_err = _check_error(res, raiseException=False)
        if not t_and_p_err:
            # Success for this topic/partition
            d_list = deferredsByTopicPart[t_and_p]
            _deliver_result(d_list, res)
        else:
            p = payloadsByTopicPart[t_and_p]
            failed_payloads.append((p, t_and_p_err))

    # Were there any failed requests to possibly retry?
    if failed_payloads:
        return _check_retry_payloads(failed_payloads)
    return
def startBuild(self, build_status, workerforbuilder):
    """Prepare this build on the given worker and kick off its first step.

    The build is registered with the data API, a synthetic
    "worker_preparation" step tracks the time spent getting the worker and
    the locks ready, and every preparation failure ends the build through
    buildFinished() instead of raising.

    Per the original contract, the Deferred produced for this method fires
    when the build finishes and is guaranteed never to errback.
    """
    self.workerforbuilder = workerforbuilder
    self.conn = None
    worker = workerforbuilder.worker

    log.msg("{}.startBuild".format(self))

    self.build_status = build_status
    # TODO: this will go away when build collapsing is implemented; until
    # then we just assign the build to the first buildrequest
    first_request_id = self.requests[0].id
    builderid = yield self.getBuilderId()
    build_ids = yield self.master.data.updates.addBuild(
        builderid=builderid,
        buildrequestid=first_request_id,
        workerid=worker.workerid)
    self.buildid, self.number = build_ids

    stop_routing_key = ("control", "builds", str(self.buildid), "stop")
    self.stopBuildConsumer = yield self.master.mq.startConsuming(
        self.controlStopBuild, stop_routing_key)

    # The preparation step accounts for the time needed to prepare the
    # worker and to obtain the locks. A real step cannot be used because
    # there is no worker yet.
    self.preparation_step = buildstep.BuildStep(name="worker_preparation")
    self.preparation_step.setBuild(self)
    yield self.preparation_step.addStep()
    self.setupOwnProperties()

    # narrow WorkerLocks down to the worker this build runs on
    narrowed_locks = []
    for lock, access in self.locks:
        worker_lock = lock.getLockForWorker(
            workerforbuilder.worker.workername)
        narrowed_locks.append((worker_lock, access))
    self.locks = narrowed_locks
    metrics.MetricCountEvent.log('active_builds', 1)

    # properties must be visible to anyone listening on 'new' events
    yield self._flushProperties(None)
    self.build_status.buildStarted(self)
    yield self.master.data.updates.setBuildStateString(
        self.buildid, 'starting')
    yield self.master.data.updates.generateNewBuildEvent(self.buildid)

    try:
        # creates .steps
        self.setupBuild()
    except Exception:
        yield self.buildPreparationFailure(Failure(), "setupBuild")
        self.buildFinished(['Build.setupBuild', 'failed'], EXCEPTION)
        return

    # flush properties once more at the beginning of the build
    yield self._flushProperties(None)

    yield self.master.data.updates.setBuildStateString(
        self.buildid, 'preparing worker')
    try:
        ready_or_failure = yield workerforbuilder.prepare(self)
    except Exception:
        ready_or_failure = Failure()

    # Only a literal True means the worker is ready; anything else
    # (including a Failure) means the build is not started.
    if ready_or_failure is not True:
        yield self.buildPreparationFailure(ready_or_failure,
                                           "worker_prepare")
        if self.stopped:
            text, results = ["worker", "cancelled"], self.results
        elif (isinstance(ready_or_failure, Failure) and
                ready_or_failure.check(
                    interfaces.LatentWorkerCannotSubstantiate)):
            text, results = ["worker", "cannot", "substantiate"], EXCEPTION
        else:
            text, results = ["worker", "not", "available"], RETRY
        self.buildFinished(text, results)
        return

    # Ping the worker to make sure it is still there. If it has fallen off
    # the map (NAT timeout or similar), this fails after a couple of
    # minutes, depending upon the TCP timeout.
    #
    # TODO: This can unnecessarily suspend the starting of a build, in
    # situations where the worker is live but is pushing lots of data to
    # us in a build.
    yield self.master.data.updates.setBuildStateString(
        self.buildid, 'pinging worker')
    log.msg("starting build {}.. pinging the worker {}".format(
        self, workerforbuilder))
    try:
        ping_success_or_failure = yield workerforbuilder.ping()
    except Exception:
        ping_success_or_failure = Failure()

    if ping_success_or_failure is not True:
        yield self.buildPreparationFailure(ping_success_or_failure,
                                           "worker_ping")
        self.buildFinished(["worker", "not", "pinged"], RETRY)
        return

    self.conn = workerforbuilder.worker.conn

    # The builddir property needs an attached worker because it depends on
    # the worker's path_module. Latent workers only become attached after
    # being prepared, so this cannot be set up with the other properties.
    self.setupWorkerBuildirProperty(workerforbuilder)
    self.setupWorkerForBuilder(workerforbuilder)
    self.subs = self.conn.notifyOnDisconnect(self.lostRemote)

    # let the remote side know that a build is starting
    try:
        yield self.conn.remoteStartBuild(self.builder.name)
    except Exception:
        yield self.buildPreparationFailure(Failure(), "start_build")
        self.buildFinished(["worker", "not", "building"], RETRY)
        return

    yield self.master.data.updates.setBuildStateString(
        self.buildid, 'acquiring locks')
    yield self.acquireLocks()

    prep_step_id = self.preparation_step.stepid
    yield self.master.data.updates.setStepStateString(
        prep_step_id, "worker ready")
    yield self.master.data.updates.finishStep(
        self.preparation_step.stepid, SUCCESS, False)

    yield self.master.data.updates.setBuildStateString(
        self.buildid, 'building')

    # hand over to the sequence of steps
    self.startNextStep()
class CommandTestMixin(ConnectionTestMixin):
    """A subclass of `ConnectionTestMixin` that also sets up a command
    plugin in addition to a connection and transport."""

    #: The command plugin class to test.
    command_class = None

    #: Any additional help arguments to test in `test_help`.
    help_arguments = tuple()

    def setUp(self):
        """Enable `command_class` on the connection under a keyword derived
        from the last component of its name, and reset reply/failure
        state."""
        super(CommandTestMixin, self).setUp()
        self.default_venue = self.connection.nickname
        name = self.command_class.name
        # "pkg/module.Class" -> "class"
        self.keyword = name.rsplit('/', 1)[-1].rsplit('.', 1)[-1].lower()
        self.command = self.connection.settings.enable(name, [self.keyword])
        self.reply_buffer = iter([])
        self.failure = None

    def command_message(self, content, **kwargs):
        """Build a `Message` addressed to the command under test.

        `action` defaults to ``'command'``; `actor`, `venue` and
        `subaction` default to the first other user, the default venue and
        the command keyword.  Remaining keyword arguments are passed to
        `Message` unchanged."""
        action = kwargs.pop('action', 'command')
        kwargs.setdefault('actor', self.other_users[0])
        kwargs.setdefault('venue', self.default_venue)
        kwargs.setdefault('subaction', self.keyword)
        return Message(self.connection, False, action,
                       content=content, **kwargs)

    @inlineCallbacks
    def send_command(self, content, **kwargs):
        """Send `content` to the command and record the outcome.

        A non-``None`` response is wrapped in a `ReplyBuffer` stored as
        `reply_buffer`; a `UserVisibleError` is stored as `failure`.  Any
        other exception fails the returned Deferred."""
        if isinstance(content, unicode):
            content = content.encode(DEFAULT_ENCODING)
        request = self.command_message(content, **kwargs)
        try:
            response = yield self.command.respond_to(request)
        except UserVisibleError:
            # bare Failure() captures the exception being handled
            self.failure = Failure()
        else:
            if response is not None:
                self.reply_buffer = ReplyBuffer(response, request)

    def assert_reply(self, expected):
        """Assert that the next buffered reply equals `expected`."""
        finished = maybeDeferred(next, self.reply_buffer, None)
        finished.addCallback(self.assertEqual, expected)
        return finished

    def assert_no_replies(self):
        """Assert that the reply buffer is exhausted."""
        finished = maybeDeferred(next, self.reply_buffer, None)
        finished.addCallback(self.assertIsNone)
        return finished

    def assert_error(self, expected):
        """Assert that the last command raised a `UserVisibleError` whose
        message is `expected`, then clear the recorded failure."""
        self.assertIsNotNone(self.failure)
        self.assertIsNotNone(self.failure.check(UserVisibleError))
        self.assertEqual(self.failure.getErrorMessage(), expected)
        self.failure = None

    @staticmethod
    def use_cassette(cassette_name):
        """Return a decorator that runs a test method with the command's
        agent routed through a `CassetteAgent` for `cassette_name`, and
        saves the cassette once the test's Deferred succeeds."""
        cassette_path = os.path.join(CASSETTE_LIBRARY,
                                     cassette_name + '.json')
        cassette_agent = CassetteAgent(Agent(reactor), cassette_path)

        def decorator(func):
            @wraps(func)
            def wrapper(self, *args, **kwargs):
                self.command.agent = IdentifyingAgent(ContentDecoderAgent(
                    RedirectAgent(cassette_agent), [('gzip', GzipDecoder)]))
                finished = maybeDeferred(func, self, *args, **kwargs)
                finished.addCallback(cassette_agent.save)
                return finished
            return wrapper
        return decorator

    @inlineCallbacks
    def test_help(self):
        """Ensure that command help doesn't cause errors."""
        # Wait on each send so that a failing help request fails *this*
        # test instead of leaving an unhandled error in a dropped Deferred.
        for content in ('',) + tuple(self.help_arguments):
            yield self.send_command(content, action='cmdhelp')
def _handle_send_response(self, result, payloadsByTopicPart,
                          deferredsByTopicPart):
    """Handle the response from our client to our send_produce_request

    This is a bit complex. Failures can happen in a few ways:

      1) The client sent an empty list, False, None or some similar thing
         as the result, but we were expecting real responses.
      2) The client had a failure before it even tried sending any requests
         to any brokers.
         a) Kafka error: See if we can retry the whole request
         b) Non-kafka: Figure it's a programming error, fail all deferreds
      3) The client sent all the requests (it's all or none) to the brokers
         but one or more request failed (timed out before receiving a
         response, or the brokerclient threw some sort of exception on
         send). In this case, the client throws FailedPayloadsError, and
         attaches the responses (NOTE: some can have errors!), and the
         payloads where the send itself failed to the exception.
      4) The client sent all the requests, all responses were received, but
         the Kafka broker indicated an error with servicing the request on
         some of the responses.

    :param result: what the send Deferred fired with: a falsy value, a
        ``Failure``, or an iterable of responses carrying ``topic`` and
        ``partition``.
    :param payloadsByTopicPart: mapping of TopicAndPartition to the payload
        sent for it.
    :param deferredsByTopicPart: mapping of TopicAndPartition to the
        (possibly nested) list of Deferreds waiting on that payload.
    :returns: the retry Deferred when a retry was scheduled, else ``None``.
    """

    def _deliver_result(d_list, result=None):
        """Possibly callback each deferred in a list with single result"""
        for d in d_list:
            if not isinstance(d, Deferred):
                # nested list...
                _deliver_result(d, result)
            else:
                # We check d.called since the request could have been
                # cancelled while we waited for the response
                if not d.called:
                    d.callback(result)

    def _do_retry(payloads):
        # We use 'fail_on_error=False' because we want our client to
        # process every response that comes back from the brokers so
        # we can determine which requests were successful, and which
        # failed for retry
        d = self.client.send_produce_request(payloads,
                                             acks=self.req_acks,
                                             timeout=self.ack_timeout,
                                             fail_on_error=False)
        self._req_attempts += 1
        # add our handlers
        d.addBoth(self._handle_send_response, payloadsByTopicPart,
                  deferredsByTopicPart)
        return d

    def _cancel_retry(failure, dc):
        # Cancel the retry callLater and pass-thru the failure
        dc.cancel()
        # cancel all the top-level deferreds associated with the request
        _deliver_result(deferredsByTopicPart.values(), failure)
        return failure

    def _check_retry_payloads(failed_payloads_with_errs):
        """Check our retry count and retry after a delay or errback

        If we have more retries to try, create a deferred that will fire
        with the result of delayed retry. If not, errback the remaining
        deferreds with failure

        Params:
        failed_payloads_with_errs - list of (payload, failure) tuples
        """
        # Do we have retries left?
        if self._req_attempts >= self._max_attempts:
            # No, no retries left, fail each failed_payload with its
            # associated failure
            for p, f in failed_payloads_with_errs:
                t_and_p = TopicAndPartition(p.topic, p.partition)
                _deliver_result(deferredsByTopicPart[t_and_p], f)
            return
        # Retries remain!  Schedule one...
        d = Deferred()
        dc = self.client.reactor.callLater(
            self._retry_interval, d.callback,
            [p for p, f in failed_payloads_with_errs])
        self._retry_interval *= self.RETRY_INTERVAL_FACTOR
        # Cancel the callLater when request is cancelled before it fires
        d.addErrback(_cancel_retry, dc)

        # Reset the topic metadata for all topics which had failed_requests
        # where the failures were of the kind UnknownTopicOrPartitionError
        # or NotLeaderForPartitionError, since those indicate our client's
        # metadata is out of date.
        # An explicit loop is required here: map() is lazy on Python 3 and
        # would never run the check.  A set avoids passing the same topic
        # twice.
        reset_topics = set()
        for payload, err in failed_payloads_with_errs:
            # On the total-failure path the error is the Failure wrapper,
            # so look at the exception it carries.
            if isinstance(err, Failure):
                err = err.value
            if isinstance(err, (NotLeaderForPartitionError,
                                UnknownTopicOrPartitionError)):
                reset_topics.add(payload.topic)
        if reset_topics:
            self.client.reset_topic_metadata(*reset_topics)

        d.addCallback(_do_retry)
        return d

    # The payloads we need to retry, if we still can..
    failed_payloads = []
    # In the case we are sending requests without requiring acks, the
    # brokerclient will immediately callback() the deferred upon send with
    # None. In that case, we just iterate over all the deferreds in
    # deferredsByTopicPart and callback them with None
    # If we are expecting responses/acks, and we get an empty result, we
    # callback with a Failure of NoResponseError
    if not result:
        # Success, but no results, is that what we're expecting?
        if self.req_acks == PRODUCER_ACK_NOT_REQUIRED:
            result = None
        else:
            # We got no result, but we were expecting one? Fail everything!
            result = Failure(NoResponseError())
        _deliver_result(deferredsByTopicPart.values(), result)
        return
    elif isinstance(result, Failure):
        # Failure!  Was it total, or partial?
        if not result.check(FailedPayloadsError):
            # Total failure of some sort!
            # The client was unable to send the request at all. If it's
            # a KafkaError (probably Leader/Partition unavailable), retry
            if result.check(KafkaError):
                # Yep, a kafka error. Set failed_payloads, and we'll retry
                # them all below. Set failure for errback to callers if we
                # are all out of retries
                failure, result = result, []  # no successful results, retry
                failed_payloads = [(p, failure)
                                   for p in payloadsByTopicPart.values()]
            else:
                # Was the request cancelled?
                if not result.check(tid_CancelledError):
                    # Uh Oh, programming error? Log it!
                    log.error(
                        "Unexpected failure: %r in "
                        "_handle_send_response", result)
                # Cancelled, or programming error, we fail the requests
                _deliver_result(deferredsByTopicPart.values(), result)
                return
        else:
            # FailedPayloadsError: This means that some/all of the
            # requests to a/some brokerclients failed to send.
            # Pull the successful responses and the failed_payloads off
            # the exception and handle them below. Preserve the
            # FailedPayloadsError as 'failure'
            failure = result
            result = failure.value.args[0]
            failed_payloads = failure.value.args[1]

    # Do we have results? Iterate over them and if the response indicates
    # success, then callback the associated deferred. If the response
    # indicates an error, then setup that request for retry.
    # NOTE: In this case, each failed_payload gets its own error...
    for res in result:
        t_and_p = TopicAndPartition(res.topic, res.partition)
        t_and_p_err = _check_error(res, raiseException=False)
        if not t_and_p_err:
            # Success for this topic/partition
            d_list = deferredsByTopicPart[t_and_p]
            _deliver_result(d_list, res)
        else:
            p = payloadsByTopicPart[t_and_p]
            failed_payloads.append((p, t_and_p_err))

    # Were there any failed requests to possibly retry?
    if failed_payloads:
        return _check_retry_payloads(failed_payloads)
    return
def startInstance(self, label):
    """Start the application instance identified by ``label``.

       Generator following the ``waitForDeferred`` protocol: the last value
       yielded is the result context for the caller.

       @param label: (string)

       @fires Event('instance-started')

       return defer.Deferred()
    """
    template = '[%(application)s,%(label)s] %(description)s'
    context = {'description': 'Failed to Start', 'code': 254}
    result = {}
    instance = None
    try:
        # refuse to start something that is already running
        if self.model.getInstance(label).running:
            context.update(self.model.statusInstance(label))
            raise DroneCommandFailed(context)

        waiter = defer.waitForDeferred(
            self._start_stop_common(label, 'startInstance'))
        yield waiter
        result = waiter.getResult()

        waiter = defer.waitForDeferred(self.statusInstance(label))
        yield waiter
        result.update(waiter.getResult())

        # the instance object can change, so look it up again
        instance = self.model.getInstance(label)
        if isinstance(result, dict):
            context.update(result)
        elif isinstance(result, DroneCommandFailed):
            context.update(result.resultContext)

        if instance.running:
            Event('instance-started').fire(instance=instance)
            context['code'] = 0
            # AssertionError doubles as the "success" exit of this block
            raise AssertionError('ignore')
        raise DroneCommandFailed(context)
    except AssertionError:
        # success path: report the refreshed status of the instance
        waiter = defer.waitForDeferred(self.statusInstance(label))
        yield waiter
        result = waiter.getResult()
    except:
        instance = self.model.getInstance(label)
        caught = Failure()
        if caught.check(DroneCommandFailed):
            template = '%(description)s'
            context = caught.value.resultContext
        else:
            # record the error so it can be debugged
            self.debugReport()
            # and still hand something readable back to the end user
            context = {
                'error': caught,
                'code': 253,
                'description': "%s: %s" % (getException(caught),
                                           caught.getErrorMessage()),
            }
        result = self.resultContext(template, instance, **context)
    # best effort: flag the instance as one that should be running
    try:
        instance = self.model.getInstance(label)
        instance.shouldBeRunning = True
    except:
        pass
    yield result
def stopInstance(self, label):
    """Stop the application instance identified by ``label``.

    This is a generator that yields ``defer.waitForDeferred`` wrappers while
    it waits on intermediate Deferreds, and finally yields the result
    context describing the outcome. If the graceful stop errbacks,
    ``self._killInstance`` is chained as a further errback.

    Outcome codes placed in the result:
      * 0   - the instance is no longer running
      * 254 - the instance was not running, or is still running afterwards
      * 253 - an unexpected exception was raised along the way

    @param label: (string)

    @fires Event('instance-stopped')

    return defer.Deferred()
    """
    instance = None
    context = {'code': 254}
    template = '[%(application)s,%(label)s] %(description)s'
    result = {}
    try:
        instance = self.model.getInstance(label)
        instance.shouldBeRunning = False
        # an instance that is not running is reported as a failed stop
        if not instance.running:
            context.update(self.model.statusInstance(label))
            raise DroneCommandFailed(context)

        pid = instance.process.pid
        self.log("Trying to shutdown %d gracefully" % (pid,))

        def log_failure(outcome):
            """log the failed graceful shutdown, pass the outcome through"""
            self.log("Failed to shutdown process gracefully")
            return outcome

        def log_success(outcome):
            """log the graceful shutdown, pass the outcome through"""
            self.log("process %d gracefully shutdown" % (pid,))
            return outcome

        stopping = self._start_stop_common(label, 'stopInstance')
        stopping.addCallback(log_success)
        stopping.addErrback(log_failure)
        # log_failure returns the failure, so this errback runs next
        stopping.addErrback(self._killInstance, instance)
        stop_wait = defer.waitForDeferred(stopping)
        yield stop_wait

        # re-fetch the instance from the model, it can change
        instance = self.model.getInstance(label)
        result = stop_wait.getResult()
        if isinstance(result, dict):
            context.update(result)
        elif isinstance(result, DroneCommandFailed):
            context.update(result.resultContext)

        if not instance.running:
            context['code'] = 0
            Event('instance-stopped').fire(instance=instance)
            # AssertionError is the "success" signal handled just below
            raise AssertionError('ignore me')
        raise DroneCommandFailed(context)
    except AssertionError:
        # success path: refresh the instance model and report its status
        refresh_wait = defer.waitForDeferred(self.statusInstance(label))
        yield refresh_wait
        result = refresh_wait.getResult()
        result['code'] = context['code']
    except:
        # Failure() captures the exception currently being handled
        failure = Failure()
        if not failure.check(DroneCommandFailed):
            summary = "%s: %s" % (getException(failure),
                                  failure.getErrorMessage())
            context = {'error': failure, 'code': 253, 'description': summary}
        else:
            context = failure.value.resultContext
            template = '%(description)s'
        result = self.resultContext(template, instance, **context)

    # best effort: whatever happened, record that this instance is wanted down
    try:
        instance = self.model.getInstance(label)
        instance.shouldBeRunning = False
    except:
        pass
    yield result
class TubConnector:
    """I am used to make an outbound connection. I am given a target TubID
    and a list of locationHints, and I try all of them until I establish a
    Broker connected to the target. I will consider redirections returned
    along the way. The first hint that yields a connected Broker will stop
    the search. If targetTubID is None, we are going to make an unencrypted
    connection.

    This is a single-use object. The connection attempt begins as soon as my
    connect() method is called.

    @param locationHints: the list of 'host:port' hints where the remote tub
                          can be found.
    """

    # the Failure that will be reported to Tub.connectionFailed(), or None
    # while nothing has gone wrong yet
    failureReason = None
    # default number of seconds to wait; the tub's 'connect_timeout' option
    # overrides it
    CONNECTION_TIMEOUT = 60
    # the pending reactor.callLater() call for the timeout, or None
    timer = None

    def __init__(self, parent, tubref):
        """Remember the parent Tub and the target reference, and take the
        initial list of locations to try from the target."""
        self.tub = parent
        self.target = tubref
        self.remainingLocations = self.target.getLocations()
        # attemptedLocations keeps track of where we've already tried to
        # connect, so we don't try them twice.
        self.attemptedLocations = []

        # pendingConnections contains a (PBClientFactory -> Connector) map
        # for pairs where connectTCP has started, but negotiation has not yet
        # completed. We keep track of these so we can shut them down when we
        # stop connecting (either because one of the connections succeeded,
        # or because someone told us to give up).
        self.pendingConnections = {}

    def connect(self):
        """Begin the connection process. This should only be called once.
        This will either result in the successful Negotiation object invoking
        the parent Tub's brokerAttached() method, or us calling the Tub's
        connectionFailed() method."""
        timeout = self.tub.options.get('connect_timeout',
                                       self.CONNECTION_TIMEOUT)
        self.timer = reactor.callLater(timeout, self.connectionTimedOut)
        self.active = True
        self.connectToAll()

    def stopConnectionTimer(self):
        """Cancel the timeout timer, if one is pending. Safe to call more
        than once."""
        if self.timer:
            self.timer.cancel()
            # reset explicitly instead of 'del', which only worked because
            # the class attribute happens to shadow-through as None
            self.timer = None

    def shutdown(self):
        """Give up: stop the timer and disconnect every pending
        connection."""
        self.active = False
        self.stopConnectionTimer()
        # iterate over a snapshot: disconnect() may call back into
        # clientConnectionFailed/negotiationFailed, which delete entries
        # from pendingConnections while we are still looping
        for c in list(self.pendingConnections.values()):
            c.disconnect()

    def connectToAll(self):
        """Start a TCP connection to every remaining location that has not
        been attempted yet, then check whether we have run out of options."""
        while self.remainingLocations:
            location = self.remainingLocations.pop()
            if location in self.attemptedLocations:
                continue
            self.attemptedLocations.append(location)
            host, port = location.split(":")
            port = int(port)
            f = TubConnectorClientFactory(self, host)
            c = reactor.connectTCP(host, port, f)
            self.pendingConnections[f] = c
            if self.tub.options.get("debug_stall_second_connection"):
                # for unit tests, hold off on making the second connection
                # for a moment. This allows the first connection to get to a
                # known state.
                reactor.callLater(0.1, self.connectToAll)
                return
        self.checkForFailure()

    def connectionTimedOut(self):
        """Timer callback: record a timeout failure, shut everything down
        and report the failure."""
        # the timer has already fired, so it must not be cancelled
        self.timer = None
        why = "no connection established within client timeout"
        self.failureReason = Failure(NegotiationError(why))
        self.shutdown()
        self.failed()

    def clientConnectionFailed(self, factory, reason):
        """Called if a TCP connection cannot be established."""
        if not self.failureReason:
            self.failureReason = reason
        del self.pendingConnections[factory]
        self.checkForFailure()

    def redirectReceived(self, newLocation):
        """Queue a location we were redirected to and try it."""
        # the redirected connection will disconnect soon, which will trigger
        # negotiationFailed(), so we don't have to do a
        # del self.pendingConnections[factory]
        self.remainingLocations.append(newLocation)
        self.connectToAll()

    def negotiationFailed(self, factory, reason):
        """Called if protocol negotiation cannot be established, or if the
        connection is closed for any reason prior to switching to the Banana
        protocol."""
        assert isinstance(reason, Failure), \
               "Hey, %s isn't a Failure" % (reason,)
        # 'reason' is a Failure wrapper, so the wrapped exception type has to
        # be tested with check(); isinstance() against the exception class
        # could never match.
        if (not self.failureReason or
            reason.check(NegotiationError)):
            # don't let mundane things like ConnectionFailed override the
            # actually significant ones like NegotiationError
            self.failureReason = reason
        del self.pendingConnections[factory]
        self.checkForFailure()

    def negotiationComplete(self, factory):
        """Called when 'factory' has just completed negotiation, so abandon
        all the other connection attempts."""
        self.active = False
        self.stopConnectionTimer()
        del self.pendingConnections[factory]  # this one succeeded
        # abandon the others. This will trigger clientConnectionFailed and/or
        # negotiationFailed calls, which delete from pendingConnections, so
        # walk a snapshot of the keys rather than the live dict.
        for f in list(self.pendingConnections.keys()):
            f.disconnect()

    def checkForFailure(self):
        """Report failure once there is nothing left to try and nothing
        still in flight."""
        if not self.active:
            return
        if self.remainingLocations:
            return
        if self.pendingConnections:
            return
        if self.failureReason is None:
            # we ran out of options without any attempt ever failing (e.g.
            # there were no location hints at all), so there is no Failure
            # to inspect or report yet; make one rather than crash on None.
            why = "no location hints left to try"
            self.failureReason = Failure(NegotiationError(why))
        # we have no more options, so the connection attempt will fail. The
        # getBrokerForTubRef may have succeeded, however, if the other side
        # tried to connect to us at exactly the same time, they were the
        # master, they established their connection first (but the final
        # decision is still in flight), and they hung up on our connection
        # because they felt it was a duplicate. So, if self.failureReason
        # indicates a duplicate connection, do not signal a failure here. We
        # leave the connection timer in place in case they lied about having
        # a duplicate connection ready to go.
        if (self.failureReason.check(RemoteNegotiationError) and
            isSubstring(self.failureReason.value.args[0],
                        "Duplicate connection")):
            log.msg("TubConnector.checkForFailure: connection attempt "
                    "failed because the other end decided ours was a "
                    "duplicate connection, so we won't signal the "
                    "failure here")
            return
        self.failed()

    def failed(self):
        """Stop the timer and tell the Tub that connecting to the target
        failed, passing along the recorded failureReason."""
        self.stopConnectionTimer()
        self.active = False
        self.tub.connectionFailed(self.target, self.failureReason)
def mock_errback(result: Failure):
    """Errback for the test: require a cancellation and count the call.

    ``self`` is the enclosing test case, captured from the outer scope.
    """
    was_cancelled = result.check(CancelledError)
    self.assertTrue(was_cancelled)
    self.errbacks_called = self.errbacks_called + 1
def startBuild(self, build_status, workerforbuilder):
    """Set up the build, then start it by invoking the first Step.

    The stages, in order: register the build with the data API, subscribe
    to the stop-build control message, prepare the worker, ping it, tell it
    a build is starting, acquire locks, and start the first step. When a
    stage fails, the failure is passed to buildPreparationFailure() and the
    build is finished early with EXCEPTION or RETRY.

    Stated contract: returns a Deferred which will fire when the build
    finishes, and which is guaranteed to never errback. NOTE(review): the
    body is a generator, so this presumably relies on a decorator that is
    not visible here -- confirm.
    """
    self.workerforbuilder = workerforbuilder
    self.conn = None
    assigned_worker = workerforbuilder.worker

    log.msg("%s.startBuild" % self)

    self.build_status = build_status
    # TODO: this will go away when build collapsing is implemented; until
    # then we just assign the build to the first buildrequest
    first_request_id = self.requests[0].id

    builderid = yield self.getBuilderId()
    build_ids = yield self.master.data.updates.addBuild(
        builderid=builderid,
        buildrequestid=first_request_id,
        workerid=assigned_worker.workerid)
    self.buildid, self.number = build_ids

    self.stopBuildConsumer = yield self.master.mq.startConsuming(
        self.controlStopBuild,
        ("control", "builds", str(self.buildid), "stop"))
    self.setupOwnProperties()

    # then narrow WorkerLocks down to the right worker
    narrowed_locks = []
    for lock_id, access in self.locks:
        narrowed_locks.append(
            (lock_id.getLock(workerforbuilder.worker), access))
    self.locks = narrowed_locks

    metrics.MetricCountEvent.log('active_builds', 1)

    # make sure properties are available to people listening on 'new'
    # events
    yield self._flushProperties(None)
    self.build_status.buildStarted(self)
    yield self.master.data.updates.setBuildStateString(self.buildid,
                                                       u'starting')
    yield self.master.data.updates.generateNewBuildEvent(self.buildid)

    try:
        self.setupBuild()  # create .steps
    except Exception:
        # Failure() captures the exception currently being handled
        yield self.buildPreparationFailure(Failure(), "worker_prepare")
        self.buildFinished(['Build.setupBuild', 'failed'], EXCEPTION)
        return

    # flush properties in the beginning of the build
    yield self._flushProperties(None)

    yield self.master.data.updates.setBuildStateString(self.buildid,
                                                       u'preparing worker')
    try:
        prepare_outcome = yield workerforbuilder.prepare(self)
    except Exception:
        prepare_outcome = Failure()

    # Only the literal True means the worker is ready and the build may
    # start; anything else (a Failure included) means we don't start it.
    if prepare_outcome is not True:
        yield self.buildPreparationFailure(prepare_outcome,
                                           "worker_prepare")
        if self.stopped:
            self.buildFinished(["worker", "cancelled"], self.results)
        elif (isinstance(prepare_outcome, Failure) and
              prepare_outcome.check(
                  interfaces.LatentWorkerCannotSubstantiate)):
            self.buildFinished(["worker", "cannot", "substantiate"],
                               EXCEPTION)
        else:
            self.buildFinished(["worker", "not", "available"], RETRY)
        return

    # ping the worker to make sure they're still there. If they've
    # fallen off the map (due to a NAT timeout or something), this
    # will fail in a couple of minutes, depending upon the TCP
    # timeout.
    #
    # TODO: This can unnecessarily suspend the starting of a build, in
    # situations where the worker is live but is pushing lots of data to
    # us in a build.
    yield self.master.data.updates.setBuildStateString(self.buildid,
                                                       u'pinging worker')
    log.msg("starting build %s.. pinging the worker %s"
            % (self, workerforbuilder))
    try:
        ping_outcome = yield workerforbuilder.ping()
    except Exception:
        ping_outcome = Failure()

    if ping_outcome is not True:
        yield self.buildPreparationFailure(ping_outcome, "worker_ping")
        self.buildFinished(["worker", "not", "pinged"], RETRY)
        return

    self.conn = workerforbuilder.worker.conn

    # To retrieve the builddir property, the worker must be attached as we
    # depend on its path_module. Latent workers become attached only after
    # preparing them, so we can't setup the builddir property earlier like
    # the rest of properties
    self.setupWorkerBuildirProperty(workerforbuilder)
    self.setupWorkerForBuilder(workerforbuilder)
    self.subs = self.conn.notifyOnDisconnect(self.lostRemote)

    # tell the remote that it's starting a build, too
    try:
        yield self.conn.remoteStartBuild(self.builder.name)
    except Exception:
        yield self.buildPreparationFailure(Failure(), "start_build")
        self.buildFinished(["worker", "not", "building"], RETRY)
        return

    yield self.master.data.updates.setBuildStateString(self.buildid,
                                                       u'acquiring locks')
    yield self.acquireLocks()

    yield self.master.data.updates.setBuildStateString(self.buildid,
                                                       u'building')

    # start the sequence of steps
    self.startNextStep()
class TubConnector:
    """I am used to make an outbound connection. I am given a target TubID
    and a list of locationHints, and I try all of them until I establish a
    Broker connected to the target. I will consider redirections returned
    along the way. The first hint that yields a connected Broker will stop
    the search. If targetTubID is None, we are going to make an unencrypted
    connection.

    This is a single-use object. The connection attempt begins as soon as my
    connect() method is called.

    @param locationHints: the list of 'host:port' hints where the remote tub
                          can be found.
    """

    # the Failure that will be reported to Tub.connectionFailed(), or None
    # while nothing has gone wrong yet
    failureReason = None
    # default number of seconds to wait; the tub's 'connect_timeout' option
    # overrides it
    CONNECTION_TIMEOUT = 60
    # the pending reactor.callLater() call for the timeout, or None
    timer = None

    def __init__(self, parent, tubref):
        """Remember the parent Tub and the target reference, and take the
        initial list of locations to try from the target."""
        self.tub = parent
        self.target = tubref
        self.remainingLocations = self.target.getLocations()
        # attemptedLocations keeps track of where we've already tried to
        # connect, so we don't try them twice.
        self.attemptedLocations = []

        # pendingConnections contains a (PBClientFactory -> Connector) map
        # for pairs where connectTCP has started, but negotiation has not yet
        # completed. We keep track of these so we can shut them down when we
        # stop connecting (either because one of the connections succeeded,
        # or because someone told us to give up).
        self.pendingConnections = {}

    def connect(self):
        """Begin the connection process. This should only be called once.
        This will either result in the successful Negotiation object invoking
        the parent Tub's brokerAttached() method, or us calling the Tub's
        connectionFailed() method."""
        timeout = self.tub.options.get('connect_timeout',
                                       self.CONNECTION_TIMEOUT)
        self.timer = reactor.callLater(timeout, self.connectionTimedOut)
        self.active = True
        self.connectToAll()

    def stopConnectionTimer(self):
        """Cancel the timeout timer, if one is pending. Safe to call more
        than once."""
        if self.timer:
            self.timer.cancel()
            # reset explicitly instead of 'del', which only worked because
            # the class attribute happens to shadow-through as None
            self.timer = None

    def shutdown(self):
        """Give up: stop the timer and disconnect every pending
        connection."""
        self.active = False
        self.stopConnectionTimer()
        # iterate over a snapshot: disconnect() may call back into
        # clientConnectionFailed/negotiationFailed, which delete entries
        # from pendingConnections while we are still looping
        for c in list(self.pendingConnections.values()):
            c.disconnect()

    def connectToAll(self):
        """Start a TCP connection to every remaining location that has not
        been attempted yet, then check whether we have run out of options."""
        while self.remainingLocations:
            location = self.remainingLocations.pop()
            if location in self.attemptedLocations:
                continue
            self.attemptedLocations.append(location)
            host, port = location.split(":")
            port = int(port)
            f = TubConnectorClientFactory(self, host)
            c = reactor.connectTCP(host, port, f)
            self.pendingConnections[f] = c
            if self.tub.options.get("debug_stall_second_connection"):
                # for unit tests, hold off on making the second connection
                # for a moment. This allows the first connection to get to a
                # known state.
                reactor.callLater(0.1, self.connectToAll)
                return
        self.checkForFailure()

    def connectionTimedOut(self):
        """Timer callback: record a timeout failure, shut everything down
        and report the failure."""
        # the timer has already fired, so it must not be cancelled
        self.timer = None
        why = "no connection established within client timeout"
        self.failureReason = Failure(NegotiationError(why))
        self.shutdown()
        self.failed()

    def clientConnectionFailed(self, factory, reason):
        """Called if a TCP connection cannot be established."""
        if not self.failureReason:
            self.failureReason = reason
        del self.pendingConnections[factory]
        self.checkForFailure()

    def redirectReceived(self, newLocation):
        """Queue a location we were redirected to and try it."""
        # the redirected connection will disconnect soon, which will trigger
        # negotiationFailed(), so we don't have to do a
        # del self.pendingConnections[factory]
        self.remainingLocations.append(newLocation)
        self.connectToAll()

    def negotiationFailed(self, factory, reason):
        """Called if protocol negotiation cannot be established, or if the
        connection is closed for any reason prior to switching to the Banana
        protocol."""
        assert isinstance(reason, Failure), \
               "Hey, %s isn't a Failure" % (reason,)
        # 'reason' is a Failure wrapper, so the wrapped exception type has to
        # be tested with check(); isinstance() against the exception class
        # could never match.
        if (not self.failureReason or
            reason.check(NegotiationError)):
            # don't let mundane things like ConnectionFailed override the
            # actually significant ones like NegotiationError
            self.failureReason = reason
        del self.pendingConnections[factory]
        self.checkForFailure()

    def negotiationComplete(self, factory):
        """Called when 'factory' has just completed negotiation, so abandon
        all the other connection attempts."""
        self.active = False
        self.stopConnectionTimer()
        del self.pendingConnections[factory]  # this one succeeded
        # abandon the others. This will trigger clientConnectionFailed and/or
        # negotiationFailed calls, which delete from pendingConnections, so
        # walk a snapshot of the keys rather than the live dict.
        for f in list(self.pendingConnections.keys()):
            f.disconnect()

    def checkForFailure(self):
        """Report failure once there is nothing left to try and nothing
        still in flight."""
        if not self.active:
            return
        if self.remainingLocations:
            return
        if self.pendingConnections:
            return
        if self.failureReason is None:
            # we ran out of options without any attempt ever failing (e.g.
            # there were no location hints at all), so there is no Failure
            # to inspect or report yet; make one rather than crash on None.
            why = "no location hints left to try"
            self.failureReason = Failure(NegotiationError(why))
        # we have no more options, so the connection attempt will fail. The
        # getBrokerForTubRef may have succeeded, however, if the other side
        # tried to connect to us at exactly the same time, they were the
        # master, they established their connection first (but the final
        # decision is still in flight), and they hung up on our connection
        # because they felt it was a duplicate. So, if self.failureReason
        # indicates a duplicate connection, do not signal a failure here. We
        # leave the connection timer in place in case they lied about having
        # a duplicate connection ready to go.
        if (self.failureReason.check(RemoteNegotiationError) and
            isSubstring(self.failureReason.value.args[0],
                        "Duplicate connection")):
            log.msg("TubConnector.checkForFailure: connection attempt "
                    "failed because the other end decided ours was a "
                    "duplicate connection, so we won't signal the "
                    "failure here")
            return
        self.failed()

    def failed(self):
        """Stop the timer and tell the Tub that connecting to the target
        failed, passing along the recorded failureReason."""
        self.stopConnectionTimer()
        self.active = False
        self.tub.connectionFailed(self.target, self.failureReason)