def get_service(self, name, request):
    """Hand out a service instance for ``name``, growing the pool if needed.

    While fewer than ``self.spool_size`` instances are cached for ``name``,
    a new ``Service`` is created, appended to the cache, connected with the
    request's trace id, and the callable returned by
    ``self.move_to_inactive(app, name)`` is scheduled on the IO loop after
    a delay of between one and two ``self.refresh_period``.  When the pool
    is already full, a random cached instance is returned instead.

    The result is delivered through ``gen.Return``: the newly connected
    app, a randomly chosen cached app, or ``None`` when creating or
    connecting the new instance failed (the failure is logged and the
    instance is dropped from the cache).
    """
    # cache isn't full for the current application
    if len(self.cache[name]) < self.spool_size:
        logger = request.logger
        # Bound up front so the error handler can still run when the
        # Service constructor itself raises.
        app = None
        try:
            app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
            logger.info("%s: creating an instance of %s", app.id, name)
            # registered before connecting, so it counts against the pool size
            self.cache[name].append(app)
            yield app.connect(request.traceid)
            logger.info("%s: connect to an app %s endpoint %s ",
                        app.id, app.name, "{0}:{1}".format(*app.address))

            timeout = (1 + random.random()) * self.refresh_period
            self.io_loop.call_later(timeout, self.move_to_inactive(app, name))
        except Exception as err:
            logger.error("%s: unable to connect to `%s`: %s",
                         getattr(app, "id", "-"), name, err)
            if app is not None:
                drop_app_from_cache(self.cache, app, name)
            raise gen.Return()
        else:
            raise gen.Return(app)

    # get an instance from cache
    chosen = random.choice(self.cache[name])
    raise gen.Return(chosen)
def get_service(self, name, request):
    """Hand out a service instance for ``name``, growing the pool if needed.

    While fewer than ``self.spoolSize`` instances are cached for ``name``,
    a new ``Service`` is created, appended to the cache, connected with the
    request's trace id, and the callable returned by
    ``self.move_to_inactive(app, name)`` is scheduled on the IO loop after
    a delay of between one and two ``self.refreshPeriod``.  When the pool
    is already full, a random cached instance is returned instead.

    The result is delivered through ``gen.Return``: the newly connected
    app, a randomly chosen cached app, or ``None`` when creating or
    connecting the new instance failed (the failure is logged and the
    instance is removed from the cache).
    """
    # cache isn't full for the current application
    if len(self.cache[name]) < self.spoolSize:
        logger = request.logger
        # Bound up front so the error handler can still run when the
        # Service constructor itself raises.
        app = None
        try:
            app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
            logger.info("%s: creating an instance of %s", app.id, name)
            # registered before connecting, so it counts against the pool size
            self.cache[name].append(app)
            yield app.connect(request.traceid)
            logger.info("%s: connect to an app %s endpoint %s ",
                        app.id, app.name, "{0}:{1}".format(*app.address))

            timeout = (1 + random.random()) * self.refreshPeriod
            self.io_loop.call_later(timeout, self.move_to_inactive(app, name))
        except Exception as err:
            logger.error("%s: unable to connect to `%s`: %s",
                         getattr(app, "id", "-"), name, err)
            # a failed instance must not stay selectable from the cache
            if app is not None and app in self.cache[name]:
                self.cache[name].remove(app)
            raise gen.Return()
        else:
            raise gen.Return(app)

    # get an instance from cache
    chosen = random.choice(self.cache[name])
    raise gen.Return(chosen)
def stop_app(self, appname):
    """Stop ``appname`` on every host in ``self.hosts``.

    For each host a non-blocking ``node`` service is connected and
    ``app.Stop(...).execute()`` is run against it.  Progress and results
    are reported both to ``log`` and to ``self.logcallback``.  The node
    connection is always disconnected afterwards, whether or not the
    stop succeeded.

    Yields a single ``(succeed, failed)`` tuple of host lists as the
    final value of the generator.
    """
    succeed = list()
    failed = list()
    hosts_count = len(self.hosts)
    # count from 1 so the progress log reads "1/N ... N/N"
    for i, host in enumerate(self.hosts, 1):
        log.info("Stop %s at host %d/%d %s" % (appname, i, hosts_count, host))
        nodeinstance = None
        try:
            nodeinstance = Service("node", blockingConnect=False)
            yield nodeinstance.connect(host=host)
            res = yield app.Stop(nodeinstance, appname).execute()
            self.logcallback(str(res) + '\n')
        except Exception as e:
            # any failure (connect or stop) marks the host as failed
            item = "Unable to connect to node at host %s %s\n" % (host, e)
            log.error(item)
            self.logcallback(item)
            failed.append(host)
        else:
            item = "App %s has been stoped successfully\n" % appname
            log.info(item)
            self.logcallback(item)
            succeed.append(host)
        finally:
            # nodeinstance stays None when the constructor itself failed
            if nodeinstance is not None:
                nodeinstance.disconnect()
    yield (succeed, failed)
def wrapper():
    """Replace the active ``app`` with a freshly connected instance.

    A new ``Service`` for ``name`` is created and connected.  On success
    the callable returned by ``self.move_to_inactive(new_app, name)`` is
    scheduled after a delay of between one and two ``self.refresh_period``,
    the new instance is added to the cache, and the current ``app`` is
    handed to ``self.migrate_from_cache_to_inactive``.  On failure the
    error is logged and the replacement of ``app`` is re-scheduled after
    ``self.get_timeout(name)``.

    ``self``, ``app`` and ``name`` come from the enclosing scope.
    """
    active_apps = len(self.cache[name])
    self.logger.info(
        "%s: preparing to moving %s %s to an inactive queue (active %d)",
        app.id, app.name, "{0}:{1}".format(*app.address), active_apps)

    # Bound up front: if the constructor raises, the handler below must
    # still be able to log and, crucially, re-schedule the rotation.
    new_app = None
    try:
        new_app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
        self.logger.info("%s: creating an instance of %s", new_app.id, name)
        yield new_app.connect()
        self.logger.info("%s: connect to an app %s endpoint %s ",
                         new_app.id, new_app.name,
                         "{0}:{1}".format(*new_app.address))

        timeout = (1 + random.random()) * self.refresh_period
        self.io_loop.call_later(timeout, self.move_to_inactive(new_app, name))
        # add to cache only after successfully connected
        self.cache[name].append(new_app)
    except Exception as err:
        self.logger.error("%s: unable to connect to `%s`: %s",
                          getattr(new_app, "id", "-"), name, err)
        # schedule later
        self.io_loop.call_later(self.get_timeout(name),
                                self.move_to_inactive(app, name))
    else:
        self.logger.info("%s: move %s %s to an inactive queue",
                         app.id, app.name, "{0}:{1}".format(*app.address))
        # current active app will be dropped here
        self.migrate_from_cache_to_inactive(app, name)
def stop_app(self, appname):
    """Stop ``appname`` on every host in ``self.hosts``.

    For each host a non-blocking ``node`` service is connected and
    ``app.Stop(...).execute()`` is run against it.  Progress and results
    are reported both to ``log`` and to ``self.logcallback``.  The node
    connection is always disconnected afterwards, whether or not the
    stop succeeded.

    Yields a single ``(succeed, failed)`` tuple of host lists as the
    final value of the generator.
    """
    succeed = list()
    failed = list()
    hosts_count = len(self.hosts)
    # count from 1 so the progress log reads "1/N ... N/N"
    for i, host in enumerate(self.hosts, 1):
        log.info("Stop %s at host %d/%d %s" % (appname, i, hosts_count, host))
        nodeinstance = None
        try:
            nodeinstance = Service("node", blockingConnect=False)
            yield nodeinstance.connect(host=host)
            res = yield app.Stop(nodeinstance, appname).execute()
            self.logcallback(str(res) + "\n")
        except Exception as e:
            # any failure (connect or stop) marks the host as failed
            item = "Unable to connect to node at host %s %s\n" % (host, e)
            log.error(item)
            self.logcallback(item)
            failed.append(host)
        else:
            item = "App %s has been stoped successfully\n" % appname
            log.info(item)
            self.logcallback(item)
            succeed.append(host)
        finally:
            # nodeinstance stays None when the constructor itself failed
            if nodeinstance is not None:
                nodeinstance.disconnect()
    yield (succeed, failed)
def reelect_app(self, request, app):
    """Try to reach the same application through another locator endpoint.

    The current ``app`` is disconnected, then its locator endpoints are
    tried one after another: a fresh ``Locator`` is connected first, then
    a new ``Service`` for the same application name, each bounded by
    ``self.service_connect_timeout``.  After every attempt the first
    endpoint is dropped from the locator of whatever ``app`` currently
    refers to.  Once no endpoints remain, the call is delegated to
    ``self.proxy.reelect_app``.

    The connected (or delegated) app is delivered via ``gen.Return``.
    A ``ServiceError`` other than "service not available" from the
    locator category is re-raised; ``PluginApplicationError`` is raised
    if the loop finishes without returning.
    """
    # disconnect app explicitly to break possibly existing connection
    app.disconnect()

    # one pass per endpoint currently known to the app locator, plus a
    # final pass that falls back to the common pool
    passes = len(app.locator.endpoints) + 1
    for _ in xrange(passes):
        # last chance to take app from common pool
        if not app.locator.endpoints:
            request.logger.info(
                "giving up on connecting to dist-info hosts, falling back to common pool processing"
            )
            app = yield self.proxy.reelect_app(request, app)
            raise gen.Return(app)

        try:
            # always create new locator to prevent locking as we do connect with timeout
            # however lock can be still held during TCP timeout
            fresh_locator = Locator(endpoints=app.locator.endpoints)
            request.logger.info("connecting to locator %s",
                                fresh_locator.endpoints[0])

            # first try to connect to locator only on remote host with timeout
            yield gen.with_timeout(self.service_connect_timeout,
                                   fresh_locator.connect())
            request.logger.debug("connected to locator %s for %s",
                                 fresh_locator.endpoints[0], app.name)

            app = Service(app.name, locator=fresh_locator,
                          timeout=RESOLVE_TIMEOUT)
            # try to resolve and connect to application itself
            yield gen.with_timeout(self.service_connect_timeout, app.connect())
            request.logger.debug("connected to application %s via %s",
                                 app.name, app.endpoints)
        except gen.TimeoutError:
            # on timeout try next endpoint first
            request.logger.warning(
                "timed out while connecting to application")
            continue
        except ServiceError as err:
            request.logger.warning("got error while resolving app - %s", err)
            app_is_down = (err.category in LOCATORCATEGORY and
                           err.code == ESERVICENOTAVAILABLE)
            if not app_is_down:
                raise err
            # if the application is down - also try next endpoint
            continue
        finally:
            # drop first endpoint to start next connection from different endpoint
            # we do this, as default logic of connection attempts in locator do not fit here
            app.locator.endpoints = app.locator.endpoints[1:]

        # return connected app
        raise gen.Return(app)

    raise PluginApplicationError(42, 42, "could not connect to application")
def main():
    """Check that the locator service is reachable.

    Prints a ``code;message`` status line: ``0;Ok`` when the connection
    succeeds, otherwise ``2; error while connect to locator`` followed by
    process exit with status 1.
    """
    # local import keeps the block self-contained
    import sys

    locator = Service("locator")
    try:
        yield locator.connect()
    except Exception:
        # Exception rather than a bare except, so GeneratorExit and
        # KeyboardInterrupt are not reported as connection failures
        print("2; error while connect to locator")
        sys.exit(1)
    print("0;Ok")
def get_service_with_seed(self, name, seed, request):
    """Create and connect a ``Service`` for ``name`` bound to ``seed``.

    The instance is connected with the request's trace id.  The connected
    app is delivered via ``gen.Return``; when connecting fails the error
    is logged and ``None`` is delivered instead.
    """
    logger = request.logger
    app = Service(name, seed=seed, locator=self.locator)
    try:
        logger.info("%s: creating an instance of %s, seed %s", app.id, name, seed)
        # the trace id is taken from the request, as in get_service
        yield app.connect(request.traceid)
    except Exception as err:
        logger.error("%s: unable to connect to `%s`: %s", app.id, name, err)
        raise gen.Return()

    raise gen.Return(app)
def get_service_with_seed(self, name, seed, request):
    """Create and connect a ``Service`` for ``name`` bound to ``seed``.

    The connected app is delivered via ``gen.Return``; when connecting
    fails the error is logged through the request logger and ``None`` is
    delivered instead.
    """
    req_log = request.logger
    app = Service(name, seed=seed, locator=self.locator)

    try:
        req_log.info("%s: creating an instance of %s, seed %s",
                     app.id, name, seed)
        yield app.connect(request.traceid)
    except Exception as exc:
        req_log.error("%s: unable to connect to `%s`: %s", app.id, name, exc)
        outcome = None
    else:
        outcome = app

    # single exit point: the app on success, None on failure
    raise gen.Return(outcome)
def main():
    """Check that the locator is reachable and can resolve ``graphite``.

    Prints a ``code;message`` status line: ``0;Ok`` when both steps
    succeed; ``1; ...`` when connecting to the locator fails and
    ``2; ...`` when resolving the service fails, each followed by process
    exit with status 1.
    """
    # local import keeps the block self-contained
    import sys

    locator = Service("locator")
    try:
        yield locator.connect()
    except Exception:
        # Exception rather than a bare except, so GeneratorExit and
        # KeyboardInterrupt are not reported as check failures
        print("1; error while connect to locator")
        sys.exit(1)

    try:
        chan = yield locator.resolve("graphite")
        # the reply itself is not inspected; only success matters
        yield chan.rx.get()
    except Exception:
        print("2; error while resolv service graphite")
        sys.exit(1)
    print("0;Ok")
def reelect_app(self, request, app):
    """Try to reach the same application through another locator endpoint.

    The current ``app`` is disconnected, then its locator endpoints are
    tried in rotation, one attempt per endpoint: a fresh ``Locator`` is
    connected first, then a new ``Service`` for the same application
    name, each bounded by ``self.service_connect_timeout``.

    The connected app is delivered via ``gen.Return``.  A ``ServiceError``
    other than "service not available" from the locator category is
    re-raised; ``PluginApplicationError`` is raised when every attempt
    failed.
    """
    # store current endpoints of locator
    endpoints = app.locator.endpoints

    # disconnect app explicitly to break possibly existing connection
    app.disconnect()

    # try x times, where x is the number of different endpoints in app locator.
    for _ in xrange(len(endpoints)):
        try:
            # move first endpoint to the end to start new connection from different endpoint
            # we do this, as default logic of connection attempts in locator do not fit here
            endpoints = endpoints[1:] + endpoints[:1]

            # always create new locator to prevent locking as we do connect with timeout
            # however lock can be still held during TCP timeout
            fresh_locator = Locator(endpoints=endpoints)
            request.logger.info("connecting to locator %s",
                                fresh_locator.endpoints[0])

            # first try to connect to locator only on remote host with timeout
            yield gen.with_timeout(self.service_connect_timeout,
                                   fresh_locator.connect())
            request.logger.debug("connected to locator %s for %s",
                                 fresh_locator.endpoints[0], app.name)

            app = Service(app.name, locator=fresh_locator,
                          timeout=RESOLVE_TIMEOUT)
            # try to resolve and connect to application itself
            yield gen.with_timeout(self.service_connect_timeout, app.connect())
            request.logger.debug("connected to application %s via %s",
                                 app.name, app.endpoints)
        except gen.TimeoutError:
            # on timeout try next endpoint first
            request.logger.warning("timed out while connecting to application")
            continue
        except ServiceError as err:
            request.logger.warning("got error while resolving app - %s", err)
            app_is_down = (err.category in LOCATORCATEGORY and
                           err.code == ESERVICENOTAVAILABLE)
            if not app_is_down:
                raise err
            # if the application is down - also try next endpoint
            continue

        # return connected app
        raise gen.Return(app)

    raise PluginApplicationError(42, 42, "could not connect to application")
def app_info(task, response):
    """Collect ``info()`` of an application from every known host.

    ``task`` must provide ``appname``, ``version`` and ``username``.  The
    app must be present in the list of apps obtained for the user,
    otherwise an error is reported.  For each host from ``hostdb.hosts()``
    the app is connected and its ``info()`` stored under the host key;
    hosts that fail are logged and skipped.

    The outcome is written to ``response`` (the per-host mapping on
    success, error ``-500`` for a missing task key, ``-100`` for anything
    else) and ``response`` is always closed.
    """
    collected = {}
    try:
        name = task["appname"]
        version = task["version"]
        username = task["username"]
        # todo: without version - search by mask? regex?
        appname = appname_from_name_version(name, version)

        # NOTE(review): a set username selects the full app list while an
        # empty one is passed to db.user_apps(); this looks inverted,
        # confirm against the callers before changing.
        if username:
            # not admin - all apps
            known_apps = yield app.List(storage).execute()
        else:
            known_apps = yield db.user_apps(username)

        if appname not in known_apps:
            raise ValueError("App %s doesn't exist" % appname)

        hosts = yield hostdb.hosts()
        for host in hosts:
            instance = None
            try:
                instance = Service(appname, blockingConnect=False)
                yield instance.connect(host=host)
                collected[host] = yield instance.info()
            except Exception:
                # best effort: an unreachable host is logged and skipped
                log.error("Unable to connect to app %s host %s" % (appname, host))
            finally:
                if instance is not None:
                    instance.disconnect()
    except KeyError as missing:
        response.error(-500, "Missing argument %s" % str(missing))
    except Exception as failure:
        log.error("Unknown error %s" % repr(failure))
        response.error(-100, "Unknown error %s" % repr(failure))
    else:
        response.write(collected)
    finally:
        response.close()
def wrapper():
    """Replace the active ``app`` with a freshly connected instance.

    A new ``Service`` for ``name`` is created and connected.  On success
    the callable returned by ``self.move_to_inactive(new_app, name)`` is
    scheduled after a delay of between one and two ``self.refresh_period``,
    the new instance is added to the cache, and the current ``app`` is
    handed to ``self.migrate_from_cache_to_inactive``.  On failure the
    error is logged and the replacement of ``app`` is re-scheduled after
    ``self.get_timeout(name)``.

    ``self``, ``app`` and ``name`` come from the enclosing scope.
    """
    active_apps = len(self.cache[name])
    self.logger.info("%s: preparing to moving %s %s to an inactive queue (active %d)",
                     app.id, app.name, "{0}:{1}".format(*app.address), active_apps)

    # Bound up front: if the constructor raises, the handler below must
    # still be able to log and, crucially, re-schedule the rotation.
    new_app = None
    try:
        new_app = Service(name, locator=self.locator, timeout=RESOLVE_TIMEOUT)
        self.logger.info("%s: creating an instance of %s", new_app.id, name)
        yield new_app.connect()
        self.logger.info("%s: connect to an app %s endpoint %s ",
                         new_app.id, new_app.name,
                         "{0}:{1}".format(*new_app.address))

        timeout = (1 + random.random()) * self.refresh_period
        self.io_loop.call_later(timeout, self.move_to_inactive(new_app, name))
        # add to cache only after successfully connected
        self.cache[name].append(new_app)
    except Exception as err:
        self.logger.error("%s: unable to connect to `%s`: %s",
                          getattr(new_app, "id", "-"), name, err)
        # schedule later
        self.io_loop.call_later(self.get_timeout(name),
                                self.move_to_inactive(app, name))
    else:
        self.logger.info("%s: move %s %s to an inactive queue",
                         app.id, app.name, "{0}:{1}".format(*app.address))
        # current active app will be dropped here
        self.migrate_from_cache_to_inactive(app, name)
def test_service_double_connect():
    """Connect the same ``storage`` service twice on the current IO loop.

    The first connect passes a trace id, the second passes none; the test
    succeeds when neither call raises.
    """
    loop = IOLoop.current()
    storage = Service("storage", endpoints=[["localhost", 10053]], io_loop=loop)
    connect_calls = (
        lambda: storage.connect("TRACEID"),
        storage.connect,
    )
    for connect in connect_calls:
        loop.run_sync(connect)
class ReconnectableService(object):
    """Upstream ``Service`` wrapper that retries requests and reconnects.

    Requests go through ``enqueue``, which (re)connects the upstream on
    demand, retries failed requests up to a configurable number of
    attempts, and waits between attempts with an exponentially growing
    delay capped at ``max_delay``.  Upstream addresses are cycled through
    each time a new upstream object has to be created.
    """

    DEFAULT_HOST = 'localhost'
    DEFAULT_PORT = 10053
    DEFAULT_ADDRESS = '{host}:{port}'.format(host=DEFAULT_HOST,
                                             port=DEFAULT_PORT)

    def __init__(self, app_name, addresses=None, attempts=3, delay=0.1,
                 max_delay=60.0, delay_exp=2.0, connect_timeout=None,
                 timeout=None, logger=None):
        """Store the retry settings and parse the upstream addresses.

        ``addresses`` is a comma-separated string of ``host[:port]``
        entries; it defaults to ``DEFAULT_ADDRESS`` and a missing port
        defaults to ``DEFAULT_PORT``.  ``delay``, ``max_delay`` and
        ``delay_exp`` control the back-off between attempts.  ``timeout``
        is the default request timeout and ``connect_timeout`` is passed
        to connect/reconnect calls.  A ``Logger()`` is created when no
        ``logger`` is given.
        """
        self.delay = delay
        self.max_delay = max_delay
        self.delay_exp = delay_exp
        self.connect_timeout = connect_timeout
        self.timeout = timeout
        self.attempts = attempts
        self.logger = logger or Logger()
        self._reset()

        addresses = addresses or ReconnectableService.DEFAULT_ADDRESS
        pairs = []
        for address in addresses.split(','):
            address_parts = address.split(':')
            host = address_parts[0]
            # explicit branch instead of `cond and a or b`, which would
            # silently replace an explicitly given falsy port
            if len(address_parts) > 1:
                port = int(address_parts[1])
            else:
                port = ReconnectableService.DEFAULT_PORT
            pairs.append((host, port))
        # endless round-robin over the configured (host, port) pairs
        self.addresses = itertools.cycle(pairs)

        self.app_name = app_name
        self.upstream = None

    def _reset(self):
        """Reset the current back-off delay to the initial ``delay``."""
        self._cur_delay = self.delay

    @chain.source
    def enqueue(self, handler, data, attempts=None, timeout=None):
        """Send a request to the upstream service, retrying on failure.

        ``attempts`` and ``timeout`` override the instance defaults when
        truthy.  Every failure is logged; a ``DisconnectionError`` also
        discards the upstream so the next attempt creates a new one.
        After the last allowed attempt the exception is re-raised.
        """
        attempt = 1
        request_attempts = attempts or self.attempts
        while True:
            try:
                yield self._reconnect_if_needed()
                yield self.upstream.enqueue(handler, data,
                                            timeout=timeout or self.timeout)
                self._reset()
                break
            except Exception as e:
                error_str = 'Upstream service request failed (attempt {}/{}): {}'.format(
                    attempt, request_attempts, e)
                if isinstance(e, CommunicationError):
                    self.logger.error(error_str)
                    if isinstance(e, DisconnectionError):
                        self.logger.debug(
                            'Disconnection from upstream service, '
                            'will reconnect on next attempt')
                        # dropping the upstream forces a fresh connect
                        self.upstream = None
                else:
                    self.logger.error(error_str)

                if attempt >= request_attempts:
                    self._reset()
                    raise

                attempt += 1

                yield self._delay()

    @chain.source
    def _delay(self):
        """Wait for the current delay, then grow it for the next wait."""
        d = Deferred()
        ioloop.IOLoop.current().add_timeout(
            timedelta(seconds=self._cur_delay),
            lambda: d.trigger(None))
        self.logger.debug('Delaying for {:.2f} s'.format(self._cur_delay))
        yield d
        self.logger.debug('Resuming from delay...')
        # exponential back-off, capped at max_delay
        self._cur_delay = min(self._cur_delay * self.delay_exp,
                              self.max_delay)

    @chain.source
    def _reconnect_if_needed(self):
        """Create or reconnect the upstream service when required.

        With no upstream, a new one is created and connected to the next
        address in the cycle.  An existing upstream that reports not
        connected is asked to reconnect.
        """
        if not self.upstream:
            # builtin next() works on Python 2.6+ and Python 3 alike
            host, port = next(self.addresses)
            self.upstream = Service(self.app_name, blockingConnect=False)
            self.logger.debug('Connecting to upstream service "{}", host={}, '
                              'port={}'.format(self.app_name, host, port))
            yield self.upstream.connect(host=host, port=port,
                                        timeout=self.connect_timeout,
                                        blocking=False)

        if not self.upstream.isConnected():
            try:
                self.logger.debug(
                    'Reconnecting to upstream service "{}"'.format(
                        self.app_name))
                yield self.upstream.reconnect(timeout=self.connect_timeout,
                                              blocking=False)
            except IllegalStateError:
                # seems to be in connecting state
                pass
class ReconnectableService(object):
    """Upstream ``Service`` wrapper that retries requests and reconnects.

    Requests go through ``enqueue``, which (re)connects the upstream on
    demand, retries failed requests up to a configurable number of
    attempts, and waits between attempts with an exponentially growing
    delay capped at ``max_delay``.  Upstream addresses are cycled through
    each time a new upstream object has to be created.
    """

    DEFAULT_HOST = 'localhost'
    DEFAULT_PORT = 10053
    DEFAULT_ADDRESS = '{host}:{port}'.format(host=DEFAULT_HOST,
                                             port=DEFAULT_PORT)

    def __init__(self, app_name, addresses=None, attempts=3, delay=0.1,
                 max_delay=60.0, delay_exp=2.0, connect_timeout=None,
                 timeout=None, logger=None):
        """Store the retry settings and parse the upstream addresses.

        ``addresses`` is a comma-separated string of ``host[:port]``
        entries; it defaults to ``DEFAULT_ADDRESS`` and a missing port
        defaults to ``DEFAULT_PORT``.  ``delay``, ``max_delay`` and
        ``delay_exp`` control the back-off between attempts.  ``timeout``
        is the default request timeout and ``connect_timeout`` is passed
        to connect/reconnect calls.  A ``Logger()`` is created when no
        ``logger`` is given.
        """
        self.delay = delay
        self.max_delay = max_delay
        self.delay_exp = delay_exp
        self.connect_timeout = connect_timeout
        self.timeout = timeout
        self.attempts = attempts
        self.logger = logger or Logger()
        self._reset()

        addresses = addresses or ReconnectableService.DEFAULT_ADDRESS
        pairs = []
        for address in addresses.split(','):
            address_parts = address.split(':')
            host = address_parts[0]
            # explicit branch instead of `cond and a or b`, which would
            # silently replace an explicitly given falsy port
            if len(address_parts) > 1:
                port = int(address_parts[1])
            else:
                port = ReconnectableService.DEFAULT_PORT
            pairs.append((host, port))
        # endless round-robin over the configured (host, port) pairs
        self.addresses = itertools.cycle(pairs)

        self.app_name = app_name
        self.upstream = None

    def _reset(self):
        """Reset the current back-off delay to the initial ``delay``."""
        self._cur_delay = self.delay

    @chain.source
    def enqueue(self, handler, data, attempts=None, timeout=None):
        """Send a request to the upstream service, retrying on failure.

        ``attempts`` and ``timeout`` override the instance defaults when
        truthy.  Every failure is logged; a ``DisconnectionError`` also
        discards the upstream so the next attempt creates a new one.
        After the last allowed attempt the exception is re-raised.
        """
        attempt = 1
        request_attempts = attempts or self.attempts
        while True:
            try:
                yield self._reconnect_if_needed()
                yield self.upstream.enqueue(handler, data,
                                            timeout=timeout or self.timeout)
                self._reset()
                break
            except Exception as e:
                error_str = 'Upstream service request failed (attempt {}/{}): {}'.format(
                    attempt, request_attempts, e)
                if isinstance(e, CommunicationError):
                    self.logger.error(error_str)
                    if isinstance(e, DisconnectionError):
                        self.logger.debug('Disconnection from upstream service, '
                                          'will reconnect on next attempt')
                        # dropping the upstream forces a fresh connect
                        self.upstream = None
                else:
                    self.logger.error(error_str)

                if attempt >= request_attempts:
                    self._reset()
                    raise

                attempt += 1

                yield self._delay()

    @chain.source
    def _delay(self):
        """Wait for the current delay, then grow it for the next wait."""
        d = Deferred()
        ioloop.IOLoop.current().add_timeout(
            timedelta(seconds=self._cur_delay),
            lambda: d.trigger(None))
        self.logger.debug('Delaying for {:.2f} s'.format(self._cur_delay))
        yield d
        self.logger.debug('Resuming from delay...')
        # exponential back-off, capped at max_delay
        self._cur_delay = min(self._cur_delay * self.delay_exp,
                              self.max_delay)

    @chain.source
    def _reconnect_if_needed(self):
        """Create or reconnect the upstream service when required.

        With no upstream, a new one is created and connected to the next
        address in the cycle.  An existing upstream that reports not
        connected is asked to reconnect.
        """
        if not self.upstream:
            # builtin next() works on Python 2.6+ and Python 3 alike
            host, port = next(self.addresses)
            self.upstream = Service(self.app_name, blockingConnect=False)
            self.logger.debug('Connecting to upstream service "{}", host={}, '
                              'port={}'.format(self.app_name, host, port))
            yield self.upstream.connect(host=host, port=port,
                                        timeout=self.connect_timeout,
                                        blocking=False)

        if not self.upstream.isConnected():
            try:
                self.logger.debug(
                    'Reconnecting to upstream service "{}"'.format(
                        self.app_name))
                yield self.upstream.reconnect(timeout=self.connect_timeout,
                                              blocking=False)
            except IllegalStateError:
                # seems to be in connecting state
                pass
def test_service_double_connect():
    """Connect the same ``node`` service twice in a row.

    Each connect is waited on for up to 4 (presumably seconds); the test
    succeeds when neither call raises.
    """
    loop = CocaineIO.instance()
    node = Service("node", host="localhost", port=10053, loop=loop)
    for _ in range(2):
        node.connect().wait(4)
def test_service_invalid_api_version():
    """Connect to the ``node`` service while requesting API version 100.

    NOTE(review): presumably the connect is expected to fail and the
    expectation is declared outside this function (e.g. a decorator);
    confirm against the test module.
    """
    node = Service("node", host="localhost", port=10053, version=100,
                   loop=CocaineIO.instance())
    pending = node.connect()
    pending.wait(4)