class SendWebSocket(tornado.websocket.WebSocketHandler):
    """WebSocket handler that streams an ever-increasing counter.

    ``on_message`` receives data from the client; ``write_message`` sends
    data to the client.
    """

    def __init__(self, *args, **keys):
        # The counter is set before delegating to the base constructor.
        self.i = 0
        super(SendWebSocket, self).__init__(*args, **keys)

    def open(self):
        """Start the periodic sender (callback_time is in milliseconds)."""
        self.callback = PeriodicCallback(self._send_message, 1)
        self.callback.start()
        # print() with one argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print("WebSocket opend")

    def on_message(self, message):
        """Print every message received from the client."""
        print(message)

    def _send_message(self):
        """Send the next counter value, plus a newline after every 20th."""
        self.i += 1
        self.write_message(str(self.i))
        if self.i % 20 == 0:
            self.write_message("\n")

    def on_close(self):
        """Stop the periodic sender when the connection is closed."""
        self.callback.stop()
        print("WebSocket closed")
class SocketHandler(WebSocketHandler):
    """Status-monitor socket that pushes ``data_json`` once per second."""

    def check_origin(self, origin):
        """
        Overrides the parent method to return True for any request, since we
        are working without names

        :returns: bool True
        """
        return True

    def open(self):
        """Track the new connection and start its periodic sender."""
        logging.info("Connection open from " + self.request.remote_ip)
        already_tracked = self in statusmonitor_open_sockets
        if not already_tracked:
            statusmonitor_open_sockets.append(self)
        # http://stackoverflow.com/a/19571205
        sender = PeriodicCallback(self.send_data, 1000)
        self.callback = sender
        sender.start()
        start_callback()

    def send_data(self):
        """Write the module-level ``data_json`` payload to this client."""
        self.write_message(data_json)

    def on_close(self):
        """Stop the sender and forget the connection."""
        self.callback.stop()
        still_tracked = self in statusmonitor_open_sockets
        if still_tracked:
            statusmonitor_open_sockets.remove(self)
        stop_callback()

    def send_update(self):
        """Placeholder; intentionally does nothing."""
        return None
class WebSocket(tornado.websocket.WebSocketHandler):
    """WebSocket handler that streams base64-encoded JPEG camera frames."""

    # Periodic frame sender; stays None until the client asks for frames.
    camera_loop = None

    def check_origin(self, origin):
        """Accept connections from any origin."""
        return True

    def on_message(self, message):
        """Evaluates the function pointed to by json-rpc."""
        if message == "read_camera":
            # Start the frame loop only once: a repeated "read_camera"
            # used to create an additional PeriodicCallback and drop the
            # reference to the previous one, which then could not be
            # stopped individually.
            if self.camera_loop is None:
                self.camera_loop = PeriodicCallback(self.loop, 10)
                self.camera_loop.start()
        # Extensibility for other methods
        else:
            print("Unsupported function: " + message)

    def loop(self):
        """Capture one frame and send it; stop looping once the socket closed."""
        bio = io.BytesIO()
        if args.use_usb:
            _, frame = camera.read()
            img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            img.save(bio, "JPEG")
        else:
            camera.capture(bio, "jpeg", use_video_port=True)
        try:
            self.write_message(base64.b64encode(bio.getvalue()))
        except tornado.websocket.WebSocketClosedError:
            self.camera_loop.stop()

    def on_close(self):
        """Stop the frame loop as soon as the client disconnects."""
        if self.camera_loop is not None:
            self.camera_loop.stop()
def broadcast_sys_info():
    """Read one line from the Arduino and publish the detected direction.

    On the first call the function registers itself as a PeriodicCallback
    (every 100 ms). When the line contains UP/DOWN/LEFT/RIGHT the matching
    global counter is incremented and a "sysinfo" message is published with
    a 1 for that direction and 0 for the others.
    """
    global upCount, downCount, leftCount, rightCount
    global pcb, ser

    if pcb is None:
        pcb = PeriodicCallback(broadcast_sys_info, 100)
        pcb.start()

    raw_line = serialArduino.readline()
    found = re.search("UP|DOWN|LEFT|RIGHT", str(raw_line))
    try:
        # ``found`` is None when no direction was read; the resulting
        # AttributeError is what skips the publish below.
        choice = found.group(0)
        print(choice)

        sent = {"UP": 0, "DOWN": 0, "LEFT": 0, "RIGHT": 0}
        sent[choice] += 1
        if choice == "UP":
            upCount += 1
        elif choice == "DOWN":
            downCount += 1
        elif choice == "LEFT":
            leftCount += 1
        elif choice == "RIGHT":
            rightCount += 1

        payload = {
            "left_t": sent["LEFT"],
            "right_t": sent["RIGHT"],
            "top": sent["UP"],
            "down": sent["DOWN"],
        }
        publish_data("sysinfo", payload)
    except AttributeError:
        pass
def start_wanikani_updater():
    """Request a WaniKani update now, then again every 30 seconds."""
    interval_ms = 30 * 1000  # PeriodicCallback takes milliseconds

    # Fire the first request immediately instead of waiting one interval.
    request_update_wanikani()

    wrapped_request = wrap_traceback(request_update_wanikani)
    PeriodicCallback(wrapped_request, interval_ms).start()
class SendWebSocketHandler(tornado.websocket.WebSocketHandler):
    """WebSocket handler that pushes the newest ``lm35dz`` row every 10 s.

    ``on_message`` receives data; ``write_message`` sends data.
    """

    def open(self):
        """Start the periodic sender (interval in milliseconds)."""
        self.callback = PeriodicCallback(self._send_message, 10000)
        self.callback.start()
        print("[START] WebSocket")

    def on_message(self, message):
        """Print every message received from the client."""
        print("[START] WebSocket on_message")
        print(message)

    def _send_message(self):
        """Send the first two columns of the latest row, or a placeholder."""
        cur = DB.execute("SELECT * FROM lm35dz ORDER BY YMDHHMM DESC")
        rec = cur.fetchone()
        # Identity check: ``== None`` would invoke the row type's __eq__.
        if rec is None:
            send_value = "Data Nothing"
        else:
            send_value = "%s %s" % (rec[0], rec[1])
        self.write_message(send_value)

    def on_close(self):
        """Stop the periodic sender when the connection is closed."""
        self.callback.stop()
        print("[ENDED] WebSocket")
class AsyncPopenFixed(seesaw.externalprocess.AsyncPopen):
    """
    Start the wait_callback after setting self.pipe, to prevent an infinite
    spew of "AttributeError: 'AsyncPopen' object has no attribute 'pipe'"
    """

    def run(self):
        """Spawn the subprocess on a pty and start polling for its exit.

        The child's stdout and stderr are attached to the slave end of a
        freshly opened pty; the master end is made non-blocking and
        registered with the IOLoop. The exit-polling PeriodicCallback is
        created last, after ``self.pipe`` has been assigned.
        """
        self.ioloop = IOLoop.instance()
        (master_fd, slave_fd) = pty.openpty()

        # make stdout, stderr non-blocking
        fcntl.fcntl(master_fd, fcntl.F_SETFL,
                    fcntl.fcntl(master_fd, fcntl.F_GETFL) | os.O_NONBLOCK)

        self.master_fd = master_fd
        self.master = os.fdopen(master_fd)

        # listen to stdout, stderr
        self.ioloop.add_handler(master_fd, self._handle_subprocess_stdout,
                                self.ioloop.READ)

        # Both output streams of the child go to the same pty slave.
        slave = os.fdopen(slave_fd)
        self.kwargs["stdout"] = slave
        self.kwargs["stderr"] = slave
        self.kwargs["close_fds"] = True
        self.pipe = subprocess.Popen(*self.args, **self.kwargs)

        self.stdin = self.pipe.stdin

        # check for process exit (every 250 ms); must stay after the
        # self.pipe assignment above -- see the class docstring
        self.wait_callback = PeriodicCallback(self._wait_for_end, 250)
        self.wait_callback.start()
def initialize(self, io_loop):
    ''' Start a Bokeh Server Tornado Application on a given Tornado IOLoop.

    Stores the loop on this object and on every application context, then
    creates (without starting) the periodic stats, memory, cleanup and
    keep-alive jobs. The memory and keep-alive jobs are ``None`` when their
    configured period is not positive.

    '''
    self._loop = io_loop
    for context in self._applications.values():
        context._loop = self._loop

    self._clients = set()

    self._stats_job = PeriodicCallback(
        self._log_stats, self._stats_log_frequency_milliseconds)

    mem_enabled = self._mem_log_frequency_milliseconds > 0
    self._mem_job = (
        PeriodicCallback(self._log_mem, self._mem_log_frequency_milliseconds)
        if mem_enabled else None
    )

    self._cleanup_job = PeriodicCallback(
        self._cleanup_sessions, self._check_unused_sessions_milliseconds)

    ping_enabled = self._keep_alive_milliseconds > 0
    self._ping_job = (
        PeriodicCallback(self._keep_alive, self._keep_alive_milliseconds)
        if ping_enabled else None
    )
class Service(Scaffold):
    """Scaffold that re-runs the parent ``run`` periodically on the IOLoop."""

    def __init__(self, interval=1):
        ''' interval is in seconds '''
        super(Service, self).__init__()
        # PeriodicCallback expects milliseconds.
        self.interval = interval * 1000
        self.periodicalCb = None

    def stop(self):
        """Stop the periodic callback if ``run`` has created one."""
        if self.periodicalCb:
            self.periodicalCb.stop()

    def run(self, *args, **kwargs):
        """Run once immediately, then every ``interval`` on the IOLoop.

        Blocks in ``IOLoop.instance().start()`` until the loop is stopped.
        """
        super(Service, self).run(*args, **kwargs)
        self.periodicalCb = PeriodicCallback(
            partial(super(Service, self).run, *args, **kwargs),
            self.interval, IOLoop.instance())
        self.periodicalCb.start()
        IOLoop.instance().start()

    def main(self):
        ''' Subclass this method '''
        # time.clock() was removed in Python 3.8; process_time() is the
        # documented replacement for CPU-time measurements.
        logging.error('Subclass main method... %s', time.process_time())
def start(self):
    """Schedule ``self.update`` on ``self.main_loop`` every 20 ms."""
    PeriodicCallback(self.update, 20, io_loop=self.main_loop).start()
def initialize(self,
               io_loop=None,
               keep_alive_milliseconds=37000,
               # how often to check for unused sessions
               check_unused_sessions_milliseconds=17000,
               # how long unused sessions last
               unused_session_lifetime_milliseconds=15000,
               # how often to log stats
               stats_log_frequency_milliseconds=15000,
               **kw):
    """Bind the application to an IOLoop and create its periodic jobs.

    When ``io_loop`` is None the current IOLoop is used. The stats, cleanup
    and (optional) keep-alive jobs are created but not started here;
    ``_start_async`` is queued on the loop.
    """
    self._loop = IOLoop.current() if io_loop is None else io_loop

    for context in self._applications.values():
        context._loop = self._loop

    self._clients = set()
    self._executor = ProcessPoolExecutor(max_workers=4)
    self._loop.add_callback(self._start_async)

    self._stats_job = PeriodicCallback(
        self.log_stats, stats_log_frequency_milliseconds, io_loop=self._loop)

    self._unused_session_linger_milliseconds = unused_session_lifetime_milliseconds
    self._cleanup_job = PeriodicCallback(
        self.cleanup_sessions, check_unused_sessions_milliseconds,
        io_loop=self._loop)

    # A non-positive keep-alive period disables the ping job.
    ping_job = None
    if keep_alive_milliseconds > 0:
        ping_job = PeriodicCallback(
            self.keep_alive, keep_alive_milliseconds, io_loop=self._loop)
    self._ping_job = ping_job
def _run(self):
    """Connect to the configured nsqd hosts and start the periodic jobs.

    Always starts the RDY-redistribution callback (every 5 s). When lookupd
    addresses are configured, it also performs one lookup immediately and
    schedules the polling callback to start after a random delay.
    """
    assert self.message_handler, "you must specify the Reader's message_handler"

    logger.info('[%s] starting reader for %s/%s...', self.name, self.topic, self.channel)

    for addr in self.nsqd_tcp_addresses:
        host, port_text = addr.split(':')
        self.connect_to_nsqd(host, int(port_text))

    redistribute = PeriodicCallback(
        self._redistribute_rdy_state,
        5 * 1000,
        io_loop=self.io_loop,
    )
    self.redist_periodic = redistribute
    redistribute.start()

    if self.lookupd_http_addresses:
        # trigger the first lookup query manually
        self.query_lookupd()

        self.query_periodic = PeriodicCallback(
            self.query_lookupd,
            self.lookupd_poll_interval * 1000,
            io_loop=self.io_loop,
        )

        # randomize the time we start this poll loop so that all
        # consumers don't query at exactly the same time
        jitter_fraction = random.random()
        delay = jitter_fraction * self.lookupd_poll_interval * self.lookupd_poll_jitter
        self.io_loop.add_timeout(time.time() + delay, self.query_periodic.start)
def start(self):
    """Schedule ``self.update`` every ``UPDATE_PERIOD`` (config value * 1000 ms)."""
    period_ms = self.application.config.UPDATE_PERIOD * 1000
    PeriodicCallback(self.update, period_ms, io_loop=self.main_loop).start()
class TempSocketHandler(websocket.WebSocketHandler):
    """WebSocket handler that sends a random value to its client every second."""

    # All currently open connections.
    waiters = set()
    # Most recent messages, capped at ``cache_size`` entries.
    cache = []
    cache_size = 200
    actual_flag = ''
    # Per-connection periodic sender; created in ``open``.
    callback = None

    def allow_draft76(self):
        """Opt in to the draft-76 protocol variant."""
        return True

    def open(self):
        """Register the connection and start its periodic sender."""
        TempSocketHandler.waiters.add(self)
        self.callback = PeriodicCallback(self.send_flag, 1000)
        self.callback.start()

    def on_close(self):
        """Stop the periodic sender and unregister the connection.

        The callback used to keep firing after the client had gone, so
        every tick tried to write to a closed socket.
        """
        if self.callback is not None:
            self.callback.stop()
        # discard(): do not raise if the connection was never registered.
        TempSocketHandler.waiters.discard(self)

    @classmethod
    def update_cache(cls, chat):
        """Append ``chat`` to the cache, keeping only the newest entries."""
        cls.cache.append(chat)
        if len(cls.cache) > cls.cache_size:
            cls.cache = cls.cache[-cls.cache_size:]

    def send_flag(self):
        """Send a JSON string with the current time and a random value."""
        if len(self.waiters) > 0:
            logging.info("sending message to %d waiters", len(self.waiters))
            data = '{"time":"'+str(int(time.time()))+'","data":"'+str(random.randrange(0, 100))+'"}'
            self.write_message(data)
        else:
            logging.info("No one is waiting...")
def run():
    """Parse the command line, build the site application and serve it.

    Listens on port 7777, calls the launcher's ``timestep`` every 100 ms and
    blocks in the IOLoop.
    """
    parser = ArgumentParser()
    parser.add_argument("-f", "--fake", action="store_true",
                        help="Use a fake connection for development")
    parser.add_argument("-i", "--id", default=socket.gethostname(),
                        help="ID of this site")
    args = parser.parse_args()

    connection = (FakeMissileLauncherConnection() if args.fake
                  else MissileLauncherConnection(0))
    m = MissileLauncher(connection)

    # Shared initialisation arguments for the handlers that need the launcher.
    config = {'launcher': m, 'id': args.id}
    routes = [
        (r"/position", PositionHandler, config),
        (r"/move/(-?[01])/(-?[01])", PositionHandler, config),
        (r"/move_to/([-0-9.]*)/([-0-9.]*)", MoveHandler, config),
        (r"/fire_at/([-0-9.]*)/([-0-9.]*)", FireHandler, config),
        (r"/calibrate", CalibrateHandler, config),
        (r"/", IndexHandler),
        (r"/static/(.*)", StaticFileHandler, {'path': 'static/'}),
    ]
    application = Application(routes, debug=True)
    application.listen(7777)

    PeriodicCallback(m.timestep, 100).start()

    print('Site {} listening at http://{}:7777'.format(args.id, socket.gethostname()))
    IOLoop.instance().start()
def broadcast_games():
    """Publish the JSON form of every listed game on the "lobbyinfo" channel.

    On the first call the function registers itself as a PeriodicCallback
    that fires every 6000 ms.

    NOTE(review): the nesting of the statements after the ``for g_key``
    loop was reconstructed from whitespace-collapsed source; confirm that
    publishing is meant to happen even when no games exist.

    Returns the dict that was published.
    """
    global pcb

    data_dict = {}
    games_dict = {}

    if pcb is None:
        pcb = PeriodicCallback(broadcast_games, 6000)
        pcb.start()

    g_list = ww_redis_db.keys("g_list:*")
    # NOTE(review): this loop only rebinds ``v``; g_list itself is unchanged.
    for v in g_list:
        v = v.decode("utf-8")

    if len(g_list) > 0:
        # find game with least spaces
        for g_key in g_list:
            # the game id is the second ":"-separated field of the key
            g_id = str(g_key.decode("utf-8")).split(":")[1]
            game = Game(g_id)
            games_dict["game:"+g_id] = game.as_JSON()

    data_dict["game"] = games_dict
    data_dict["channel"] = "lobbyinfo"

    publish_data("lobbyinfo", data_dict)

    return data_dict
class cpustatus(tornado.websocket.WebSocketHandler):
    """WebSocket handler that echoes the last integer it received.

    ``on_message`` receives data; ``write_message`` sends data (index.html).
    """

    def open(self):
        """Reset the state and start polling for a changed value."""
        #self.i = readData()
        self.i = 0
        self.last = 0
        self.cpu = PeriodicCallback(self._send_cpu, 500)
        self.cpu.start()

    def on_message(self, message):
        """Store the received integer and publish it as ``MainMotorMax``."""
        global MainMotorMax
        self.i = int(message)
        MainMotorMax = self.i
        # print() with one argument behaves the same on Python 2 and 3;
        # the former print statement was a SyntaxError on Python 3.
        print(message)

    def _send_cpu(self):
        """Send the stored value to the client, but only when it changed."""
        #self.write_message(str(vmstat()[15]))
        #self.write_message(str(time.time()))
        #self.i = readData()
        if self.i != self.last:
            self.write_message(str(self.i))
            self.last = self.i
            print(self.i)

    def on_close(self):
        """Stop polling when the connection is closed."""
        self.cpu.stop()
def __init__(self, ioloop, modules):
    """Set up a 5-second reload check and remember the current commit."""
    self.modules = modules
    PeriodicCallback.__init__(self, self.reload_server, 5000, ioloop)

    repo = Repo("./")
    self.repo = repo
    self.prev_head = repo.active_branch.commit

    start_message = "Starting with commit " + str(self.prev_head)
    logging.debug(start_message + "\n" + self.prev_head.summary)
class LoLAPI(object):
    """Per-client API object that pushes database statistics every second."""

    def __init__(self, client):
        status_timer = PeriodicCallback(self.status, 1000, IOLoop.instance())
        self.timer = status_timer
        self.client = client
        status_timer.start()

    def status(self):
        """Send the current collection counts and a timestamp to the client."""
        snapshot = dict(
            last_updated=datetime.now().strftime("%H:%M:%S %d-%m-%y"),
            game_stats=db.games_data.count(),
            players=db.users.count(),
            full_games=db.games.count(),
            invalid_games=db.invalid_games.count(),
        )
        self.client.one.update_status(snapshot)

    def set_user(self, name):
        """Select the user by name and send the games list to the client."""
        self.user = User.by_name(name)
        stats = GameStats.find(dict(summoner=self.user.get_dbref()))
        games = []
        for stat in stats:
            games.append(Game.find_one(stat['game_id']))
        # Fixed sample payload; the looked-up games are not sent.
        self.client.one.update_games([1, 2, 3, 4, 5, 6, 7])
        # self.client.one.update_games(list(stats))

    def detach(self):
        """Stop the periodic status updates."""
        self.timer.stop()
class WSHandler(tornado.websocket.WebSocketHandler):
    """WebSocket handler that periodically sends a fixed DataInfo message."""

    def initialize(self):
        # Two channels of generated sample values.
        self.values = [[], []]

    def check_origin(self, origin):
        """Accept connections from any origin."""
        return True

    def open(self):
        """Reset the sample buffers and start sending on a time interval."""
        self.initialize()
        sender = PeriodicCallback(self.send_values, timeInterval)
        self.callback = sender
        sender.start()

    def send_values(self):
        """Advance both sample buffers and write the DataInfo message."""
        MAX_POINTS = 30

        # Generates random values to send via websocket
        for channel in self.values:
            if len(channel) >= MAX_POINTS:
                # Buffer full: drop the oldest point before appending.
                channel.pop(0)
            channel.append(randint(1, 10))

        # self.values1 = [randint(1,10) for i in range(100)]
        message = {"Channel0": self.values[0], "Channel1": self.values[1]}
        # self.write_message(message)
        message = {"DataInfo": [{"id": 40, "sname": "SOG"}]}
        self.write_message(message)

    def on_message(self, message):
        """Incoming messages are ignored."""
        return None

    def on_close(self):
        """Stop the periodic sender."""
        self.callback.stop()
class WebSocketChatHandler(tornado.websocket.WebSocketHandler):
    """Chat socket that relays web-GUI messages and flushes them periodically."""

    def initialize(self):
        self.clients = []
        self.callback = PeriodicCallback(self.update_chat, 500)
        player_name = self.get_secure_cookie("player")
        self.web_gui_user = self.player_manager.get_by_name(player_name)

    def open(self, *args):
        """Register the client, replay the message log, start flushing."""
        self.clients.append(self)
        for logged in self.messages_log:
            self.write_message(logged)
        self.callback.start()

    def on_message(self, message):
        """Queue the raw message and broadcast its text through the factory."""
        parsed = json.loads(message)
        self.messages.append(message)
        self.messages_log.append(message)
        formatted = "^yellow;<{d}> <^red;{u}^yellow;> {m}".format(
            d=datetime.now().strftime("%H:%M"),
            u=self.web_gui_user.name,
            m=parsed["message"])
        self.factory.broadcast(formatted, 0, "")

    def update_chat(self):
        """Send all queued messages (sorted) to every client, then clear them."""
        if not len(self.messages) > 0:
            return
        for queued in sorted(self.messages):
            for recipient in self.clients:
                recipient.write_message(queued)
        del self.messages[:]

    def on_close(self):
        """Unregister the client and stop flushing."""
        self.clients.remove(self)
        self.callback.stop()
class WSHandler(tornado.websocket.WebSocketHandler):
    """WebSocket handler that greets its client every five seconds."""

    # track clients:
    # simplest method is just to keep a list or dict of WSHandler instances:
    clients = []
    # Per-connection periodic sender; created in ``open``.
    callback = None

    def open(self):
        """Register the connection and start the periodic greeting."""
        self.clients.append(self)
        # http://tornado.readthedocs.org/en/latest/ioloop.html
        # The callback is called every callback_time milliseconds.
        # class tornado.ioloop.PeriodicCallback(callback, callback_time, io_loop=None)
        self.callback = PeriodicCallback(self.send_hello, 5000)
        self.callback.start()

    def send_hello(self):
        """Send the periodic greeting."""
        self.write_message('hello')

    def msg(self, message):
        """Send ``message`` now and schedule 'in timer' ten seconds later."""
        self.write_message(message)
        # Pass the callable and its argument separately. The previous
        # ``Timer(10, self.msg('in timer'))`` called msg() immediately and
        # therefore recursed without bound.
        # NOTE(review): the timer fires on another thread; confirm that
        # calling write_message from there is acceptable for this app.
        threading.Timer(10, self.msg, args=('in timer',)).start()
        print('in msg' + message)

    def on_message(self, message):
        """Echo the received message back to the client."""
        print('Incoming message: %s' % (message,))
        self.write_message("You said: " + message)

    def on_close(self):
        """Stop the periodic greeting and unregister the connection."""
        # The callback was never stopped before and kept writing to the
        # closed socket.
        if self.callback is not None:
            self.callback.stop()
        self.clients.remove(self)
        print('Connection was closed...')
def start_task(self, task):
    """Launch ``task`` in a child process and start the log-polling timers.

    The child re-runs this script with ``--task-work=<task id>``; its stdout
    and stderr are redirected to the task's common working log. The parent
    and child pids are stored on ``task`` and saved.
    """
    self.log_reader = InstallLogReader(path=self.core.settings.tmp_logs_path, task_id=str(task.id))
    common_log = self.log_reader.common_working_log()
    self.logger.addHandler(TaskDbLogHandler(task))

    # The child gets its own copy of the descriptor when it is spawned, so
    # the parent's handle can be closed right after Popen returns instead
    # of leaking one open file per started task.
    with open(common_log, 'w') as f_out:
        self.process = subprocess.Popen(
            (
                sys.executable,
                '-u',  # unbuffered, so output reaches the log promptly
                sys.argv[0],
                '--task-work={0}'.format(task.id),
            ),
            stderr=f_out,
            stdout=f_out
        )
    print("start child task with pid %i" % self.process.pid)

    task.parent_pid = os.getpid()
    task.pid = self.process.pid
    task.save()
    self.configs["task"] = task

    # Intervals are in milliseconds.
    self.timer_check_logdir = PeriodicCallback(lambda: TornadoWorker.periodic_check_logdir(self), 700)
    self.timer_read_log = PeriodicCallback(lambda: TornadoWorker.periodic_read_logs(self), 300)
    self.timer_read_log.start()
    self.timer_check_logdir.start()
class WebSocketHandler(websocket.WebSocketHandler):
    """WebSocket handler that forwards queued messages to its clients."""

    def initialize(self, queue):
        """Store the message queue and start polling it every 120 ms."""
        self.clients = dict()
        self.queue = queue
        self.callback = PeriodicCallback(self.message_clients, 120)
        self.callback.start()

    def open(self, *args):
        """Register the connection under the ``id`` request argument."""
        self.id = self.get_argument("id")
        self.stream.set_nodelay(True)
        self.clients[self.id] = {"id": self.id, "object": self}

    def on_message(self, message):
        """
        when we receive some message we want some message handler..
        for this example i will just print message to console
        """
        print("Client %s received a message : %s" % (self.id, message))

    def on_close(self):
        """Stop polling the queue and unregister the connection."""
        # The callback was never stopped, so it outlived the connection.
        self.callback.stop()
        client_id = getattr(self, "id", None)  # unset if open() never ran
        if client_id in self.clients:
            del self.clients[client_id]
            print("Removed client " + client_id)

    def message_clients(self):
        """Forward the next queued message, if any, to every client."""
        # A blocking get() on an empty queue would stall the whole IOLoop,
        # so return early and try again on the next tick.
        if self.queue.empty():
            return
        message = self.queue.get()
        for client in list(self.clients.values()):
            try:
                # Write through the registered handler object rather than
                # always through ``self``.
                client["object"].write_message(message)
            except Exception:
                print("Message could not be written")
def main():
    """Load the JSON config, start the car HTTP server and run the IOLoop.

    Exits immediately when the config file contains an empty object. The
    physical car state is updated every ``car_serve.update_ms`` milliseconds
    (default 100). On SystemExit/KeyboardInterrupt the server is stopped and
    the process exits with status 0.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='config file')
    args = parser.parse_args()

    # Logger.warn() is a deprecated alias; warning() is the supported name.
    logger.warning('Reading config from %s', args.config)
    with open(args.config, 'r') as infile:
        config = json.load(infile)
    if config == {}:
        sys.exit()

    serve_config = config.get('car_serve', {})
    logger.warning(serve_config)

    app = CarServer(config)
    # Bound before the try block so the shutdown code below never hits an
    # unbound name when start-up is interrupted early.
    http_server = None
    try:
        logger.info('Opening HTTP server.')
        http_server = HTTPServer(app)
        http_server.listen(serve_config.get('port', 9001),
                           address=serve_config.get('ip_address', '127.0.0.1'))
        update_ms = serve_config.get('update_ms', 100)
        logger.debug('Registering periodic callback. Every %s ms', update_ms)
        i = PeriodicCallback(app.car_state.update_physical_state, update_ms)
        i.start()
        IOLoop.current().start()
    except (SystemExit, KeyboardInterrupt):
        pass

    logger.info('Stopping server.')
    if http_server is not None:
        http_server.stop()
    IOLoop.current().stop()
    sys.exit(0)
class ThroughputTracker(object):
    """Logs emit throughput relative to the last few samples every 30 s."""

    def __init__(self, logger, loop, num_samples=3):
        self.logger = logger
        self.loop = loop

        # callback_time is in milliseconds
        reporter = PeriodicCallback(self.onThroughput, 30 * 1000, self.loop)
        self.throughput_pc = reporter
        reporter.start()

        first_sample = ThroughputSample(timestamp=datetime.utcnow(), num_emitted=0)
        self.samples = deque(maxlen=num_samples)
        self.samples.appendleft(first_sample)
        self.num_emitted = 0

    def onThroughput(self):
        """Log ``elapsed|rate`` against each stored sample, then store a new one."""
        # Throughput measurements
        current = ThroughputSample(timestamp=datetime.utcnow(),
                                   num_emitted=self.num_emitted)

        # Compute every elapsed interval first, then the per-second rates.
        elapsed_all = []
        for earlier in self.samples:
            elapsed_all.append(current.timestamp - earlier.timestamp)

        report = []
        for elapsed, earlier in zip(elapsed_all, self.samples):
            emitted = current.num_emitted - earlier.num_emitted
            rate = emitted / elapsed.total_seconds()
            report.append('%s|%0.1f' % (elapsed, rate))

        self.samples.appendleft(current)
        self.logger.info('Throughput samples: %s', ', '.join(report))
def broadcast_match_info():
    """Publish colour, countdown, stored bet and odds for every match.

    On the first call the function registers itself as a PeriodicCallback
    (every 500 ms). Matches that already have a result only get the 'info'
    colour; the others get a countdown or "Locked".
    """
    global pcb
    if pcb is None:
        pcb = PeriodicCallback(broadcast_match_info, 500)
        pcb.start()

    status = {}
    for match in MatchModel.objects.all():
        fixture = FixtureModel.objects.get(id=match.match_id)
        outcome = ResultModel.objects.get(id=match.match_id)
        key = str(match.match_id)

        if outcome.result != 'None':
            status['color' + key] = 'info'
            continue

        remaining = fixture.match_day - timezone.now()
        if timezone.now() > fixture.match_day:
            countdown = "Locked"
            colour = 'danger'
        else:
            hours = (remaining.seconds // 3600) % 24
            minutes = (remaining.seconds % 3600) // 60
            seconds = (remaining.seconds % 3600) % 60
            countdown = "%sd:%sh:%sm:%ss" % (remaining.days, hours, minutes, seconds)
            colour = 'success'

        status['color' + key] = colour
        status['time_left' + key] = countdown
        status['storedbet' + str(match.id)] = "%s %s" % (match.betting_side, match.betting_points)
        status['odds' + key] = "%s:%s" % (fixture.home_odds, fixture.away_odds)

    publish_data('matchinfo', {'match_status': status})
class WebSocketGame(WebSocketHandler):
    """Game socket: sends the initial game data and reports status changes."""

    # Created on login; stays None when the client never logs in.
    loop_callback = None

    def open(self):
        """Initialise the game and send its data to the client."""
        self.game_data = {}
        self.initialize_game()
        self.write_message(self.game_data)

    def on_message(self, message):
        """Handle a JSON message; "login" stores the name and game id."""
        message = json.loads(message)
        if message["type"] == "login":
            self.game_name = message["name"]
            self.game_id = message["game_id"]
            # NOTE(review): the callback is created but not started here;
            # confirm whether something else starts it.
            self.loop_callback = PeriodicCallback(self.do_loop, 5000)
        else:
            self.handle_message(message)

    def on_close(self):
        """Stop the loop callback if a login created one."""
        # Without the guard a client that disconnected before logging in
        # raised AttributeError here.
        if self.loop_callback is not None:
            self.loop_callback.stop()

    def update_status(self, status):
        """Report ``status`` (S, I, U or F) to the private game-task API."""
        if status not in ("S", "I", "U", "F"):  # Start, InProgress, Succesful, Fail
            return  # Let's try not to hit the status API with bad values.
        url = "http://localhost:8080/private_api/gametask/{}/{}/{}".format(self.game_name, self.game_id, status)
        request = HTTPRequest(url=url)
        http = AsyncHTTPClient()
        http.fetch(request, self.callback)

    def callback(self, response):
        """Print the HTTP code of the status API response."""
        # Catch any errors.
        # print() with one argument behaves the same on Python 2 and 3.
        print("Callback fired.")
        print("HTTP Code: {}".format(response.code))
class WSHandler(tornado.websocket.WebSocketHandler):
    """Live-data socket that forwards entries of ``q_live`` as JSON."""

    def check_origin(self, origin):
        """Accept connections from any origin."""
        return True

    def open(self):
        """Discard stale queue entries and start the sender."""
        with q_live.mutex:
            q_live.queue.clear()
        sender = PeriodicCallback(self.send_werte, 1)
        self.callback = sender
        sender.start()
        print ('Connection open')

    def send_werte(self):
        """Send the next queued (signals, values) pair as a JSON object."""
        if q_live.empty():
            return
        signals, values = q_live.get()
        senden = dict(zip(signals, values))
        print(senden)
        self.write_message(json.dumps(senden))
        print(q_live.qsize())
        # Drop the backlog when the consumer falls too far behind.
        backlog_too_large = q_live.qsize() > 15
        if backlog_too_large:
            with q_live.mutex:
                q_live.queue.clear()

    def on_message(self, empf):
        """Only note that data arrived; the payload is ignored."""
        print('Daten recievied: ')

    def on_close(self):
        """Stop the sender."""
        print('Connection closed!')
        self.callback.stop()
class WebSocket(tornado.websocket.WebSocketHandler):
    """Broadcast socket: every received message is relayed to all clients."""

    waiters = set()  # all connected clients; multiple clients are supported
    wdata = ""       # last message received from any client

    def open(self):
        """Register the client and start the 30-second polling message."""
        print("open websocket connection")
        WebSocket.waiters.add(self)  # client add
        # timeout countermeasure: send a message every 30 s
        self.callback = PeriodicCallback(self._send_message, 30000)
        self.callback.start()

    def on_close(self):
        """Unregister the client and stop its polling message."""
        WebSocket.waiters.remove(self)  # client remove
        self.callback.stop()
        print("close websocket connection")

    def on_message(self, message):
        """Remember the message and relay it to every client."""
        WebSocket.wdata = message
        WebSocket.send_updates(message)

    @classmethod
    def send_updates(cls, message):
        """Write ``message`` to every registered client.

        A failure for one client is logged and does not stop delivery to
        the others.
        """
        import logging  # local import: keeps this block self-contained

        print(message + ":connection=" + str(len(cls.waiters)))
        for waiter in cls.waiters:
            try:
                waiter.write_message(message)
            except Exception:
                # print() has no exc_info parameter, so the old call raised
                # TypeError inside the handler; logging.exception records
                # the message together with the traceback.
                logging.exception("Error sending message")

    # timeout-prevention callback, every 30 seconds
    def _send_message(self):
        """Send the polling message."""
        self.write_message("C:POLLING")
class ServerApplication(Application):
    """Tornado application with HTTP API handlers and a VMaaS websocket client.

    It keeps a websocket connection to the URL built from the
    ``VMAAS_WEBSOCKET_HOST`` environment variable, retries the connection
    periodically, and schedules system re-evaluation when a
    "webapps-refreshed" message arrives.
    """

    def __init__(self):
        handlers = [(r"/api/v1/monitoring/health/?", HealthHandler),
                    (r"/api/v1/sync/?", SyncHandler),
                    (r"/api/v1/re-evaluate/?", ReEvaluateHandler),
                    (r"/api/v1/cves/?", DeleteHandler)]
        Application.__init__(self, handlers)
        self.instance = IOLoop.instance()
        self.vmaas_websocket_url = "ws://%s/" % os.getenv(
            "VMAAS_WEBSOCKET_HOST", "vmaas_websocket:8082")
        # Set once a connection attempt succeeds; None while disconnected.
        self.vmaas_websocket = None
        self.reconnect_callback = None
        self.evaluator_queue = None

    def start(self):
        """Create the evaluator queue, sync CVEs, connect and run the IOLoop."""
        self.evaluator_queue = mqueue.MQWriter(mqueue.EVALUATOR_TOPIC)
        # Sync CVEs always when app starts
        sync_cve_md()
        self._websocket_reconnect()
        # WEBSOCKET_RECONNECT_INTERVAL is multiplied by 1000 because
        # PeriodicCallback takes milliseconds.
        self.reconnect_callback = PeriodicCallback(
            self._websocket_reconnect, WEBSOCKET_RECONNECT_INTERVAL * 1000)
        self.reconnect_callback.start()
        self.instance.start()

    async def stop(self):
        """Stop the evaluator queue, close the websocket and stop the IOLoop."""
        await self.evaluator_queue.stop()
        if self.vmaas_websocket is not None:
            self.vmaas_websocket.close()
            self.vmaas_websocket = None
            LOGGER.info("Websocket connection closed.")
        self.instance.stop()

    def _websocket_reconnect(self):
        """Connect to given websocket, set message handler and callback."""
        # Does nothing while a connection object is present.
        if self.vmaas_websocket is None:
            CNX_RECONNECT.inc()
            websocket_connect(self.vmaas_websocket_url,
                              on_message_callback=self._read_websocket_message,
                              callback=self._websocket_connect_status)

    def _websocket_connect_status(self, future):
        """Check if connection attempt succeeded."""
        try:
            result = future.result()
        except:  # noqa: E722 pylint: disable=bare-except
            result = None

        if result is None:
            # TODO: print the traceback as debug message when we use logging module instead of prints here
            CNX_FAIL.inc()
            LOGGER.warning("Unable to connect to: %s", self.vmaas_websocket_url)
        else:
            LOGGER.info("Connected to: %s", self.vmaas_websocket_url)
            result.write_message("subscribe-listener")

        # Stays None after a failure, so the periodic reconnect tries again.
        self.vmaas_websocket = result

    @staticmethod
    def select_repo_based_inventory_ids(cur, repos: list):
        """Select inventory-ids connected with inserted repos, don't fetch it."""
        if repos:
            cur.execute(
                """select inventory_id from system_platform where id in (select distinct system_id from system_repo where repo_id in (select id from repo where name in %s))""",
                (tuple(repos), ))
        else:
            cur.execute("""select * from system_repo where (1=0)"""
                        )  # ensure empty result

    @staticmethod
    def select_all_inventory_ids(cur):
        """Select all inventory-ids, don't fetch it."""
        cur.execute("select inventory_id from system_platform")

    @staticmethod
    def get_last_repobased_eval_tms(cur):
        """Select last repo-based evaluation timestamp; None when no row exists."""
        cur.execute("select value from timestamp_kv where name = %s",
                    (constants.TIMESTAMP_LAST_REPO_BASED_EVAL, ))
        ret = cur.fetchone()
        if ret:
            return ret[0]
        return None

    @staticmethod
    def set_last_repobased_eval_tms(cur, timestamp: dt.datetime):
        """Update last repo-based evaluation timestamp and return the fetched row."""
        cur.execute(
            """insert into timestamp_kv (name, value) values (%s, %s) on conflict (name) do update set value = %s returning value, (xmax = 0) as inserted""",
            (constants.TIMESTAMP_LAST_REPO_BASED_EVAL, timestamp, timestamp))
        ret = cur.fetchone()
        return ret

    @staticmethod
    def _vmaas_repos_modified_since(modified_since: str) -> list:
        """Get list of repos modified since `modified_since`.

        Returns an empty list when the paged request does not succeed.
        """
        repos_json = {
            "repository_list": [".*"],
            "page": 1,
            "page_size": DEFAULT_PAGE_SIZE,
            "modified_since": modified_since
        }
        success, repos_pages = paging(VMAAS_REPOS_ENDPOINT, repos_json)
        if not success:
            return []
        # Iterating "repository_list" yields one entry per repo.
        # NOTE(review): presumably a mapping keyed by repo name -- confirm.
        repos = [
            repo_name_key for repo_name_key in repos_pages["repository_list"]
        ]
        LOGGER.info("%d repos found updated since %s", len(repos),
                    modified_since)
        return repos

    def _get_updated_repos(self, conn) -> list:
        """Get repos updated since last repo-based evaluation"""
        with conn.cursor() as cur:
            # last modified timestamp
            modified_since_dt = self.get_last_repobased_eval_tms(cur)
            if modified_since_dt is None:
                # No stored timestamp: fall back to the Unix epoch in UTC.
                modified_since_dt = dt.datetime.utcfromtimestamp(0).replace(
                    tzinfo=dt.timezone.utc)
            # list of modified repos
            repos = self._vmaas_repos_modified_since(
                modified_since_dt.isoformat())
            # NOTE(review): dt.datetime.now() is timezone-naive while the
            # epoch fallback above is UTC-aware -- confirm this is intended.
            self.set_last_repobased_eval_tms(cur, dt.datetime.now())
        return repos

    async def re_evaluate_systems(self, repo_based: bool):
        """Schedule re-evaluation for all systems in DB.

        With ``repo_based`` only systems connected to recently updated repos
        are selected. Rows are read in batches and sent to the evaluator
        queue; a semaphore is acquired before each batch and released when
        that batch's send future completes.
        """
        with DatabasePoolConnection() as conn:
            if repo_based:
                updated_repos = self._get_updated_repos(conn)

            with NamedCursor(conn) as cur:
                if repo_based:
                    LOGGER.info("Re-evaluating in repo-based mode")
                    self.select_repo_based_inventory_ids(cur, updated_repos)
                else:
                    LOGGER.info("Re-evaluating all systems")
                    self.select_all_inventory_ids(cur)
                total_scheduled = 0
                while True:
                    await RE_EVALUATION_KAFKA_BATCH_SEMAPHORE.acquire()
                    rows = cur.fetchmany(size=RE_EVALUATION_KAFKA_BATCH_SIZE)
                    if not rows:
                        # Nothing was sent for this acquire; release it here.
                        RE_EVALUATION_KAFKA_BATCH_SEMAPHORE.release()
                        break
                    msgs = [{
                        "type": "re-evaluate_system",
                        "host": {
                            "id": inventory_id
                        }
                    } for inventory_id, in rows]
                    total_scheduled += len(msgs)
                    future = self.evaluator_queue.send_list(msgs)
                    future.add_done_callback(
                        lambda x: RE_EVALUATION_KAFKA_BATCH_SEMAPHORE.release(
                        ))
                LOGGER.info("%s systems scheduled for re-evaluation",
                            total_scheduled)
            conn.commit()

    def _read_websocket_message(self, message):
        """Read incoming websocket messages.

        A ``None`` message is treated as a closed connection. Returns the
        re-evaluation future when one was scheduled, otherwise None.
        """
        future = None
        if message is not None:
            if message == "webapps-refreshed":
                REFRESH.inc()
                LOGGER.info("VMaaS cache refreshed")
                sync_cve_md()
                if ENABLE_RE_EVALUATION:
                    future = asyncio.ensure_future(
                        self.re_evaluate_systems(
                            ENABLE_REPO_BASED_RE_EVALUATION))
                else:
                    LOGGER.info("Re-evaluation is disabled, skipping")
        else:
            CNX_FAIL.inc()
            LOGGER.warning("Connection to %s closed: %s (%s)",
                           self.vmaas_websocket_url,
                           self.vmaas_websocket.close_reason,
                           self.vmaas_websocket.close_code)
            # Cleared so the periodic reconnect callback connects again.
            self.vmaas_websocket = None
        return future
futures.append((user['name'], client.fetch(req))) elif user['server'] and last_activity.replace(tzinfo=None) > cull_limit.replace(tzinfo=None): app_log.debug("Not culling %s (active since %s)", user['name'], last_activity) for (name, f) in futures: yield f app_log.debug("Finished culling %s", name) if __name__ == '__main__': define('url', default=os.environ.get('JUPYTERHUB_API_URL') or 'http://127.0.0.1:8081/hub/api', help="The JupyterHub API URL") define('timeout', default=600, help="The idle timeout (in seconds)") define('cull_every', default=0, help="The interval (in seconds) for checking for idle servers to cull") parse_command_line() if not options.cull_every: options.cull_every = options.timeout // 2 api_token = os.environ['JUPYTERHUB_API_TOKEN'] loop = IOLoop.current() cull = lambda : cull_idle(options.url, api_token, options.timeout) # run once before scheduling periodic call loop.run_sync(cull) # schedule periodic cull pc = PeriodicCallback(cull, 1e3 * options.cull_every) pc.start() try: loop.start() except KeyboardInterrupt: pass
from tornado.ioloop import PeriodicCallback, IOLoop
import tornado.wsgi


class NowHandler(WebSocketHandler):
    """WebSocket endpoint whose clients receive the current time."""

    # Every currently connected client.
    clients = set()

    @staticmethod
    def echo_now():
        """Write ``time.ctime()`` to each connected client."""
        for connected in NowHandler.clients:
            connected.write_message(time.ctime())

    def open(self):
        """Register the new connection."""
        NowHandler.clients.add(self)

    def on_close(self):
        """Unregister the closed connection."""
        NowHandler.clients.remove(self)


# Everything that is not the /now websocket falls back to the WSGI app.
wsgi_app = tornado.wsgi.WSGIContainer(app)
routes = [
    (r'/now', NowHandler),
    (r'.*', tornado.web.FallbackHandler, {'fallback': wsgi_app}),
]
application = tornado.web.Application(routes)

# Broadcast the time once per second (interval in milliseconds).
ticker = PeriodicCallback(NowHandler.echo_now, 1000)
ticker.start()

application.listen(5000)
IOLoop.instance().start()
def open(self):
    """Send the game info once, then again every 5000 ms."""
    self.game_info()
    refresher = PeriodicCallback(self.game_info, 5000)
    self.callback = refresher
    refresher.start()
def __init__(
    self,
    handlers,
    blocked_handlers=None,
    stream_handlers=None,
    connection_limit=512,
    deserialize=True,
    serializers=None,
    deserializers=None,
    connection_args=None,
    timeout=None,
    io_loop=None,
    **kwargs,
):
    """Set up handlers, the IO loop, monitoring callbacks and the RPC pool.

    ``handlers`` is merged over the built-in "identity" and
    "connection_stream" handlers. When ``blocked_handlers`` is None it is
    read from the dask config key derived from the lower-cased class name.
    ``io_loop`` defaults to the current IOLoop. The periodic callbacks are
    only created and stored in ``self.periodic_callbacks`` here; they are
    not started. Remaining ``kwargs`` go to the parent constructor.
    """
    self.handlers = {
        "identity": self.identity,
        "connection_stream": self.handle_stream,
    }
    self.handlers.update(handlers)
    if blocked_handlers is None:
        blocked_handlers = dask.config.get(
            "distributed.%s.blocked-handlers" % type(self).__name__.lower(), [])
    self.blocked_handlers = blocked_handlers
    self.stream_handlers = {}
    self.stream_handlers.update(stream_handlers or {})

    # Identifier: class name plus a random UUID.
    self.id = type(self).__name__ + "-" + str(uuid.uuid4())
    self._address = None
    self._listen_address = None
    self._port = None
    self._comms = {}
    self.deserialize = deserialize
    self.monitor = SystemMonitor()
    self.counters = None
    self.digests = None
    self._ongoing_coroutines = weakref.WeakSet()
    self._event_finished = asyncio.Event()

    self.listeners = []
    self.io_loop = io_loop or IOLoop.current()
    self.loop = self.io_loop

    # Attach a profiler to the loop only if it does not already have one.
    if not hasattr(self.io_loop, "profile"):
        # Weak reference, so the stop() predicate does not keep the loop alive.
        ref = weakref.ref(self.io_loop)

        if hasattr(self.io_loop, "asyncio_loop"):

            def stop():
                loop = ref()
                return loop is None or loop.asyncio_loop.is_closed()

        else:

            def stop():
                loop = ref()
                return loop is None or loop._closing

        self.io_loop.profile = profile.watch(
            omit=("profile.py", "selectors.py"),
            interval=dask.config.get(
                "distributed.worker.profile.interval"),
            cycle=dask.config.get("distributed.worker.profile.cycle"),
            stop=stop,
        )

    # Statistics counters for various events
    # (left as None when the counter module cannot be imported)
    with suppress(ImportError):
        from .counter import Digest

        self.digests = defaultdict(partial(Digest, loop=self.io_loop))

        from .counter import Counter

        self.counters = defaultdict(partial(Counter, loop=self.io_loop))

    self.periodic_callbacks = dict()

    # NOTE(review): the * 1000 presumably converts seconds to the
    # milliseconds PeriodicCallback expects -- confirm parse_timedelta's unit.
    pc = PeriodicCallback(
        self.monitor.update,
        parse_timedelta(
            dask.config.get("distributed.admin.system-monitor.interval"))
        * 1000,
    )
    self.periodic_callbacks["monitor"] = pc

    self._last_tick = time()
    measure_tick_interval = parse_timedelta(
        dask.config.get("distributed.admin.tick.interval"), default="ms")
    pc = PeriodicCallback(self._measure_tick, measure_tick_interval * 1000)
    self.periodic_callbacks["tick"] = pc

    self.thread_id = 0

    def set_thread_ident():
        self.thread_id = threading.get_ident()

    # Scheduled on the loop, so the id recorded is that of the thread
    # which executes the callback.
    self.io_loop.add_callback(set_thread_ident)
    self._startup_lock = asyncio.Lock()
    self.status = Status.undefined

    self.rpc = ConnectionPool(
        limit=connection_limit,
        deserialize=deserialize,
        serializers=serializers,
        deserializers=deserializers,
        connection_args=connection_args,
        timeout=timeout,
        server=self,
    )

    self.__stopped = False

    super().__init__(**kwargs)
class JavascriptBuild(object):
    """A JavaScript build evaluated inside its own ``Context``.

    The build keeps a reference counter (``refs``). A periodic callback
    checks the counter every ``autorelease_time`` and schedules
    :meth:`release` once nobody is using the build any more.
    """

    def __init__(self, build_id=None, model=None, source_path=None, autorelease_time=30000, is_server=False):
        """Create the context, evaluate the stdlib and every ``*.js`` file.

        :param build_id: identifier used in log messages (optional)
        :param model: object notified through ``build_released`` on release
        :param source_path: directory whose ``.js`` files are evaluated
        :param autorelease_time: period passed to the ``PeriodicCallback``
            that checks whether the build is still referenced
        :param is_server: forwarded to ``expose``
        :raises JavascriptBuildError: when the stdlib or a source file fails
            to evaluate
        """
        self.build_id = build_id
        self.model = model
        self.context = Context()
        self.promise_type = self.context.glob.Promise
        self.build_cache = ExpiringDict(2048, 60)

        # this variable holds amount of users of this build. once this variable hits back to zero,
        # eventually the build will be released
        self.refs = 0
        self._check_refs = PeriodicCallback(self.__check_build_refs__, autorelease_time)
        self.released = False

        try:
            script = Script(source=stdlib.source, filename=stdlib.name)
            self.context.eval(script)
        except Exception as e:
            logging.exception("Error while compiling stdlib.js")
            raise JavascriptBuildError(500, str(e))

        if source_path:
            for file_name in os.listdir(source_path):
                if not file_name.endswith(".js"):
                    continue

                logging.info("Compiling file {0}".format(os.path.join(source_path, file_name)))

                try:
                    with open(os.path.join(source_path, file_name), 'r') as f:
                        script = Script(source=f.read(), filename=str(file_name))
                        self.context.eval(script)
                except Exception as e:
                    logging.exception("Error while compiling")
                    raise JavascriptBuildError(500, str(e))

        expose(self.context, is_server=is_server)
        if self.build_id:
            logging.info("Created new build {0}".format(self.build_id))

        self._check_refs.start()

    def __check_build_refs__(self):
        """Periodic check: schedule :meth:`release` when ``refs`` is zero."""
        if self.refs > 0:
            return

        if self.build_id:
            logging.info("Build {0} is being released because no usages left.".format(self.build_id))

        self._remove_timeout = None
        IOLoop.current().add_callback(self.release)

    @validate(source_code="str", filename="str")
    def add_source(self, source_code, filename=None):
        """Evaluate additional source code inside the build's context.

        :raises JavascriptBuildError: when evaluation raises ``JSException``
        """
        script = Script(source=str(source_code), filename=str(filename))
        try:
            self.context.eval(script)
        except JSException as e:
            raise JavascriptBuildError(500, e.message)

    @validate(class_name="str_name", args="json_dict")
    def session(self, class_name, args, log=None, debug=None, **env):
        """Instantiate ``class_name`` and wrap it in a ``JavascriptSession``.

        Takes one reference on the build; the session is expected to give it
        back through :meth:`session_released`.

        :raises NoSuchClass: when the class is missing from the global scope
            or does not set ``allow_session``
        :raises JavascriptSessionError: when construction fails
        """
        if class_name not in self.context.glob:
            raise NoSuchClass()
        clazz = getattr(self.context.glob, class_name)

        # each 'session' class should have 'SessionClass.allow_session = true' defined
        if not getattr(clazz, "allow_session", False):
            raise NoSuchClass()

        handler = JavascriptCallHandler(self.build_cache, env, self.context, debug=debug,
                                        promise_type=self.promise_type)
        if log:
            handler.log = log
        PromiseContext.current = handler

        try:
            instance = new(clazz, args, env)
        except TypeError:
            raise JavascriptSessionError(500, "Failed to open session: TypeError while construction")
        except JSException as e:
            raise JavascriptSessionError(500, "Failed to open session: " + str(e))

        # declare some usage, session will release it using 'session_released' call
        self.add_ref()
        return JavascriptSession(self, instance, env, log=log, debug=debug,
                                 cache=self.build_cache, promise_type=self.promise_type)

    @validate(method_name="str_name", args="json_dict")
    async def call(self, method_name, args, call_timeout=10, **env):
        """Call a global function of the build and return its result.

        A reference is held for the duration of the call and dropped in the
        ``finally`` clause.

        :param call_timeout: seconds to wait for an unfinished future
        :raises NoSuchMethod: for private, blacklisted, missing or
            non-``allow_call`` functions
        :raises JavascriptExecutionError: when starting the call fails
        :raises APIError: (408) when the future does not finish in time
        """
        if method_name.startswith("_"):
            raise NoSuchMethod()

        if method_name in JavascriptSession.CALL_BLACKLIST:
            raise NoSuchMethod()

        instance = self.context.glob

        if not hasattr(instance, method_name):
            raise NoSuchMethod()

        method = getattr(instance, method_name)

        # each plain function should have 'function.allow_call = true' defined
        if not getattr(method, "allow_call", False):
            raise NoSuchMethod()

        handler = JavascriptCallHandler(None, env, self.context, promise_type=self.promise_type)
        PromiseContext.current = handler

        # declare some usage until this call is finished
        self.add_ref()

        try:
            try:
                future = self.context.async_call(method, (args,), JSFuture)
            except JSException as e:
                value = e.value
                # An error value carrying a 'code' keeps that code; anything
                # else is reported as a 500.
                if hasattr(value, "code"):
                    if hasattr(value, "stack"):
                        raise JavascriptExecutionError(value.code, value.message, stack=str(value.stack))
                    else:
                        raise JavascriptExecutionError(value.code, value.message)

                if hasattr(e, "stack"):
                    raise JavascriptExecutionError(500, str(e), stack=str(e.stack))
                raise JavascriptExecutionError(500, str(e))
            except APIError as e:
                raise JavascriptExecutionError(e.code, e.message)
            except InternalError as e:
                raise JavascriptExecutionError(
                    e.code, "Internal error: " + e.body)
            except JavaScriptTerminated:
                raise JavascriptExecutionError(
                    408, "Evaluation process timeout: function shouldn't be "
                         "blocking and should rely on async methods instead.")
            except Exception as e:
                raise JavascriptExecutionError(500, str(e))

            if future.done():
                return future.result()

            try:
                result = await with_timeout(datetime.timedelta(seconds=call_timeout), future)
            except TimeoutError:
                raise APIError(408, "Total function '{0}' call timeout ({1})".format(
                    method_name, call_timeout))
            else:
                return result
        finally:
            del handler.context
            del handler
            self.remove_ref()

    def add_ref(self):
        """Register one more user of this build."""
        self.refs += 1

    def remove_ref(self):
        """Unregister one user of this build."""
        self.refs -= 1

    async def session_released(self, session):
        """Drop the reference taken by :meth:`session`."""
        self.remove_ref()

    async def release(self):
        """Stop the reference check, drop the context and notify the model.

        Calling this more than once is safe: later calls return immediately.
        """
        if self.released:
            return

        # Mark the build as released *before* the first await. Otherwise a
        # second call arriving while build_released() is pending would pass
        # the guard above and call stop() on an already cleared _check_refs.
        self.released = True

        self._check_refs.stop()
        self._check_refs = None

        if hasattr(self, "context"):
            del self.context

        if self.build_id:
            logging.info("Build released {0}".format(self.build_id))

        if self.model:
            await self.model.build_released(self)
class WebSocketChannelsHandler(WebSocketHandler, JupyterHandler):
    """Websocket handler that relays kernel channel traffic to a gateway."""

    session = None
    gateway = None
    kernel_id = None
    ping_callback = None

    def check_origin(self, origin=None):
        """Use JupyterHandler's origin check."""
        return JupyterHandler.check_origin(self, origin)

    def set_default_headers(self):
        """Undo the set_default_headers in JupyterHandler which doesn't make sense for websockets"""

    def get_compression_options(self):
        """Return the (empty) compression option dict."""
        # use deflate compress websocket
        return {}

    def authenticate(self):
        """Run before finishing the GET request

        Extend this method to add logic that should fire before
        the websocket finishes completing.
        """
        # authenticate the request before opening the websocket
        if self.get_current_user() is None:
            self.log.warning("Couldn't authenticate WebSocket connection")
            raise web.HTTPError(403)

        session_id = self.get_argument("session_id", False)
        if not session_id:
            self.log.warning("No session ID specified")
            return
        self.session.session = cast_unicode(session_id)

    def initialize(self):
        """Create the per-connection session and gateway client."""
        self.log.debug("Initializing websocket connection %s", self.request.path)
        self.session = Session(config=self.config)
        gateway_url = GatewayClient.instance().url
        self.gateway = GatewayWebSocketClient(gateway_url=gateway_url)

    async def get(self, kernel_id, *args, **kwargs):
        """Authenticate, remember the kernel id, then run the websocket GET."""
        self.authenticate()
        self.kernel_id = cast_unicode(kernel_id, "ascii")
        await super().get(kernel_id=kernel_id, *args, **kwargs)

    def send_ping(self):
        """Ping the client, or stop pinging once the connection is gone."""
        connection_gone = self.ws_connection is None
        if connection_gone and self.ping_callback is not None:
            self.ping_callback.stop()
            return

        self.ping(b"")

    def open(self, kernel_id, *args, **kwargs):
        """Handle web socket connection open to notebook server and delegate to gateway web socket handler """
        pinger = PeriodicCallback(self.send_ping, GATEWAY_WS_PING_INTERVAL_SECS * 1000)
        self.ping_callback = pinger
        pinger.start()

        self.gateway.on_open(
            kernel_id=kernel_id,
            message_callback=self.write_message,
            compression_options=self.get_compression_options(),
        )

    def on_message(self, message):
        """Forward message to gateway web socket handler."""
        self.gateway.on_message(message)

    def write_message(self, message, binary=False):
        """Send message back to notebook client.  This is called via callback from self.gateway._read_messages."""
        if not self.ws_connection:
            # The client is gone; only summarise the dropped message when
            # debug logging is on.
            if self.log.isEnabledFor(logging.DEBUG):
                msg_summary = WebSocketChannelsHandler._get_message_summary(
                    json_decode(utf8(message)))
                self.log.debug(
                    "Notebook client closed websocket connection - message dropped: {}"
                    .format(msg_summary))
            return

        # prevent WebSocketClosedError
        if isinstance(message, bytes):
            binary = True
        super().write_message(message, binary=binary)

    def on_close(self):
        """Close the gateway side, then run the base class close hook."""
        self.log.debug("Closing websocket connection %s", self.request.path)
        self.gateway.on_close()
        super().on_close()

    @staticmethod
    def _get_message_summary(message):
        """Return a short description of ``message`` for debug logging."""
        message_type = message["msg_type"]
        head = "type: {}".format(message_type)

        if message_type == "status":
            tail = ", state: {}".format(message["content"]["execution_state"])
        elif message_type == "error":
            content = message["content"]
            tail = ", {}:{}:{}".format(
                content["ename"],
                content["evalue"],
                content["traceback"],
            )
        else:
            tail = ", ..."  # don't display potentially sensitive data

        return head + tail
def open(self):
    """Create and start a periodic callback that runs ``send_temp`` (period 120)."""
    ticker = PeriodicCallback(self.send_temp, 120)
    self.callback = ticker
    ticker.start()
class Pool(object):
    """A connection pool that manages PostgreSQL connections.

    :param min_conn: number of connections opened at construction and kept
        alive by the periodic cleanup.
    :param max_conn: upper bound on the number of pooled connections.
    :param cleanup_timeout: period in seconds of the cleanup callback; no
        callback is created when it is not positive.
    :param args, kwargs: forwarded to ``psycopg2.connect``.
    """

    def __init__(self, min_conn, max_conn, cleanup_timeout, *args, **kwargs):
        self.min_conn = min_conn
        self.max_conn = max_conn
        self.closed = False
        self._args = args
        self._kwargs = kwargs
        # Always defined so that close() can test it.
        self._cleaner = None

        self._pool = []
        for i in range(self.min_conn):
            self._new_conn()

        # Create a periodic callback that tries to close inactive connections
        if cleanup_timeout > 0:
            self._cleaner = PeriodicCallback(self._clean_pool, cleanup_timeout * 1000)
            self._cleaner.start()

    def _new_conn(self, new_cursor_args=None):
        """Create a new connection.

        If `new_cursor_args` is provided a new cursor is created when the
        callback is executed.

        :raises PoolError: when the pool already holds `max_conn` connections.
        """
        # ">=": with exactly max_conn connections the pool is already full.
        if len(self._pool) >= self.max_conn:
            raise PoolError('connection pool exausted')
        conn = psycopg2.connect(*self._args, **self._kwargs)
        add_conn = functools.partial(self._add_conn, conn)

        if new_cursor_args:
            # Work on a copy so the caller's dict is left untouched.
            cursor_args = dict(new_cursor_args, connection=conn)
            new_cursor = functools.partial(self._new_cursor, **cursor_args)
            Poller(conn, (add_conn, new_cursor)).start()
        else:
            Poller(conn, (add_conn, )).start()

    def _add_conn(self, conn):
        """Add a connection to the pool.

        This function is used by `_new_conn` as a callback to add the created
        connection to the pool.
        """
        self._pool.append(conn)

    def _new_cursor(self, function, func_args=(), callback=None, connection=None):
        """Create a new cursor.

        If there's no connection available, a new connection will be created
        and `_new_cursor` will be called again after the connection has been
        made.
        """
        if not connection:
            connection = self._get_free_conn()
            if not connection:
                new_cursor_args = {
                    'function': function,
                    'func_args': func_args,
                    'callback': callback
                }
                self._new_conn(new_cursor_args)
                return

        cursor = connection.cursor()
        getattr(cursor, function)(*func_args)

        # Callbacks from cursor function always get the cursor back
        callback = functools.partial(callback, cursor)
        Poller(cursor.connection, (callback, )).start()

    def _get_free_conn(self):
        """Look for a free connection and return it.

        `None` is returned when no free connection can be found.

        :raises PoolError: when the pool is closed.
        """
        if self.closed:
            raise PoolError('connection pool is closed')
        for conn in self._pool:
            if not conn.isexecuting():
                return conn
        return None

    def _clean_pool(self):
        """Try to close the number of connections that exceeds the number in
        `min_conn`.

        This method loops through the connections in `_pool` and closes free
        connections until only `min_conn` remain or no free one is left.

        :raises PoolError: when the pool is closed.
        """
        if self.closed:
            raise PoolError('connection pool is closed')

        excess = len(self._pool) - self.min_conn
        if excess <= 0:
            return

        # Build the list of survivors instead of popping by index: popping
        # ascending indexes from a shrinking list removes the wrong items.
        kept = []
        for conn in self._pool:
            if excess > 0 and not conn.isexecuting():
                conn.close()
                excess -= 1
            else:
                kept.append(conn)
        self._pool[:] = kept

    def execute(self, operation, parameters=(), callback=None):
        """http://initd.org/psycopg/docs/cursor.html#cursor.execute
        """
        self._new_cursor('execute', (operation, parameters), callback)

    def executemany(self, operation, parameters=None, callback=None):
        """http://initd.org/psycopg/docs/cursor.html#cursor.executemany
        """
        self._new_cursor('executemany', (operation, parameters), callback)

    def callproc(self, procname, parameters=None, callback=None):
        """http://initd.org/psycopg/docs/cursor.html#cursor.callproc
        """
        self._new_cursor('callproc', (procname, parameters), callback)

    def close(self):
        """Close all open connections and stop the cleanup callback.

        :raises PoolError: when the pool is already closed.
        """
        if self.closed:
            raise PoolError('connection pool is closed')

        # Without this the callback keeps firing and _clean_pool raises
        # PoolError on every tick after the pool has been closed.
        cleaner = getattr(self, '_cleaner', None)
        if cleaner is not None:
            cleaner.stop()
            self._cleaner = None

        for conn in self._pool:
            if not conn.closed:
                conn.close()
        self._pool = []
        self.closed = True
def _widget(self):
    """ Create IPython widget for display within a notebook

    The widget is built once and cached in ``self._cached_widget``. When
    ``ipywidgets`` cannot be imported, ``None`` is cached and returned.
    A periodic callback stored under ``periodic_callbacks["cluster-repr"]``
    refreshes the status text.
    """
    try:
        return self._cached_widget
    except AttributeError:
        pass

    try:
        from ipywidgets import Layout, VBox, HBox, IntText, Button, HTML, Accordion
    except ImportError:
        self._cached_widget = None
        return None

    layout = Layout(width="150px")

    if self.dashboard_link:
        link = '<p><b>Dashboard: </b><a href="%s" target="_blank">%s</a></p>\n' % (
            self.dashboard_link,
            self.dashboard_link,
        )
    else:
        link = ""

    title = "<h2>%s</h2>" % self._cluster_class_name
    title = HTML(title)
    dashboard = HTML(link)

    status = HTML(self._widget_status(), layout=Layout(min_width="150px"))

    if self._supports_scaling:
        request = IntText(0, description="Workers", layout=layout)
        scale = Button(description="Scale", layout=layout)

        minimum = IntText(0, description="Minimum", layout=layout)
        maximum = IntText(0, description="Maximum", layout=layout)
        adapt = Button(description="Adapt", layout=layout)

        accordion = Accordion(
            [HBox([request, scale]), HBox([minimum, maximum, adapt])],
            layout=Layout(min_width="500px"),
        )
        accordion.selected_index = None
        accordion.set_title(0, "Manual Scaling")
        accordion.set_title(1, "Adaptive Scaling")

        # ``update`` is defined further down; the callbacks only look it up
        # when a button is clicked.
        def adapt_cb(b):
            self.adapt(minimum=minimum.value, maximum=maximum.value)
            update()

        adapt.on_click(adapt_cb)

        def scale_cb(b):
            with log_errors():
                n = request.value
                # Manual scaling stops adaptive scaling if one is attached.
                with suppress(AttributeError):
                    self._adaptive.stop()
                self.scale(n)
                update()

        scale.on_click(scale_cb)
    else:
        accordion = HTML("")

    box = VBox([title, HBox([status, accordion]), dashboard])

    self._cached_widget = box

    def update():
        status.value = self._widget_status()

    # ``default="ms"`` is the unit parse_timedelta applies to a bare number.
    # It must not be passed to dask.config.get, where it would instead act
    # as the fallback *value* for a missing key.
    cluster_repr_interval = parse_timedelta(
        dask.config.get("distributed.deploy.cluster-repr-interval"),
        default="ms",
    )
    pc = PeriodicCallback(update, cluster_repr_interval * 1000)
    self.periodic_callbacks["cluster-repr"] = pc
    pc.start()

    return box
class Status_Monitor(object):
    """ Display the tasks running and waiting on each worker

    Parameters
    ----------
    addr: tuple, optional
        (ip, port) of scheduler.  Defaults to scheduler of recent Executor
    interval: Number, optional
        Interval between updates.  Defaults to 1s
    loop: IOLoop, optional
        Loop on which updates are scheduled.  Defaults to the current loop
    """
    def __init__(self, addr=None, interval=1000.00, loop=None):
        if addr is None:
            # Derive the HTTP address from the default executor's scheduler.
            sched = default_executor().scheduler
            if isinstance(sched, rpc):
                addr = (sched.ip, 9786)
            elif isinstance(sched, Scheduler):
                addr = ('127.0.0.1', sched.services['http'].port)

        self.addr = addr
        self.interval = interval
        self.display_notebook = False

        if is_kernel() and not curstate().notebook:
            output_notebook()
            assert curstate().notebook

        task_source, task_table = task_table_plot()
        self.task_source = task_source
        self.task_table = task_table

        worker_source, worker_table = worker_table_plot()
        self.worker_source = worker_source
        self.worker_table = worker_table

        self.output = vplot(self.worker_table, self.task_table)

        self.client = AsyncHTTPClient()

        self.loop = loop or IOLoop.current()
        # One immediate refresh, then one per interval.
        self.loop.add_callback(self.update)
        self._pc = PeriodicCallback(self.update, self.interval, io_loop=self.loop)
        self._pc.start()

    def _ipython_display_(self, **kwargs):
        """Show the plot and turn on notebook pushes for later updates."""
        show(self.output)
        self.display_notebook = True

    @gen.coroutine
    def update(self):
        """ Query the Scheduler, update the figure

        Fetches ``tasks.json`` and ``workers.json`` from the scheduler's HTTP
        address, feeds the decoded payloads to the two data sources and
        pushes the change to the notebook when the plot is displayed.
        """
        with log_errors():
            task_request = self.client.fetch('http://%s:%d/tasks.json' % self.addr)
            worker_request = self.client.fetch('http://%s:%d/workers.json' % self.addr)
            task_response, worker_response = yield [task_request, worker_request]

            task_data = json.loads(task_response.body.decode())
            worker_data = json.loads(worker_response.body.decode())

            task_table_update(self.task_source, task_data)
            worker_table_update(self.worker_source, worker_data)

            if self.display_notebook:
                push_notebook()
def run():
    """Start the periodic log-rotate consumer and run the IOLoop."""
    rotator = LogRotate()
    consumer_tick = PeriodicCallback(rotator.consumer, 1000)
    consumer_tick.start()
    IOLoop.instance().start()
class WorkStealing(SchedulerPlugin):
    """Scheduler plugin that moves processing tasks between workers.

    ``balance`` is run by a ``PeriodicCallback`` on the scheduler's loop and
    calls ``move_task`` for keys it decides to reassign.  Stealable keys are
    bucketed into 15 "levels"; ``steal_time_ratio`` picks the level.
    """

    def __init__(self, scheduler):
        self.scheduler = scheduler
        # One set of stealable keys per level, across all workers.
        self.stealable_all = [set() for i in range(15)]
        # worker -> list of 15 sets of stealable keys (one per level).
        self.stealable = dict()
        # key -> (worker, level) under which the key is currently filed.
        self.key_stealable = dict()
        # key split -> keys whose compute time was not found in
        # ``scheduler.processing`` (see ``steal_time_ratio``).
        self.stealable_unknown_durations = defaultdict(set)
        # Cost factor applied per level when deciding whether to steal.
        self.cost_multipliers = [1 + 2 ** (i - 6) for i in range(15)]
        self.cost_multipliers[0] = 1

        for worker in scheduler.workers:
            self.add_worker(worker=worker)

        self._pc = PeriodicCallback(callback=self.balance,
                                    callback_time=100,
                                    io_loop=self.scheduler.loop)
        # Start the callback from the loop itself.
        self.scheduler.loop.add_callback(self._pc.start)
        self.scheduler.plugins.append(self)
        self.scheduler.extensions['stealing'] = self
        # Each entry appended by ``balance`` is a list of move records.
        self.log = deque(maxlen=100000)
        self.count = 0

        scheduler.worker_handlers['long-running'] = self.transition_long_running

    def add_worker(self, scheduler=None, worker=None):
        """Create the 15 empty per-level sets for ``worker``."""
        self.stealable[worker] = [set() for i in range(15)]

    def remove_worker(self, scheduler=None, worker=None):
        """Forget the per-level sets of ``worker``."""
        del self.stealable[worker]

    def teardown(self):
        """Stop the periodic ``balance`` callback."""
        self._pc.stop()

    def transition(self, key, start, finish, compute_start=None,
                   compute_stop=None, *args, **kwargs):
        """Keep the stealable bookkeeping in step with task transitions."""
        if finish == 'processing':
            self.put_key_in_stealable(key)

        if start == 'processing':
            self.remove_key_from_stealable(key)
            if finish == 'memory':
                # A key of this split finished: retry the keys of the same
                # split that were parked for lack of a compute time.
                ks = key_split(key)
                if ks in self.stealable_unknown_durations:
                    for k in self.stealable_unknown_durations.pop(ks):
                        if self.scheduler.task_state[k] == 'processing':
                            self.put_key_in_stealable(k, split=ks)

    def transition_long_running(self, key=None, worker=None):
        """Handler for 'long-running' worker messages: stop offering ``key``."""
        self.remove_key_from_stealable(key)

    def put_key_in_stealable(self, key, split=None):
        """File ``key`` under its current worker and computed level.

        Nothing is recorded when ``steal_time_ratio`` returns ``None``.
        """
        worker = self.scheduler.rprocessing[key]
        cost_multiplier, level = self.steal_time_ratio(key, split=split)
        if cost_multiplier is not None:
            self.stealable_all[level].add(key)
            self.stealable[worker][level].add(key)
            self.key_stealable[key] = (worker, level)

    def remove_key_from_stealable(self, key):
        """Remove ``key`` from all stealable structures, if it is filed."""
        result = self.key_stealable.pop(key, None)
        if result is not None:
            worker, level = result
            try:
                self.stealable[worker][level].remove(key)
            except KeyError:
                pass
            try:
                self.stealable_all[level].remove(key)
            except KeyError:
                pass

    def steal_time_ratio(self, key, split=None):
        """ The compute to communication time ratio of a key

        Returns
        -------

        cost_multiplier: The increased cost from moving this task as a factor.
        For example a result of zero implies a task without dependencies.
        level: The location within a stealable list to place this value

        Both values are ``None`` when the key should not be stolen.
        """
        if (key not in self.scheduler.loose_restrictions
                and (key in self.scheduler.host_restrictions or
                     key in self.scheduler.worker_restrictions) or
                key in self.scheduler.resource_restrictions):
            return None, None  # don't steal

        if not self.scheduler.dependencies[key]:  # no dependencies fast path
            return 0, 0

        # Dependencies with no recorded size count as 1000.
        nbytes = sum(self.scheduler.nbytes.get(k, 1000)
                     for k in self.scheduler.dependencies[key])

        transfer_time = nbytes / BANDWIDTH + LATENCY
        split = split or key_split(key)
        if split in fast_tasks:
            return None, None
        try:
            worker = self.scheduler.rprocessing[key]
            compute_time = self.scheduler.processing[worker][key]
        except KeyError:
            # Park the key until a compute time for this split is available.
            self.stealable_unknown_durations[split].add(key)
            return None, None
        else:
            if compute_time < 0.005:  # 5ms, just give up
                return None, None
            cost_multiplier = transfer_time / compute_time
            if cost_multiplier > 100:
                return None, None

            # Logarithmic bucket, offset by 6 and never below 1.
            level = int(round(log(cost_multiplier) / log_2 + 6, 0))
            level = max(1, level)
            return cost_multiplier, level

    def move_task(self, key, victim, thief):
        """Reassign ``key`` from worker ``victim`` to worker ``thief``.

        Updates the scheduler's processing/occupancy bookkeeping, tells the
        victim to release the task and sends the task to the thief.
        """
        try:
            if self.scheduler.validate:
                if victim != self.scheduler.rprocessing[key]:
                    import pdb; pdb.set_trace()

            self.remove_key_from_stealable(key)
            logger.debug("Moved %s, %s: %2f -> %s: %2f", key,
                         victim, self.scheduler.occupancy[victim],
                         thief, self.scheduler.occupancy[thief])

            # Take the task's duration off the victim ...
            duration = self.scheduler.processing[victim].pop(key)
            self.scheduler.occupancy[victim] -= duration
            self.scheduler.total_occupancy -= duration

            # ... and charge the thief with the estimated duration plus the
            # transfer of the dependencies it does not hold yet.
            duration = self.scheduler.task_duration.get(key_split(key), 0.5)
            duration += sum(self.scheduler.nbytes[key] for key in
                            self.scheduler.dependencies[key] -
                            self.scheduler.has_what[thief]) / BANDWIDTH
            self.scheduler.processing[thief][key] = duration
            self.scheduler.rprocessing[key] = thief
            self.scheduler.occupancy[thief] += duration
            self.scheduler.total_occupancy += duration
            self.put_key_in_stealable(key)

            self.scheduler.worker_comms[victim].send({'op': 'release-task',
                                                      'reason': 'stolen',
                                                      'key': key})

            try:
                self.scheduler.send_task_to_worker(thief, key)
            except CommClosedError:
                self.scheduler.remove_worker(thief)
        except CommClosedError:
            logger.info("Worker comm closed while stealing: %s", victim)
        except Exception as e:
            logger.exception(e)
            if LOG_PDB:
                import pdb; pdb.set_trace()
            raise

    def balance(self):
        """Periodic callback: try to move tasks from busy to idle workers.

        Returns early when there are no idle workers or when every worker is
        idle.  Moves performed in one call are appended to ``self.log`` as a
        single list.
        """
        with log_errors():
            i = 0
            s = self.scheduler
            occupancy = s.occupancy
            idle = s.idle
            saturated = s.saturated
            if not idle or len(idle) == len(self.scheduler.workers):
                return

            log = list()
            start = time()

            seen = False
            # NOTE(review): ``acted`` is never set to True in this method, so
            # the ``seen and not acted`` test below only depends on ``seen``.
            acted = False

            if not s.saturated:
                # No worker is flagged saturated: fall back to the ten most
                # occupied workers that have more tasks than cores.
                saturated = topk(10, s.workers, key=occupancy.get)
                saturated = [w for w in saturated
                             if occupancy[w] > 0.2
                             and len(s.processing[w]) > s.ncores[w]]
            elif len(s.saturated) < 20:
                saturated = sorted(saturated, key=occupancy.get, reverse=True)
            if len(idle) < 20:
                idle = sorted(idle, key=occupancy.get)

            for level, cost_multiplier in enumerate(self.cost_multipliers):
                if not idle:
                    break
                for sat in list(saturated):
                    stealable = self.stealable[sat][level]
                    if not stealable or not idle:
                        continue
                    else:
                        seen = True

                    for key in list(stealable):
                        i += 1
                        if not idle:
                            break
                        # Cycle through the idle workers.
                        idl = idle[i % len(idle)]
                        duration = s.processing[sat][key]

                        if (occupancy[idl] + cost_multiplier * duration
                                <= occupancy[sat] - duration / 2):
                            self.move_task(key, sat, idl)
                            log.append((start, level, key, duration,
                                        sat, occupancy[sat],
                                        idl, occupancy[idl]))
                            self.scheduler.check_idle_saturated(sat)
                            self.scheduler.check_idle_saturated(idl)
                            seen = True

                if self.cost_multipliers[level] < 20:  # don't steal from public at cost
                    stealable = self.stealable_all[level]
                    if stealable:
                        seen = True
                    for key in list(stealable):
                        if not idle:
                            break

                        sat = s.rprocessing[key]
                        if occupancy[sat] < 0.2:
                            continue
                        if len(s.processing[sat]) <= s.ncores[sat]:
                            continue

                        i += 1
                        idl = idle[i % len(idle)]
                        duration = s.processing[sat][key]
                        if (occupancy[idl] + cost_multiplier * duration
                                <= occupancy[sat] - duration / 2):
                            self.move_task(key, sat, idl)
                            log.append((start, level, key, duration,
                                        sat, occupancy[sat],
                                        idl, occupancy[idl]))
                            self.scheduler.check_idle_saturated(sat)
                            self.scheduler.check_idle_saturated(idl)
                            seen = True

                if seen and not acted:
                    break

            if log:
                self.log.append(log)
                self.count += 1
            stop = time()
            if self.scheduler.digests:
                self.scheduler.digests['steal-duration'].add(stop - start)

    def restart(self, scheduler):
        """Empty every stealable structure (the per-worker lists stay)."""
        for stealable in self.stealable.values():
            for s in stealable:
                s.clear()

        for s in self.stealable_all:
            s.clear()
        self.key_stealable.clear()
        self.stealable_unknown_durations.clear()

    def story(self, *keys):
        """Return the logged move records that contain any of ``keys``."""
        keys = set(keys)
        return [t for L in self.log
                for t in L
                if any(x in keys for x in t)]
def test__lifecycle_hooks():
    """Check the order of server/session lifecycle hooks seen by the handler."""
    app = Application()
    hook_handler = HookTestHandler()
    app.add(hook_handler)

    with ManagedServerLoop(app, check_unused_sessions_milliseconds=20) as server:
        # wait for server callbacks to run before we mix in the
        # session, this keeps the test deterministic
        def stop_when_loaded():
            if len(hook_handler.hooks) == 4:
                server.io_loop.stop()

        load_poller = PeriodicCallback(stop_when_loaded, 1,
                                       io_loop=server.io_loop)
        load_poller.start()
        server.io_loop.start()
        load_poller.stop()

        # now we create a session
        client_session = pull_session(session_id='test__lifecycle_hooks',
                                      url=url(server),
                                      io_loop=server.io_loop)
        client_doc = client_session.document
        assert len(client_doc.roots) == 1

        server_session = server.get_session('/', client_session.id)
        server_doc = server_session.document
        assert len(server_doc.roots) == 1

        client_session.close()
        # expire the session quickly rather than after the
        # usual timeout
        server_session.request_expiration()

        def stop_loop():
            server.io_loop.stop()

        server.io_loop.call_later(0.1, stop_loop)
        server.io_loop.start()

    expected_handler_hooks = ["server_loaded",
                              "next_tick_server",
                              "timeout_server",
                              "periodic_server",
                              "session_created",
                              "next_tick_session",
                              "modify",
                              "timeout_session",
                              "periodic_session",
                              "session_destroyed",
                              "server_unloaded"]
    assert hook_handler.hooks == expected_handler_hooks

    client_hook_list = client_doc.roots[0]
    server_hook_list = server_doc.roots[0]

    assert hook_handler.load_count == 1
    assert hook_handler.unload_count == 1
    assert hook_handler.session_creation_async_value == 6
    assert client_doc.title == "Modified"
    assert server_doc.title == "Modified"

    # the client session doesn't see the event that adds "session_destroyed" since
    # we shut down at that point.
    assert client_hook_list.hooks == ["session_created", "modify"]
    assert server_hook_list.hooks == ["session_created", "modify", "session_destroyed"]
class DRMAACluster(object):
    """A local scheduler plus dask workers submitted through DRMAA."""

    def __init__(self, template=None, cleanup_interval=1000, hostname=None,
                 script=None, **kwargs):
        """
        Dask workers launched by a DRMAA-compatible cluster

        Parameters
        ----------
        jobName: string
            Name of the job as known by the DRMAA cluster.
        script: string (optional)
            Path to the dask-worker executable script.
            A temporary file will be made if none is provided (recommended)
        args: list
            Extra string arguments to pass to dask-worker
        outputPath: string
        errorPath: string
        workingDirectory: string
            Where dask-worker runs, defaults to current directory
        nativeSpecification: string
            Options native to the job scheduler

        Examples
        --------
        >>> from dask_drmaa import DRMAACluster          # doctest: +SKIP
        >>> cluster = DRMAACluster()                     # doctest: +SKIP
        >>> cluster.start_workers(10)                    # doctest: +SKIP

        >>> from distributed import Client               # doctest: +SKIP
        >>> client = Client(cluster)                     # doctest: +SKIP

        >>> future = client.submit(lambda x: x + 1, 10)  # doctest: +SKIP
        >>> future.result()                              # doctest: +SKIP
        11
        """
        self.hostname = hostname or socket.gethostname()
        logger.info("Start local scheduler at %s", self.hostname)
        self.local_cluster = LocalCluster(n_workers=0, ip='', **kwargs)

        if script is None:
            # mkstemp creates the file atomically; the deprecated mktemp only
            # returns a name that somebody else may create in the meantime.
            fd, fn = tempfile.mkstemp(suffix='sh',
                                      prefix='dask-worker-script',
                                      dir=os.path.curdir)
            self.script = fn
            self._should_cleanup_script = True

            with os.fdopen(fd, 'wt') as f:
                f.write(script_template)

            @atexit.register
            def remove_script():
                if os.path.exists(fn):
                    os.remove(fn)

            os.chmod(self.script, 0o777)
        else:
            # The script belongs to the caller; close() must not delete it.
            self._should_cleanup_script = False
            self.script = script
            # TODO: check that user-provided script is executable

        self.template = merge(default_template,
                              {'remoteCommand': self.script},
                              template or {})

        # Must exist before the cleanup callback can run.
        self.workers = {}  # {job-id: {'resource': quantity}}

        self._cleanup_callback = PeriodicCallback(callback=self.cleanup_closed_workers,
                                                  callback_time=cleanup_interval,
                                                  io_loop=self.scheduler.loop)
        self._cleanup_callback.start()

    @gen.coroutine
    def _start(self):
        pass

    @property
    def scheduler(self):
        """Scheduler of the wrapped local cluster."""
        return self.local_cluster.scheduler

    @property
    def scheduler_address(self):
        """Address of the wrapped local cluster's scheduler."""
        return self.scheduler.address

    def create_job_template(self, **kwargs):
        """Build a DRMAA job template from ``self.template`` and ``kwargs``.

        The scheduler address is prepended to the template's ``args``.

        Raises
        ------
        ValueError
            If a key is not an attribute of the DRMAA job template.
        """
        template = self.template.copy()
        if kwargs:
            template.update(kwargs)
        template['args'] = [self.scheduler_address] + template['args']

        jt = get_session().createJobTemplate()
        valid_attributes = dir(jt)

        for key, value in template.items():
            if key not in valid_attributes:
                raise ValueError("Invalid job template attribute %s" % key)
            setattr(jt, key, value)

        return jt

    def start_workers(self, n=1, **kwargs):
        """Submit ``n`` worker jobs; ``kwargs`` override template entries."""
        with log_errors():
            with self.create_job_template(**kwargs) as jt:
                ids = get_session().runBulkJobs(jt, 1, n, 1)
                logger.info("Start %d workers. Job ID: %s", len(ids), ids[0].split('.')[0])
                self.workers.update({jid: kwargs for jid in ids})

    def stop_workers(self, worker_ids, sync=False):
        """Terminate the given jobs and forget them.

        Parameters
        ----------
        worker_ids: str or iterable of str
            Job id(s) to terminate.
        sync: bool
            Wait for the jobs through ``synchronize`` when true.
        """
        if isinstance(worker_ids, str):
            worker_ids = [worker_ids]
        else:
            # Snapshot the ids: close() passes self.workers itself, which is
            # emptied by the loop below and would leave nothing to wait for.
            worker_ids = list(worker_ids)

        if not worker_ids:
            return

        for wid in worker_ids:
            try:
                get_session().control(wid, drmaa.JobControlAction.TERMINATE)
            except drmaa.errors.InvalidJobException:
                pass
            # The id may be unknown or already removed by the cleanup
            # callback; that is not an error.
            self.workers.pop(wid, None)

        logger.info("Stop workers %s", worker_ids)
        if sync:
            get_session().synchronize(worker_ids, dispose=True)

    def close(self):
        """Close the local cluster, stop all workers, remove the temp script.

        A script supplied by the caller is left in place.
        """
        logger.info("Closing DRMAA cluster")
        self.local_cluster.close()
        if self.workers:
            self.stop_workers(self.workers, sync=True)

        if self._should_cleanup_script and os.path.exists(self.script):
            os.remove(self.script)

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def cleanup_closed_workers(self):
        """Periodic callback: drop jobs whose status is 'closed' or 'done'."""
        for jid in list(self.workers):
            if get_session().jobStatus(jid) in ('closed', 'done'):
                logger.info("Removing closed worker %s", jid)
                del self.workers[jid]

    def __del__(self):
        # Best effort only; never let cleanup errors escape a finalizer.
        try:
            self.close()
        except Exception:
            pass

    def __str__(self):
        return "<%s: %d workers>" % (self.__class__.__name__, len(self.workers))

    __repr__ = __str__
def _widget(self):
    """Create IPython widget for display within a notebook

    The widget is built once and cached in ``self._cached_widget``. When
    ``ipywidgets`` cannot be imported, ``None`` is cached and returned.
    A periodic callback stored under ``periodic_callbacks["cluster-repr"]``
    refreshes the status and scaling text.
    """
    try:
        return self._cached_widget
    except AttributeError:
        pass

    try:
        from ipywidgets import (
            HTML,
            Accordion,
            Button,
            HBox,
            IntText,
            Layout,
            Tab,
            VBox,
        )
    except ImportError:
        self._cached_widget = None
        return None

    layout = Layout(width="150px")

    status = HTML(self._repr_html_())

    if self._supports_scaling:
        request = IntText(0, description="Workers", layout=layout)
        scale = Button(description="Scale", layout=layout)

        minimum = IntText(0, description="Minimum", layout=layout)
        maximum = IntText(0, description="Maximum", layout=layout)
        adapt = Button(description="Adapt", layout=layout)

        accordion = Accordion(
            [HBox([request, scale]), HBox([minimum, maximum, adapt])],
            layout=Layout(min_width="500px"),
        )
        accordion.selected_index = None
        accordion.set_title(0, "Manual Scaling")
        accordion.set_title(1, "Adaptive Scaling")

        # ``update`` is defined further down; the callbacks only look it up
        # when a button is clicked.
        def adapt_cb(b):
            self.adapt(minimum=minimum.value, maximum=maximum.value)
            update()

        adapt.on_click(adapt_cb)

        def scale_cb(b):
            with log_errors():
                n = request.value
                # Manual scaling stops adaptive scaling if one is attached.
                with suppress(AttributeError):
                    self._adaptive.stop()
                self.scale(n)
                update()

        scale.on_click(scale_cb)
    else:  # pragma: no cover
        accordion = HTML("")

    scale_status = HTML(self._scaling_status())

    tab = Tab()
    tab.children = [status, VBox([scale_status, accordion])]
    tab.set_title(0, "Status")
    tab.set_title(1, "Scaling")

    self._cached_widget = tab

    def update():
        status.value = self._repr_html_()
        scale_status.value = self._scaling_status()

    # ``default="ms"`` is the unit parse_timedelta applies to a bare number.
    # It must not be passed to dask.config.get, where it would instead act
    # as the fallback *value* for a missing key.
    cluster_repr_interval = parse_timedelta(
        dask.config.get("distributed.deploy.cluster-repr-interval"),
        default="ms",
    )
    pc = PeriodicCallback(update, cluster_repr_interval * 1000)
    self.periodic_callbacks["cluster-repr"] = pc
    pc.start()

    return tab
class MasterController(AsyncDeviceServer):
    """This is the main KATCP interface for the FBFUSE
    multi-beam beamformer on MeerKAT.

    This interface satisfies the following ICDs:
    CAM-FBFUSE: <link>
    TUSE-FBFUSE: <link>
    """
    VERSION_INFO = ("mpikat-api", 0, 1)
    BUILD_INFO = ("mpikat-implementation", 0, 1, "rc1")
    DEVICE_STATUSES = ["ok", "degraded", "fail"]

    def __init__(self, ip, port, worker_pool):
        """
        @brief       Construct new MasterController instance

        @params  ip           The IP address on which the server should listen
        @params  port         The port that the server should bind to
        @params  worker_pool  Pool object used to track worker servers
        """
        super(MasterController, self).__init__(ip, port)
        self._products = {}
        self._server_pool = worker_pool

    def start(self):
        """
        @brief  Start the MasterController server
        """
        super(MasterController, self).start()

    def stop(self):
        """
        @brief  Stop the periodic NTP check, then stop the server
        """
        self._ntp_callback.stop()
        super(MasterController, self).stop()

    def add_sensor(self, sensor):
        """
        @brief  Log and register a sensor
        """
        log.debug("Adding sensor: {}".format(sensor.name))
        super(MasterController, self).add_sensor(sensor)

    def remove_sensor(self, sensor):
        """
        @brief  Log and unregister a sensor
        """
        log.debug("Removing sensor: {}".format(sensor.name))
        super(MasterController, self).remove_sensor(sensor)

    def setup_sensors(self):
        """
        @brief  Set up monitoring sensors.

        @note   The following sensors are made available on top of default sensors
                implemented in AsyncDeviceServer and its base classes.

                device-status:      Reports the health status of the controller and associated devices:
                                    Among other things report HW failure, SW failure and observation failure.

                local-time-synced:  Indicates whether the local time of the servers
                                    is synchronised to the master time reference (use NTP).
                                    This sensor is aggregated from all nodes that are part
                                    of FBF and will return "not sync'd" if any nodes are
                                    unsynchronised.

                products:           The list of product_ids that controller is currently handling
        """
        device_status = Sensor.discrete(
            "device-status",
            description="Health status of FBFUSE",
            params=self.DEVICE_STATUSES,
            default="ok",
            initial_status=Sensor.NOMINAL)
        self._device_status = device_status
        self.add_sensor(device_status)

        time_synced = Sensor.boolean(
            "local-time-synced",
            description="Indicates FBF is NTP syncronised.",
            default=True,
            initial_status=Sensor.UNKNOWN)
        self._local_time_synced = time_synced
        self.add_sensor(time_synced)

        def ntp_callback():
            log.debug("Checking NTP sync")
            try:
                in_sync = check_ntp_sync()
            except Exception:
                # A failed check is reported as "not synced".
                log.exception("Unable to check NTP sync")
                self._local_time_synced.set_value(False)
                return
            if not in_sync:
                log.warning("Server is not NTP synced")
            self._local_time_synced.set_value(in_sync)

        # Check once right away, then on every NTP_CALLBACK_PERIOD.
        ntp_callback()
        self._ntp_callback = PeriodicCallback(ntp_callback, NTP_CALLBACK_PERIOD)
        self._ntp_callback.start()

        products_sensor = Sensor.string(
            "products",
            description="The names of the currently configured products",
            default="",
            initial_status=Sensor.NOMINAL)
        self._products_sensor = products_sensor
        self.add_sensor(products_sensor)

    def _update_products_sensor(self):
        """Publish the comma-separated product ids on the products sensor."""
        product_ids = ",".join(self._products)
        self._products_sensor.set_value(product_ids)

    def _get_product(self, product_id):
        """Return the product registered under ``product_id``.

        Raises ProductLookupError when the id is unknown.
        """
        if product_id in self._products:
            return self._products[product_id]
        raise ProductLookupError(
            "No product configured with ID: {}".format(product_id))

    @request(Str(), Int())
    @return_reply()
    def request_register_worker_server(self, req, hostname, port):
        """
        @brief   Register an WorkerWrapper instance

        @params hostname The hostname for the worker server
        @params port     The port number that the worker server serves on

        @detail  Register an WorkerWrapper instance that can be used for FBFUSE
                 computation. FBFUSE has no preference for the order in which control
                 servers are allocated to a subarray. An WorkerWrapper wraps an atomic
                 unit of compute comprised of one CPU, one GPU and one NIC (i.e. one NUMA
                 node on an FBFUSE compute server).
        """
        log.debug("Received request to register worker server at {}:{}".format(
            hostname, port))
        self._server_pool.add(hostname, port)
        return ("ok", )

    @request(Str(), Int())
    @return_reply()
    def request_deregister_worker_server(self, req, hostname, port):
        """
        @brief   Deregister an WorkerWrapper instance

        @params hostname The hostname for the worker server
        @params port     The port number that the worker server serves on

        @detail  The graceful way of removing a server from rotation. If the server is
                 currently actively processing an exception will be raised.
        """
        log.debug(
            "Received request to deregister worker server at {}:{}".format(
                hostname, port))
        try:
            self._server_pool.remove(hostname, port)
        except ServerDeallocationError as error:
            log.error(
                "Request to deregister worker server at {}:{} failed with error: {}"
                .format(hostname, port, str(error)))
            return ("fail", str(error))
        return ("ok", )

    @request()
    @return_reply(Int())
    def request_worker_server_list(self, req):
        """
        @brief   List all control servers regardless of if they are reachable
        """
        for server in self._server_pool.list_all():
            req.inform("{}".format(server))
        server_count = len(self._server_pool.list_all())
        return ("ok", server_count)

    @request()
    @return_reply(Int())
    def request_worker_server_status(self, req):
        """
        @brief   List all used and available worker servers and
                 provide minimal metadata
        """
        for server in self._server_pool.used():
            req.inform("{} allocated".format(server))
        for server in self._server_pool.available():
            req.inform("{} free".format(server))
        total = len(self._server_pool.used()) + len(self._server_pool.available())
        return ("ok", total)

    @request()
    @return_reply(Int())
    def request_product_list(self, req):
        """
        @brief      List all currently registered products and their states

        @param      req               A katcp request object

        @note       The details of each product are provided via an #inform
                    as a JSON string containing information on the product state.

        @return     katcp reply object [[[ !product-list ok | (fail [error description]) <number of configured products> ]]],
        """
        for product_id, product in self._products.items():
            # One inform per product: {product_id: <product info>}.
            req.inform(json.dumps({product_id: product.info()}))
        return ("ok", len(self._products))
def __init__(self, template=None, cleanup_interval=1000, hostname=None,
             script=None, **kwargs):
    """
    Dask workers launched by a DRMAA-compatible cluster

    Parameters
    ----------
    template: dict (optional)
        Job-template overrides. Merged last, on top of ``default_template``
        and the ``remoteCommand`` entry pointing at the worker script, so
        keys given here win.
        NOTE(review): the entries below were listed as parameters in the
        previous docstring; they look like job-template keys accepted
        here -- confirm against ``default_template``.

        jobName: string
            Name of the job as known by the DRMAA cluster.
        args: list
            Extra string arguments to pass to dask-worker
        outputPath: string
        errorPath: string
        workingDirectory: string
            Where dask-worker runs, defaults to current directory
        nativeSpecification: string
            Options native to the job scheduler
    cleanup_interval: int
        Passed as ``callback_time`` to the ``PeriodicCallback`` that runs
        ``cleanup_closed_workers`` (Tornado interprets it as milliseconds).
    hostname: string (optional)
        Host name recorded on the cluster; defaults to
        ``socket.gethostname()``.
    script: string (optional)
        Path to the dask-worker executable script.
        A temporary file will be made if none is provided (recommended)
    **kwargs:
        Forwarded unchanged to ``LocalCluster``.

    Examples
    --------
    >>> from dask_drmaa import DRMAACluster  # doctest: +SKIP
    >>> cluster = DRMAACluster()  # doctest: +SKIP
    >>> cluster.start_workers(10)  # doctest: +SKIP

    >>> from distributed import Client  # doctest: +SKIP
    >>> client = Client(cluster)  # doctest: +SKIP

    >>> future = client.submit(lambda x: x + 1, 10)  # doctest: +SKIP
    >>> future.result()  # doctest: +SKIP
    11
    """
    self.hostname = hostname or socket.gethostname()
    logger.info("Start local scheduler at %s", self.hostname)
    self.local_cluster = LocalCluster(n_workers=0, ip='', **kwargs)

    if script is None:
        # mkstemp creates and opens the file atomically, which avoids the
        # name-guessing race of the deprecated tempfile.mktemp. It also
        # returns an absolute path, so the script can still be found if the
        # job runs from another directory.
        fd, fn = tempfile.mkstemp(suffix='sh',
                                  prefix='dask-worker-script',
                                  dir=os.path.curdir)
        self.script = fn
        with os.fdopen(fd, 'wt') as f:
            f.write(script_template)

        # The generated script is ours to clean up when the process exits.
        @atexit.register
        def remove_script():
            if os.path.exists(fn):
                os.remove(fn)

        # mkstemp creates the file owner-only; open it up so the job can
        # execute it.
        os.chmod(self.script, 0o777)
    else:
        self.script = script
        # TODO: check that user-provided script is executable

    self.template = merge(default_template,
                          {'remoteCommand': self.script},
                          template or {})

    # Initialise the bookkeeping before the periodic cleanup is scheduled so
    # the callback never observes a half-constructed object.
    self.workers = {}  # {job-id: {'resource': quantity}}

    self._cleanup_callback = PeriodicCallback(
        callback=self.cleanup_closed_workers,
        callback_time=cleanup_interval,
        io_loop=self.scheduler.loop)
    self._cleanup_callback.start()
class App_tornado(App_base):
    """Hijack Tornado event loop.

    Tornado does have a function to process events, but it does not
    work when the event loop is already running. Therefore we don't
    enter the real Tornado event loop, but just poll it regularly.

    The constructor monkey-patches the global ``IOLoop`` instance: its
    ``start`` and ``stop`` are replaced by dummies, and ``run_sync`` by a
    wrapper that temporarily restores the real ``start``. The originals stay
    reachable as ``_original_start`` / ``_original_stop`` /
    ``_original_run_sync`` on the loop object.
    """

    def __init__(self):
        # Try importing
        import tornado.ioloop

        # Get the "app" instance
        self.app = tornado.ioloop.IOLoop.instance()

        # Replace mainloop with a dummy
        def dummy_start():
            # User code called IOLoop.start(): warn instead of blocking, and
            # ignore sys.exit until the loop runs its next callback.
            print_mainloop_warning()
            sys._pyzoInterpreter.ignore_sys_exit = True
            self.app.add_callback(reset_sys_exit)

        def dummy_stop():
            pass

        def reset_sys_exit():
            sys._pyzoInterpreter.ignore_sys_exit = False

        def run_sync(func, timeout=None):
            # run_sync internally calls start(), so the real one has to be
            # put back for the duration of the call.
            self.app.start = self.app._original_start
            try:
                # Propagate the result: Tornado's run_sync returns the value
                # produced by ``func``; dropping it broke that contract.
                return self.app._original_run_sync(func, timeout)
            finally:
                self.app.start = self.app._dummy_start

        #
        self.app._original_start = self.app.start
        self.app._dummy_start = dummy_start
        self.app.start = self.app._dummy_start
        #
        self.app._original_stop = self.app.stop
        self.app._dummy_stop = dummy_stop
        self.app.stop = self.app._dummy_stop
        #
        self.app._original_run_sync = self.app.run_sync
        self.app.run_sync = run_sync

        # Notify that we integrated the event loop
        self.app._in_event_loop = "Pyzo"

        self._warned_about_process_events = False

    def process_events(self):
        """Warn (once) that synchronous event processing is unavailable."""
        if not self._warned_about_process_events:
            print("Warning: cannot process events synchronously in Tornado")
            self._warned_about_process_events = True
        #
        # self.app.run_sync(lambda x=None: None)

    def run(self, repl_callback, sleeptime=None):
        """Run the real Tornado loop, calling ``repl_callback`` periodically.

        ``repl_callback`` is scheduled every 50 ms via a ``PeriodicCallback``.
        ``sleeptime`` is accepted but not used by this implementation.
        """
        from tornado.ioloop import PeriodicCallback

        # Create timer
        self._timer = PeriodicCallback(repl_callback, 0.05 * 1000)
        self._timer.start()

        # Enter mainloop
        # NOTE(review): this first start() runs outside the try below, so a
        # KeyboardInterrupt raised here is not routed to _keyboard_interrupt,
        # and the loop is started again after it returns. Looks unintended --
        # confirm before changing.
        self.app._original_start()
        while True:
            try:
                self.app._original_start()
            except KeyboardInterrupt:
                self._keyboard_interrupt()
                self.app._original_stop()
                continue
            break

    def quit(self):
        """Stop the real Tornado loop."""
        self.app._original_stop()
class Spawner(LoggingConfigurable):
    """Base class for spawning single-user notebook servers.

    Subclass this, and override the following methods:

    - load_state
    - get_state
    - start
    - stop
    - poll

    As JupyterHub supports multiple users, an instance of the Spawner subclass
    is created for each user. If there are 20 JupyterHub users, there will be 20
    instances of the subclass.
    """

    # private attributes for tracking status
    _spawn_pending = False
    _start_pending = False
    _stop_pending = False
    _proxy_pending = False
    _waiting_for_response = False
    _jupyterhub_version = None
    _spawn_future = None

    @property
    def _log_name(self):
        """Return username:servername or username

        Used in logging for consistency with named servers.
        """
        if self.name:
            return '%s:%s' % (self.user.name, self.name)
        else:
            return self.user.name

    @property
    def pending(self):
        """Return the current pending event, if any

        Return False if nothing is pending.
        """
        if self._spawn_pending:
            return 'spawn'
        elif self._stop_pending:
            return 'stop'
        return False

    @property
    def ready(self):
        """Is this server ready to use?

        A server is not ready if an event is pending.
        """
        if self.pending:
            return False
        if self.server is None:
            return False
        return True

    @property
    def active(self):
        """Return True if the server is active.

        This includes fully running and ready or any pending start/stop event.
        """
        return bool(self.pending or self.ready)

    authenticator = Any()
    hub = Any()
    orm_spawner = Any()
    db = Any()

    @observe('orm_spawner')
    def _orm_spawner_changed(self, change):
        """Refresh the cached ``_server`` whenever ``orm_spawner`` changes."""
        if change.new and change.new.server:
            self._server = Server(orm_server=change.new.server)
        else:
            self._server = None

    user = Any()

    def __init_subclass__(cls, **kwargs):
        """Reject subclasses that do not override `start`, `stop` and `poll`.

        Raises:
            NotImplementedError: if any of the three methods on the subclass
                is still the very object defined on `Spawner`.
        """
        super().__init_subclass__()

        missing = []
        for attr in ('start','stop', 'poll'):
            if getattr(Spawner, attr) is getattr(cls, attr):
                missing.append(attr)

        if missing:
            raise NotImplementedError("class `{}` needs to redefine the `start`,"
                  "`stop` and `poll` methods. `{}` not redefined.".format(cls.__name__, '`, `'.join(missing)))

    proxy_spec = Unicode()

    @property
    def server(self):
        """The Server for this spawner, or None when there is none.

        Prefers the cached ``_server``; otherwise wraps the server of
        ``orm_spawner`` if it has one.
        """
        if hasattr(self, '_server'):
            return self._server
        if self.orm_spawner and self.orm_spawner.server:
            return Server(orm_server=self.orm_spawner.server)

    @server.setter
    def server(self, server):
        self._server = server
        if self.orm_spawner:
            if self.orm_spawner.server is not None:
                # delete the old value
                db = inspect(self.orm_spawner.server).session
                db.delete(self.orm_spawner.server)
            if server is None:
                self.orm_spawner.server = None
            else:
                self.orm_spawner.server = server.orm_server

    @property
    def name(self):
        """Name of the ``orm_spawner``, or '' when there is no orm_spawner."""
        if self.orm_spawner:
            return self.orm_spawner.name
        return ''

    admin_access = Bool(False)
    api_token = Unicode()
    oauth_client_id = Unicode()

    will_resume = Bool(False,
        help="""Whether the Spawner will resume on next start


        Default is False where each launch of the Spawner will be a new instance.
        If True, an existing Spawner will resume instead of starting anew
        (e.g. resuming a Docker container),
        and API tokens in use when the Spawner stops will not be deleted.
        """
    )

    ip = Unicode('',
        help="""
        The IP address (or hostname) the single-user server should listen on.

        The JupyterHub proxy implementation should be able to send packets to this interface.
        """
    ).tag(config=True)

    port = Integer(0,
        help="""
        The port for single-user servers to listen on.

        Defaults to `0`, which uses a randomly allocated port number each time.

        If set to a non-zero value, all Spawners will use the same port,
        which only makes sense if each server is on a different address,
        e.g. in containers.

        New in version 0.7.
        """
    ).tag(config=True)

    start_timeout = Integer(60,
        help="""
        Timeout (in seconds) before giving up on starting of single-user server.

        This is the timeout for start to return, not the timeout for the server to respond.
        Callers of spawner.start will assume that startup has failed if it takes longer than this.
        start should return when the server process is started and its location is known.
        """
    ).tag(config=True)

    http_timeout = Integer(30,
        help="""
        Timeout (in seconds) before giving up on a spawned HTTP server

        Once a server has successfully been spawned, this is the amount of time
        we wait before assuming that the server is unable to accept
        connections.
        """
    ).tag(config=True)

    poll_interval = Integer(30,
        help="""
        Interval (in seconds) on which to poll the spawner for single-user server's status.

        At every poll interval, each spawner's `.poll` method is called, which checks
        if the single-user server is still running. If it isn't running, then JupyterHub modifies
        its own state accordingly and removes appropriate routes from the configurable proxy.
        """
    ).tag(config=True)

    _callbacks = List()
    _poll_callback = Any()

    debug = Bool(False,
        help="Enable debug-logging of the single-user server"
    ).tag(config=True)

    options_form = Unicode(
        help="""
        An HTML form for options a user can specify on launching their server.

        The surrounding `<form>` element and the submit button are already provided.

        For example:

        .. code:: html

            Set your key:
            <input name="key" val="default_key"></input>
            <br>
            Choose a letter:
            <select name="letter" multiple="true">
              <option value="A">The letter A</option>
              <option value="B">The letter B</option>
            </select>

        The data from this form submission will be passed on to your spawner in `self.user_options`
    """).tag(config=True)

    def options_from_form(self, form_data):
        """Interpret HTTP form data

        Form data will always arrive as a dict of lists of strings.
        Override this function to understand single-values, numbers, etc.

        This should coerce form data into the structure expected by self.user_options,
        which must be a dict.

        Instances will receive this data on self.user_options, after passing through this function,
        prior to `Spawner.start`.
        """
        return form_data

    user_options = Dict(
        help="""
        Dict of user specified options for the user's spawned instance of a single-user server.

        These user options are usually provided by the `options_form` displayed to the user when they start
        their server.
        """)

    env_keep = List([
        'PATH',
        'PYTHONPATH',
        'CONDA_ROOT',
        'CONDA_DEFAULT_ENV',
        'VIRTUAL_ENV',
        'LANG',
        'LC_ALL',
    ],
        help="""
        Whitelist of environment variables for the single-user server to inherit from the JupyterHub process.

        This whitelist is used to ensure that sensitive information in the JupyterHub process's environment
        (such as `CONFIGPROXY_AUTH_TOKEN`) is not passed to the single-user server's process.
        """
    ).tag(config=True)

    env = Dict(help="""Deprecated: use Spawner.get_env or Spawner.environment

    - extend Spawner.get_env for adding required env in Spawner subclasses
    - Spawner.environment for config-specified env
    """)

    environment = Dict(
        help="""
        Extra environment variables to set for the single-user server's process.

        Environment variables that end up in the single-user server's process come from 3 sources:
          - This `environment` configurable
          - The JupyterHub process' environment variables that are whitelisted in `env_keep`
          - Variables to establish contact between the single-user notebook and the hub (such as JUPYTERHUB_API_TOKEN)

        The `environment` configurable should be set by JupyterHub administrators to add
        installation specific environment variables. It is a dict where the key is the name of the environment
        variable, and the value can be a string or a callable. If it is a callable, it will be called
        with one parameter (the spawner instance), and should return a string fairly quickly (no blocking
        operations please!).

        Note that the spawner class' interface is not guaranteed to be exactly same across upgrades,
        so if you are using the callable take care to verify it continues to work after upgrades!
        """
    ).tag(config=True)

    cmd = Command(['jupyterhub-singleuser'],
        allow_none=True,
        help="""
        The command used for starting the single-user server.

        Provide either a string or a list containing the path to the startup script command. Extra arguments,
        other than this path, should be provided via `args`.

        This is usually set if you want to start the single-user server in a different python
        environment (with virtualenv/conda) than JupyterHub itself.

        Some spawners allow shell-style expansion here, allowing you to use environment variables.
        Most, including the default, do not. Consult the documentation for your spawner to verify!
        """
    ).tag(config=True)

    args = List(Unicode(),
        help="""
        Extra arguments to be passed to the single-user server.

        Some spawners allow shell-style expansion here, allowing you to use environment variables here.
        Most, including the default, do not. Consult the documentation for your spawner to verify!
        """
    ).tag(config=True)

    notebook_dir = Unicode(
        help="""
        Path to the notebook directory for the single-user server.

        The user sees a file listing of this directory when the notebook interface is started. The
        current interface does not easily allow browsing beyond the subdirectories in this directory's tree.

        `~` will be expanded to the home directory of the user, and {username} will be replaced
        with the name of the user.

        Note that this does *not* prevent users from accessing files outside of this path! They
        can do so with many other means.
        """
    ).tag(config=True)

    default_url = Unicode(
        help="""
        The URL the single-user server should start in.

        `{username}` will be expanded to the user's username

        Example uses:
        - You can set `notebook_dir` to `/` and `default_url` to `/tree/home/{username}` to allow people to
          navigate the whole filesystem from their notebook server, but still start in their home directory.
        - Start with `/notebooks` instead of `/tree` if `default_url` points to a notebook instead of a directory.
        - You can set this to `/lab` to have JupyterLab start by default, rather than Jupyter Notebook.
        """
    ).tag(config=True)

    @validate('notebook_dir', 'default_url')
    def _deprecate_percent_u(self, proposal):
        """Rewrite the deprecated ``%U`` placeholder to ``{username}``.

        Logs a deprecation warning when a replacement is made and returns
        the (possibly rewritten) proposed value.
        """
        v = proposal['value']
        if '%U' in v:
            self.log.warning("%%U for username in %s is deprecated in JupyterHub 0.7, use {username}",
                proposal['trait'].name,
            )
            v = v.replace('%U', '{username}')
            self.log.warning("Converting %r to %r", proposal['value'], v)
        return v

    disable_user_config = Bool(False,
        help="""
        Disable per-user configuration of single-user servers.

        When starting the user's single-user server, any config file found in the user's $HOME directory
        will be ignored.

        Note: a user could circumvent this if the user modifies their Python environment, such as when
        they have their own conda environments / virtualenvs / containers.
        """
    ).tag(config=True)

    mem_limit = ByteSpecification(None,
        help="""
        Maximum number of bytes a single-user notebook server is allowed to use.

        Allows the following suffixes:
          - K -> Kilobytes
          - M -> Megabytes
          - G -> Gigabytes
          - T -> Terabytes

        If the single user server tries to allocate more memory than this,
        it will fail. There is no guarantee that the single-user notebook server
        will be able to allocate this much memory - only that it can not
        allocate more than this.

        **This is a configuration setting. Your spawner must implement support
        for the limit to work.** The default spawner, `LocalProcessSpawner`,
        does **not** implement this support. A custom spawner **must** add
        support for this setting for it to be enforced.
        """
    ).tag(config=True)

    cpu_limit = Float(None,
        allow_none=True,
        help="""
        Maximum number of cpu-cores a single-user notebook server is allowed to use.

        If this value is set to 0.5, allows use of 50% of one CPU.
        If this value is set to 2, allows use of up to 2 CPUs.

        The single-user notebook server will never be scheduled by the kernel to
        use more cpu-cores than this. There is no guarantee that it can
        access this many cpu-cores.

        **This is a configuration setting. Your spawner must implement support
        for the limit to work.** The default spawner, `LocalProcessSpawner`,
        does **not** implement this support. A custom spawner **must** add
        support for this setting for it to be enforced.
        """
    ).tag(config=True)

    mem_guarantee = ByteSpecification(None,
        help="""
        Minimum number of bytes a single-user notebook server is guaranteed to have available.

        Allows the following suffixes:
          - K -> Kilobytes
          - M -> Megabytes
          - G -> Gigabytes
          - T -> Terabytes

        **This is a configuration setting. Your spawner must implement support
        for the limit to work.** The default spawner, `LocalProcessSpawner`,
        does **not** implement this support. A custom spawner **must** add
        support for this setting for it to be enforced.
        """
    ).tag(config=True)

    cpu_guarantee = Float(None,
        allow_none=True,
        help="""
        Minimum number of cpu-cores a single-user notebook server is guaranteed to have available.

        If this value is set to 0.5, allows use of 50% of one CPU.
        If this value is set to 2, allows use of up to 2 CPUs.

        **This is a configuration setting. Your spawner must implement support
        for the limit to work.** The default spawner, `LocalProcessSpawner`,
        does **not** implement this support. A custom spawner **must** add
        support for this setting for it to be enforced.
        """
    ).tag(config=True)

    pre_spawn_hook = Any(
        help="""
        An optional hook function that you can implement to do some bootstrapping work before
        the spawner starts. For example, create a directory for your user or load initial content.

        This can be set independent of any concrete spawner implementation.

        Example::

            from subprocess import check_call
            def my_hook(spawner):
                username = spawner.user.name
                check_call(['./examples/bootstrap-script/bootstrap.sh', username])

            c.Spawner.pre_spawn_hook = my_hook

        """
    ).tag(config=True)

    def load_state(self, state):
        """Restore state of spawner from database.

        Called for each user's spawner after the hub process restarts.

        `state` is a dict that'll contain the value returned by `get_state` of
        the spawner, or {} if the spawner hasn't persisted any state yet.

        Override in subclasses to restore any extra state that is needed to track
        the single-user server for that user. Subclasses should call super().
        """
        pass

    def get_state(self):
        """Save state of spawner into database.

        A black box of extra state for custom spawners. The returned value of this is
        passed to `load_state`.

        Subclasses should call `super().get_state()`, augment the state returned from
        there, and return that state.

        Returns
        -------
        state: dict
             a JSONable dict of state
        """
        state = {}
        return state

    def clear_state(self):
        """Clear any state that should be cleared when the single-user server stops.

        State that should be preserved across single-user server instances should not be cleared.

        Subclasses should call super, to ensure that state is properly cleared.
        """
        self.api_token = ''

    def get_env(self):
        """Return the environment dict to use for the Spawner.

        This applies things like `env_keep`, anything defined in `Spawner.environment`,
        and adds the API token to the env.

        When overriding in subclasses, subclasses must call `super().get_env()`, extend the
        returned dict and return it.

        Use this to access the env in Spawner.start to allow extension in subclasses.
        """
        env = {}
        if self.env:
            warnings.warn("Spawner.env is deprecated, found %s" % self.env, DeprecationWarning)
            env.update(self.env)

        for key in self.env_keep:
            if key in os.environ:
                env[key] = os.environ[key]

        # config overrides. If the value is a callable, it will be called with
        # one parameter - the current spawner instance - and the return value
        # will be assigned to the environment variable. This will be called at
        # spawn time.
        for key, value in self.environment.items():
            if callable(value):
                env[key] = value(self)
            else:
                env[key] = value

        env['JUPYTERHUB_API_TOKEN'] = self.api_token
        # deprecated (as of 0.7.2), for old versions of singleuser
        env['JPY_API_TOKEN'] = self.api_token
        if self.admin_access:
            env['JUPYTERHUB_ADMIN_ACCESS'] = '1'
        # OAuth settings
        env['JUPYTERHUB_CLIENT_ID'] = self.oauth_client_id
        env['JUPYTERHUB_HOST'] = self.hub.public_host
        env['JUPYTERHUB_OAUTH_CALLBACK_URL'] = \
            url_path_join(self.user.url, self.name, 'oauth_callback')

        # Info previously passed on args
        env['JUPYTERHUB_USER'] = self.user.name
        env['JUPYTERHUB_API_URL'] = self.hub.api_url
        # Drops the last four characters of the hub's base URL.
        # NOTE(review): presumably the trailing 'hub/' segment -- confirm.
        env['JUPYTERHUB_BASE_URL'] = self.hub.base_url[:-4]
        if self.server:
            env['JUPYTERHUB_SERVICE_PREFIX'] = self.server.base_url

        # Put in limit and guarantee info if they exist.
        # Note that this is for use by the humans / notebook extensions in the
        # single-user notebook server, and not for direct usage by the spawners
        # themselves. Spawners should just use the traitlets directly.
        if self.mem_limit:
            env['MEM_LIMIT'] = str(self.mem_limit)
        if self.mem_guarantee:
            env['MEM_GUARANTEE'] = str(self.mem_guarantee)
        if self.cpu_limit:
            env['CPU_LIMIT'] = str(self.cpu_limit)
        if self.cpu_guarantee:
            env['CPU_GUARANTEE'] = str(self.cpu_guarantee)

        return env

    def template_namespace(self):
        """Return the template namespace for format-string formatting.

        Currently used on default_url and notebook_dir.

        Subclasses may add items to the available namespace.

        The default implementation includes::

            {
              'username': user.name,
              'base_url': users_base_url,
            }

        ``base_url`` is only present when the spawner has a server.

        Returns:

            ns (dict): namespace for string formatting.
        """
        d = {'username': self.user.name}
        if self.server:
            d['base_url'] = self.server.base_url
        return d

    def format_string(self, s):
        """Render a Python format string

        Uses :meth:`Spawner.template_namespace` to populate format namespace.

        Args:

            s (str): Python format-string to be formatted.

        Returns:

            str: Formatted string, rendered
        """
        return s.format(**self.template_namespace())

    def get_args(self):
        """Return the arguments to be passed after self.cmd

        Doesn't expect shell expansion to happen.
        """
        args = []

        if self.ip:
            args.append('--ip="%s"' % self.ip)

        if self.port:
            args.append('--port=%i' % self.port)
        # `self.server` may be None (no server recorded yet); only fall back
        # to its port when there is a server to ask.
        elif self.server and self.server.port:
            self.log.warning("Setting port from user.server is deprecated as of JupyterHub 0.7.")
            args.append('--port=%i' % self.server.port)

        if self.notebook_dir:
            notebook_dir = self.format_string(self.notebook_dir)
            args.append('--notebook-dir="%s"' % notebook_dir)
        if self.default_url:
            default_url = self.format_string(self.default_url)
            args.append('--NotebookApp.default_url="%s"' % default_url)

        if self.debug:
            args.append('--debug')
        if self.disable_user_config:
            args.append('--disable-user-config')
        args.extend(self.args)
        return args

    def run_pre_spawn_hook(self):
        """Run the pre_spawn_hook if defined"""
        if self.pre_spawn_hook:
            return self.pre_spawn_hook(self)

    @gen.coroutine
    def start(self):
        """Start the single-user server

        Returns:
          (str, int): the (ip, port) where the Hub can connect to the server.

        .. versionchanged:: 0.7
            Return ip, port instead of setting on self.user.server directly.
        """
        raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.")

    @gen.coroutine
    def stop(self, now=False):
        """Stop the single-user server

        If `now` is False (default), shutdown the server as gracefully as possible,
        e.g. starting with SIGINT, then SIGTERM, then SIGKILL.
        If `now` is True, terminate the server immediately.

        The coroutine should return when the single-user server process is no longer running.

        Must be a coroutine.
        """
        raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.")

    @gen.coroutine
    def poll(self):
        """Check if the single-user process is running

        Returns:
          None if single-user process is running.
          Integer exit status (0 if unknown), if it is not running.

        State transitions, behavior, and return response:

        - If the Spawner has not been initialized (neither loaded state, nor called start),
          it should behave as if it is not running (status=0).
        - If the Spawner has not finished starting,
          it should behave as if it is running (status=None).

        Design assumptions about when `poll` may be called:

        - On Hub launch: `poll` may be called before `start` when state is loaded on Hub launch.
          `poll` should return exit status 0 (unknown) if the Spawner has not been initialized via
          `load_state` or `start`.
        - If `.start()` is async: `poll` may be called during any yielded portions of the `start`
          process. `poll` should return None when `start` is yielded, indicating that the `start`
          process has not yet completed.

        """
        raise NotImplementedError("Override in subclass. Must be a Tornado gen.coroutine.")

    def add_poll_callback(self, callback, *args, **kwargs):
        """Add a callback to fire when the single-user server stops"""
        if args or kwargs:
            # Bind the extra arguments now so the stored callback takes none.
            cb = callback
            callback = lambda: cb(*args, **kwargs)
        self._callbacks.append(callback)

    def stop_polling(self):
        """Stop polling for single-user server's running state"""
        if self._poll_callback:
            self._poll_callback.stop()
            self._poll_callback = None

    def start_polling(self):
        """Start polling periodically for single-user server's running state.

        Callbacks registered via `add_poll_callback` will fire if/when the server stops.
        Explicit termination via the stop method will not trigger the callbacks.
        """
        if self.poll_interval <= 0:
            self.log.debug("Not polling subprocess")
            return
        else:
            self.log.debug("Polling subprocess every %is", self.poll_interval)

        # Never leave two pollers running for the same spawner.
        self.stop_polling()

        # poll_interval is in seconds; PeriodicCallback takes milliseconds.
        self._poll_callback = PeriodicCallback(
            self.poll_and_notify,
            1e3 * self.poll_interval
        )
        self._poll_callback.start()

    @gen.coroutine
    def poll_and_notify(self):
        """Used as a callback to periodically poll the process and notify any watchers"""
        status = yield self.poll()
        if status is None:
            # still running, nothing to do here
            return

        self.stop_polling()

        # clear callbacks list
        self._callbacks, callbacks = ([], self._callbacks)

        for callback in callbacks:
            try:
                yield gen.maybe_future(callback())
            except Exception:
                self.log.exception("Unhandled error in poll callback for %s", self)
        return status

    death_interval = Float(0.1)

    @gen.coroutine
    def wait_for_death(self, timeout=10):
        """Wait for the single-user server to die, up to timeout seconds

        Returns the result of the backoff helper on success, or False when
        the wait times out.
        """
        @gen.coroutine
        def _wait_for_death():
            status = yield self.poll()
            return status is not None

        try:
            r = yield exponential_backoff(
                _wait_for_death,
                'Process did not die in {timeout} seconds'.format(timeout=timeout),
                start_wait=self.death_interval,
                timeout=timeout,
            )
            return r
        except TimeoutError:
            return False
def thread_exception_hanlder(self, action_timeout=5):
    """Schedule the worker exception handler to run periodically.

    ``action_timeout`` is multiplied by 1000 and used as the period of a
    ``PeriodicCallback``. Nothing is scheduled unless it is greater than
    zero. The callback object is not retained on the instance.
    """
    if not action_timeout > 0:
        return

    handler = self.__create_worker_exception_handler
    period = action_timeout * 1000
    periodic = PeriodicCallback(handler, period)
    periodic.start()
class LCD:
    """Paged text display backed by an I2C character LCD.

    Keeps a shadow copy (``screen``) of what was last written so that
    ``_update`` only sends characters that differ from the active page,
    unless a full redraw has been requested. On an ``IOError`` the next
    address in ``ctrl.args.lcd_addr`` is tried after a short delay.
    """

    def __init__(self, ctrl):
        """Set up state, show 'Loading...' and start the periodic redraw.

        ctrl: controller object; this class reads ``ctrl.args.lcd_addr``,
            ``ctrl.ioloop`` and ``ctrl.i2c`` from it.
        """
        self.ctrl = ctrl

        self.addrs = self.ctrl.args.lcd_addr
        self.addr = self.addrs[0]
        self.addr_num = 0

        self.width = 20
        self.height = 4
        self.lcd = None
        self.timeout = None
        self.reset = False
        # _update() reads this flag; define it up front instead of relying
        # on load_page() having run first.
        self.redraw = False
        self.page = None
        self.pages = []
        self.current_page = 0
        self.screen = self.new_screen()
        self.set_message('Loading...')

        # Redraw screen every 5 seconds
        self.redraw_timer = PeriodicCallback(self._redraw, 5000, ctrl.ioloop)
        self.redraw_timer.start()

        atexit.register(self.goodbye)

    def set_message(self, msg):
        """Show ``msg`` on a fresh page and push it to the display at once."""
        try:
            self.load_page(LCDPage(self, msg))
            self._update()

        except IOError as e:
            log.warning('LCD communication failed: %s' % e)

    def new_screen(self):
        """Return a blank ``width`` x ``height`` grid, indexed ``[x][y]``."""
        return [[' ' for y in range(self.height)] for x in range(self.width)]

    def new_page(self):
        """Create a new, unregistered page bound to this display."""
        return LCDPage(self)

    def add_page(self, page):
        """Append ``page`` to the list of selectable pages."""
        self.pages.append(page)

    def add_new_page(self, page=None):
        """Register ``page`` (or a new one) and return it.

        The page's ``id`` is set to its index in ``pages``.
        """
        if page is None:
            page = self.new_page()
        page.id = len(self.pages)
        self.add_page(page)
        return page

    def load_page(self, page):
        """Make ``page`` the active page and schedule a full redraw.

        Does nothing if ``page`` is already active.
        """
        if self.page != page:
            if self.page is not None:
                self.page.deactivate()
            page.activate()
            self.page = page
            self.redraw = True
            self.update()

    def set_current_page(self, current_page):
        """Select a registered page by index, wrapping around both ends.

        With no registered pages there is nothing to select, so the call is
        a no-op (previously this raised ZeroDivisionError).
        """
        if not self.pages:
            return

        self.current_page = current_page % len(self.pages)
        self.load_page(self.pages[self.current_page])

    def page_up(self):
        pass

    def page_down(self):
        pass

    def page_right(self):
        """Switch to the next registered page."""
        self.set_current_page(self.current_page + 1)

    def page_left(self):
        """Switch to the previous registered page."""
        self.set_current_page(self.current_page - 1)

    def update(self):
        """Schedule a deferred ``_update`` unless one is already pending."""
        if self.timeout is None:
            self.timeout = self.ctrl.ioloop.call_later(0.25, self._update)

    def _redraw(self):
        """Periodic callback: force every character to be rewritten."""
        self.redraw = True
        self.update()

    def _update(self):
        """Write the active page to the display, sending only what changed."""
        self.timeout = None

        try:
            if self.lcd is None:
                self.lcd = lcd.LCD(self.ctrl.i2c, self.addr, self.height,
                                   self.width)

            if self.reset:
                self.lcd.reset()
                self.redraw = True
                self.reset = False

            # Track where the cursor is expected to be so goto() is only
            # issued when the next character is not directly after the
            # previous one.
            cursorX, cursorY = -1, -1

            for y in range(self.height):
                for x in range(self.width):
                    c = self.page.data[x][y]

                    if self.redraw or self.screen[x][y] != c:
                        if cursorX != x or cursorY != y:
                            self.lcd.goto(x, y)
                            cursorX, cursorY = x, y

                        self.lcd.put_char(c)
                        cursorX += 1
                        self.screen[x][y] = c

            self.redraw = False

        except IOError as e:
            # Try next address
            self.addr_num += 1
            if len(self.addrs) <= self.addr_num:
                self.addr_num = 0
            self.addr = self.addrs[self.addr_num]
            self.lcd = None

            log.warning('LCD communication failed, ' +
                        'retrying on address 0x%02x: %s' % (self.addr, e))

            # Reset the device and redraw everything on the next attempt.
            self.reset = True
            self.timeout = self.ctrl.ioloop.call_later(1, self._update)

    def goodbye(self, message=''):
        """Exit hook: cancel pending work and leave ``message`` on screen."""
        if self.timeout:
            self.ctrl.ioloop.remove_timeout(self.timeout)
            self.timeout = None

        if self.redraw_timer:
            self.redraw_timer.stop()
            self.redraw_timer = None

        if self.lcd is not None:
            self.set_message(message)
def open(self):
    """Handle a newly opened connection.

    Sends the entries once straight away, then keeps re-sending them from a
    ``PeriodicCallback`` with a period of 5000 (stored on ``self.callback``).
    """
    print('open connection')

    # Initial push so the client does not wait for the first tick.
    self.send_entries()

    periodic = PeriodicCallback(self.send_entries, 5000)
    self.callback = periodic
    periodic.start()
class InstanceManager(object):
    """ Fulfills AppServer instance assignments from the scheduler. """

    # The seconds to wait between performing health checks.
    HEALTH_CHECK_INTERVAL = 60

    def __init__(self, zk_client, service_operator, routing_client,
                 projects_manager, deployment_config, source_manager,
                 syslog_server, thread_pool, private_ip):
        """ Creates a new InstanceManager.

        Args:
          zk_client: A kazoo.client.KazooClient object.
          service_operator: An appscale.common.service_helper.ServiceOperator
            object.
          routing_client: An instance_manager.routing_client.RoutingClient
            object.
          projects_manager: A ProjectsManager object.
          deployment_config: A common.deployment_config.DeploymentConfig
            object.
          source_manager: An instance_manager.source_manager.SourceManager
            object.
          syslog_server: A string specifying the location of the syslog
            process that generates the combined app logs.
          thread_pool: A ThreadPoolExecutor.
          private_ip: A string specifying the current machine's private IP
            address.
        """
        self._service_operator = service_operator
        self._routing_client = routing_client
        self._private_ip = private_ip
        self._syslog_server = syslog_server
        self._projects_manager = projects_manager
        self._deployment_config = deployment_config
        self._source_manager = source_manager
        self._thread_pool = thread_pool
        self._zk_client = zk_client

        # Ensures only one process tries to make changes at a time.
        self._work_lock = AsyncLock()

        self._health_checker = PeriodicCallback(
            self._ensure_health, self.HEALTH_CHECK_INTERVAL * 1000)

        # Instances that this machine should run.
        # For example, {guestbook_default_v1: [20000, -1]}
        self._assignments = None

        # List of API server ports by project id. There may be an api server
        # and a python runtime api server per project.
        self._api_servers = {}

        self._running_instances = set()
        self._login_server = None

    def start(self):
        """ Begins processes needed to fulfill instance assignments. """
        # Update list of running instances in case the InstanceManager was
        # restarted.
        self._recover_state()

        # Subscribe to changes in controller state, which includes
        # assignments and the 'login' property.
        self._zk_client.DataWatch(CONTROLLER_STATE_NODE,
                                  self._controller_state_watch)

        # Subscribe to changes in project configuration, including relevant
        # versions.
        self._projects_manager.subscriptions.append(
            self._handle_configuration_update)

        # Start the regular health check.
        self._health_checker.start()

    @gen.coroutine
    def _start_instance(self, version, port):
        """ Starts a Google App Engine application on this machine. It will
        start it up and then proceed to fetch the main page.

        Args:
          version: A Version object.
          port: An integer specifying a port to use.
        Raises:
          BadConfigurationException if the runtime is unknown or a Java
          version has 250MB of memory or less.
        """
        version_details = version.version_details
        runtime = version_details['runtime']

        # Work on a copy: the runtime-specific variables added below must not
        # leak back into the version's stored configuration.
        env_vars = dict(version_details.get('envVariables', {}))

        runtime_params = self._deployment_config.get_config(
            'runtime_parameters')
        max_memory = runtime_params.get('default_max_appserver_memory',
                                        DEFAULT_MAX_APPSERVER_MEMORY)
        if 'instanceClass' in version_details:
            max_memory = INSTANCE_CLASSES.get(
                version_details['instanceClass'], max_memory)

        source_archive = version_details['deployment']['zip']['sourceUrl']
        http_port = version_details['appscaleExtensions']['httpPort']

        api_server_port, api_services = yield self._ensure_api_server(
            version.project_id, runtime)
        yield self._source_manager.ensure_source(
            version.revision_key, source_archive, runtime)

        logger.info('Starting {}:{}'.format(version, port))

        pidfile = PIDFILE_TEMPLATE.format(revision=version.revision_key,
                                          port=port)

        if runtime == GO:
            env_vars['GOPATH'] = os.path.join(
                UNPACK_ROOT, version.revision_key, 'gopath')
            env_vars['GOROOT'] = os.path.join(GO_SDK, 'goroot')

        if runtime in (PYTHON27, GO, PHP):
            start_cmd = create_python27_start_cmd(
                version.project_id, self._login_server, port, pidfile,
                version.revision_key, api_server_port)
            env_vars.update(
                create_python_app_env(self._login_server, version.project_id))
        elif runtime in (JAVA, JAVA8):
            # Account for MaxPermSize (~170MB), the parent process (~50MB),
            # and thread stacks (~20MB).
            max_heap = max_memory - 250
            if max_heap <= 0:
                raise BadConfigurationException(
                    'Memory for Java applications must be greater than 250MB')

            start_cmd = create_java_start_cmd(
                version.project_id, port, http_port, self._login_server,
                max_heap, pidfile, version.revision_key, api_server_port,
                runtime)
            env_vars.update(
                create_java_app_env(self._deployment_config, runtime,
                                    version.project_id))
        else:
            raise BadConfigurationException(
                'Unknown runtime {} for {}'.format(runtime,
                                                   version.project_id))

        logger.info("Start command: " + str(start_cmd))
        logger.info("Environment variables: " + str(env_vars))

        env_content = ' '.join(
            ['{}="{}"'.format(k, str(v)) for k, v in env_vars.items()])
        command_content = 'exec env {} {}'.format(env_content, start_cmd)
        service_inst = '{}-{}'.format(version.revision_key, port)
        service_name = 'appscale-instance-run@{}'.format(service_inst)
        service_props = {'MemoryLimit': '{}M'.format(max_memory)}
        command_file_path = '/run/appscale/apps/command_{}'.format(
            service_inst)
        file_io.write(command_file_path, command_content)

        yield self._service_operator.start_async(
            service_name, wants=api_services, properties=service_props)

        # Make sure the version registration node exists.
        self._zk_client.ensure_path(
            '/'.join([VERSION_REGISTRATION_NODE, version.version_key]))

        instance = Instance(version.revision_key, port)
        yield self._add_routing(instance)

        if version.project_id == DASHBOARD_PROJECT_ID:
            log_size = DASHBOARD_LOG_SIZE
        else:
            log_size = APP_LOG_SIZE

        if not setup_logrotate(version.project_id, log_size):
            logger.error(
                "Error while setting up log rotation for application: {}".
                format(version.project_id))

    @gen.coroutine
    def populate_api_servers(self):
        """ Find running API servers. """

        def api_server_info(entry):
            """ Splits a service entry into (project_id, index, port).

            The index is 0 unless the part after API_SERVER_PREFIX starts
            with '<index>_'.
            """
            prefix, port = entry.rsplit('-', 1)
            index = 0
            project_id = prefix[len(API_SERVER_PREFIX):]
            index_and_id = project_id.split('_', 1)
            if len(index_and_id) > 1:
                index = int(index_and_id[0])
                project_id = index_and_id[1]
            return project_id, index, int(port)

        service_entries = yield self._service_operator.list_async()
        service_entry_list = sorted(
            entry for entry in service_entries
            if entry.startswith(API_SERVER_PREFIX))
        server_entries = [api_server_info(entry)
                          for entry in service_entry_list]

        for project_id, index, port in server_entries:
            ports = self._api_servers.get(project_id, [])
            if not ports:
                self._api_servers[project_id] = [port]
            else:
                ports.insert(index, port)

    def _recover_state(self):
        """ Establishes current state from services. """
        logger.info('Getting current state')
        service_entries = self._service_operator.list()
        instance_entries = {
            entry: state for entry, state in service_entries.items()
            if entry.startswith(SERVICE_INSTANCE_PREFIX)}

        instance_details = []
        for entry, state in instance_entries.items():
            # Split on the last hyphen only: the revision part may itself
            # contain hyphens, and a larger maxsplit would then yield more
            # than two fields and break the unpacking.
            revision, port = entry[entry.find('@') + 1:].rsplit('-', 1)
            instance_details.append(
                {'revision': revision, 'port': int(port), 'state': state})

        # Ensure version nodes exist.
        running_versions = {
            '_'.join(instance['revision'].split('_')[:3])
            for instance in instance_details}
        self._zk_client.ensure_path(VERSION_REGISTRATION_NODE)
        for version_key in running_versions:
            self._zk_client.ensure_path(
                '/'.join([VERSION_REGISTRATION_NODE, version_key]))

        # Account for monitored instances.
        running_instances = {
            Instance(instance['revision'], instance['port'])
            for instance in instance_details}
        self._routing_client.declare_instance_nodes(running_instances)
        self._running_instances = running_instances

    @gen.coroutine
    def _ensure_api_server(self, project_id, runtime):
        """ Make sure there is a running API server for a project.

        Ports for new servers are handed out downwards, starting just below
        the lowest port already recorded (or at MAX_API_SERVER_PORT).

        Args:
          project_id: A string specifying the project ID.
          runtime: The runtime for the project
        Returns:
          An integer specifying the API server port and list of api services.
        """
        # Only the Java 8 runtime gets the additional Python 2.7 runtime API
        # server.
        ensure_app_server_api = runtime == JAVA8

        if project_id in self._api_servers:
            api_server_ports = self._api_servers[project_id]
            if not ensure_app_server_api:
                raise gen.Return((api_server_ports[0], [
                    'appscale-api-server@{}-{}'.format(
                        project_id, str(api_server_ports[0]))]))
            elif len(api_server_ports) > 1:
                raise gen.Return((api_server_ports[1], [
                    'appscale-api-server@{}-{}'.format(
                        project_id, str(api_server_ports[0])),
                    'appscale-api-server@1_{}-{}'.format(
                        project_id, str(api_server_ports[1]))]))

        server_port = MAX_API_SERVER_PORT
        for ports in self._api_servers.values():
            for port in ports:
                if port <= server_port:
                    server_port = port - 1

        api_services = []
        if project_id not in self._api_servers:
            zk_locations = appscale_info.get_zk_node_ips()
            start_cmd = ' '.join([
                API_SERVER_LOCATION,
                '--port', str(server_port),
                '--project-id', project_id,
                '--zookeeper-locations', ' '.join(zk_locations)])

            api_command_file_path = (
                '/run/appscale/apps/api_command_{}-{}'.format(
                    project_id, str(server_port)))
            api_command_content = 'exec {}'.format(start_cmd)
            file_io.write(api_command_file_path, api_command_content)

            api_server_port = server_port
        else:
            api_server_port = self._api_servers[project_id][0]

        api_services.append('appscale-api-server@{}-{}'.format(
            project_id, str(api_server_port)))

        if ensure_app_server_api:
            # Start an Python 27 runtime API server
            if api_server_port == server_port:
                server_port -= 1

            start_cmd = create_python_api_start_cmd(
                project_id, self._login_server, server_port, api_server_port)

            api_command_file_path = (
                '/run/appscale/apps/api_command_1_{}-{}'.format(
                    project_id, str(server_port)))
            api_command_content = 'exec {}'.format(start_cmd)
            file_io.write(api_command_file_path, api_command_content)

            # Use the '1_' instance prefix so the service name matches both
            # the command file written above and the name returned for an
            # already-known secondary server at the top of this method.
            api_services.append('appscale-api-server@1_{}-{}'.format(
                project_id, str(server_port)))

            self._api_servers[project_id] = [api_server_port, server_port]
        else:
            self._api_servers[project_id] = [server_port]

        raise gen.Return((server_port, api_services))

    def _instance_healthy(self, port):
        """ Determines the health of an instance with an HTTP request.

        Args:
          port: An integer specifying the port the instance is listening on.
        Returns:
          A boolean indicating whether or not the instance is healthy.
        """
        url = "http://" + self._private_ip + ":" + str(port) + FETCH_PATH
        try:
            opener = urllib2.build_opener(NoRedirection)
            response = opener.open(url, timeout=HEALTH_CHECK_TIMEOUT)
            if response.code == httplib.SERVICE_UNAVAILABLE:
                return False
        except IOError:
            return False

        return True

    @gen.coroutine
    def _wait_for_app(self, port):
        """ Waits for the application hosted on this machine, on the given
        port, to respond to HTTP requests.

        Args:
          port: Port where app is hosted on the local machine
        Returns:
          True on success, False otherwise
        """
        deadline = monotonic.monotonic() + START_APP_TIMEOUT

        while monotonic.monotonic() < deadline:
            if self._instance_healthy(port):
                raise gen.Return(True)

            logger.debug('Instance at port {} is not ready yet'.format(port))
            yield gen.sleep(BACKOFF_TIME)

        raise gen.Return(False)

    def _instance_service_name(self, instance):
        """ Returns the service name used to run the given Instance. """
        return ''.join(['appscale-instance-run@', instance.revision_key, '-',
                        str(instance.port)])

    @gen.coroutine
    def _add_routing(self, instance):
        """ Tells the AppController to begin routing traffic to an AppServer.

        If the instance does not respond in time, its service is stopped and
        it is not registered.

        Args:
          instance: An Instance.
        """
        logger.info('Waiting for {}'.format(instance))
        start_successful = yield self._wait_for_app(instance.port)
        if not start_successful:
            instance_service = self._instance_service_name(instance)
            yield self._service_operator.stop_async(instance_service)
            logger.warning('{} did not come up in time'.format(instance))
            return

        self._routing_client.register_instance(instance)
        self._running_instances.add(instance)

    @gen.coroutine
    def _clean_old_sources(self):
        """ Removes source code for obsolete revisions. """
        service_entries = yield self._service_operator.list_async()
        active_revisions = {
            entry[len(SERVICE_INSTANCE_PREFIX):].rsplit('-', 1)[0]
            for entry in service_entries
            if entry.startswith(SERVICE_INSTANCE_PREFIX)}

        for project_id, project_manager in self._projects_manager.items():
            for service_id, service_manager in project_manager.items():
                for version_id, version_manager in service_manager.items():
                    revision_id = version_manager.version_details['revision']
                    revision_key = VERSION_PATH_SEPARATOR.join(
                        [project_id, service_id, version_id,
                         str(revision_id)])
                    active_revisions.add(revision_key)

        self._source_manager.clean_old_revisions(
            active_revisions=active_revisions)

    @gen.coroutine
    def _stop_app_instance(self, instance):
        """ Stops a Google App Engine application process instance on current
        machine.

        Args:
          instance: An Instance object.
        """
        logger.info('Stopping {}'.format(instance))

        instance_service = self._instance_service_name(instance)

        self._routing_client.unregister_instance(instance)
        try:
            self._running_instances.remove(instance)
        except KeyError:
            logger.info(
                'unregister_instance: non-existent instance {}'.format(
                    instance))

        yield self._service_operator.stop_async(instance_service)

        project_instances = [
            instance_ for instance_ in self._running_instances
            if instance_.project_id == instance.project_id]
        if not project_instances:
            remove_logrotate(instance.project_id)

        yield self._clean_old_sources()

    def _get_lowest_port(self):
        """ Determines the lowest usuable port for a new instance.

        Returns:
          An integer specifying a free port.
        """
        existing_ports = {instance.port
                          for instance in self._running_instances}
        port = STARTING_INSTANCE_PORT
        while port in existing_ports:
            port += 1

        return port

    @gen.coroutine
    def _restart_unrouted_instances(self):
        """ Restarts instances that the router considers offline. """
        with (yield self._work_lock.acquire()):
            failed_instances = (
                yield self._routing_client.get_failed_instances())
            for version_key, port in failed_instances:
                try:
                    instance = next(
                        candidate for candidate in self._running_instances
                        if candidate.version_key == version_key
                        and candidate.port == port)
                except StopIteration:
                    # If the manager has no record of that instance, remove
                    # routing.
                    self._routing_client.unregister_instance(
                        Instance(version_key, port))
                    continue

                try:
                    version = self._projects_manager.version_from_key(
                        instance.version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                logger.warning(
                    'Restarting failed instance: {}'.format(instance))
                yield self._stop_app_instance(instance)
                yield self._start_instance(version, instance.port)

    @gen.coroutine
    def _restart_unavailable_instances(self):
        """ Restarts instances that fail health check requests. """
        with (yield self._work_lock.acquire()):
            # Iterate over a snapshot: stopping and starting an instance
            # removes from and adds to self._running_instances.
            for instance in list(self._running_instances):
                # TODO: Add a threshold to avoid restarting on a transient
                # error.
                if not self._instance_healthy(instance.port):
                    try:
                        version = self._projects_manager.version_from_key(
                            instance.version_key)
                    except KeyError:
                        # If the version no longer exists, avoid doing any
                        # work. The scheduler should remove any assignments
                        # for it.
                        continue

                    logger.warning(
                        'Restarting failed instance: {}'.format(instance))
                    yield self._stop_app_instance(instance)
                    yield self._start_instance(version, instance.port)

    @gen.coroutine
    def _ensure_health(self):
        """ Checks to make sure all required instances are running and
        healthy. """
        yield self._restart_unrouted_instances()
        yield self._restart_unavailable_instances()

        # Just as an infrequent sanity check, fulfill assignments and enforce
        # instance details.
        yield self._fulfill_assignments()
        yield self._enforce_instance_details()

    @gen.coroutine
    def _fulfill_assignments(self):
        """ Starts and stops instances in order to fulfill assignments. """
        # If the manager has not been able to retrieve a valid set of
        # assignments, don't do any work.
        if self._assignments is None:
            return

        if self._login_server is None:
            return

        with (yield self._work_lock.acquire()):
            # Stop versions that aren't assigned.
            to_stop = [instance for instance in self._running_instances
                       if instance.version_key not in self._assignments]
            for version_key in {instance.version_key
                                for instance in to_stop}:
                logger.info('{} is no longer assigned'.format(version_key))

            for instance in to_stop:
                yield self._stop_app_instance(instance)

            for version_key, assigned_ports in self._assignments.items():
                try:
                    version = self._projects_manager.version_from_key(
                        version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                # The number of required instances that don't have an
                # assigned port.
                new_assignment_count = sum(
                    port == -1 for port in assigned_ports)

                # Stop instances that aren't assigned. If the assignment list
                # includes any -1s, match them to running instances that
                # aren't in the assigned ports list.
                candidates = [
                    instance for instance in self._running_instances
                    if instance.version_key == version_key
                    and instance.port not in assigned_ports]
                unmatched_instances = candidates[new_assignment_count:]
                for running_instance in unmatched_instances:
                    logger.info(
                        '{} is no longer assigned'.format(running_instance))
                    yield self._stop_app_instance(running_instance)

                # Start defined ports that aren't running.
                running_ports = [
                    instance.port for instance in self._running_instances
                    if instance.version_key == version_key]
                for port in assigned_ports:
                    if port != -1 and port not in running_ports:
                        yield self._start_instance(version, port)

                # Start new assignments that don't have a match.
                candidates = [
                    instance for instance in self._running_instances
                    if instance.version_key == version_key
                    and instance.port not in assigned_ports]
                to_start = max(new_assignment_count - len(candidates), 0)
                for _ in range(to_start):
                    yield self._start_instance(version,
                                               self._get_lowest_port())

    @gen.coroutine
    def _enforce_instance_details(self):
        """ Ensures all running instances are configured correctly. """
        with (yield self._work_lock.acquire()):
            # Restart instances with an outdated revision or login server.
            # Iterate over a snapshot: restarting an instance removes from
            # and adds to self._running_instances.
            for instance in list(self._running_instances):
                try:
                    version = self._projects_manager.version_from_key(
                        instance.version_key)
                except KeyError:
                    # If the version no longer exists, avoid doing any work.
                    # The scheduler should remove any assignments for it.
                    continue

                instance_login_server = get_login_server(instance)
                login_server_changed = (
                    instance_login_server is not None and
                    self._login_server is not None and
                    self._login_server != instance_login_server)
                if (instance.revision_key != version.revision_key or
                        login_server_changed):
                    logger.info(
                        'Configuration changed for {}'.format(instance))
                    yield self._stop_app_instance(instance)
                    yield self._start_instance(version, instance.port)

    def _assignments_from_state(self, controller_state):
        """ Extracts the current machine's assignments from controller state.

        Args:
          controller_state: A dictionary containing controller state.
        Returns:
          A dictionary mapping version keys to lists of ports. Versions with
          no appserver on this machine's private IP are left out.
        """
        def version_assignments(data):
            """ Lists the ports of the 'ip:port' appservers on this IP. """
            return [int(server.split(':')[1])
                    for server in data['appservers']
                    if server.split(':')[0] == self._private_ip]

        assignments = {}
        for version_key, data in controller_state['@app_info_map'].items():
            ports = version_assignments(data)
            if ports:
                assignments[version_key] = ports

        return assignments

    @gen.coroutine
    def _update_controller_state(self, encoded_controller_state):
        """ Handles updates to controller state.

        Args:
          encoded_controller_state: A JSON-encoded string containing
            controller state.
        """
        try:
            controller_state = json.loads(encoded_controller_state)
        except (TypeError, ValueError):
            # If the controller state isn't usable, don't do any work.
            logger.warning('Invalid controller state: {}'.format(
                encoded_controller_state))
            return

        new_assignments = self._assignments_from_state(controller_state)
        login_server = controller_state['@options']['login']

        if new_assignments != self._assignments:
            logger.info('New assignments: {}'.format(new_assignments))
            self._assignments = new_assignments
            yield self._fulfill_assignments()

        if login_server != self._login_server:
            logger.info('New login server: {}'.format(login_server))
            self._login_server = login_server
            yield self._enforce_instance_details()

    def _controller_state_watch(self, encoded_controller_state, _):
        """ Handles updates to controller state.

        Schedules the actual handling on the IOLoop.

        Args:
          encoded_controller_state: A JSON-encoded string containing
            controller state.
        """
        persistent_update_controller_state = retry_data_watch_coroutine(
            CONTROLLER_STATE_NODE, self._update_controller_state)
        IOLoop.instance().add_callback(persistent_update_controller_state,
                                       encoded_controller_state)

    @gen.coroutine
    def _handle_configuration_update(self, event):
        """ Handles updates to a project's configuration details.

        Args:
          event: An appscale.admin.instance_manager.projects_manager.Event
            object.
        """
        relevant_versions = {instance.version_key
                             for instance in self._running_instances}
        if self._assignments is not None:
            relevant_versions |= set(self._assignments.keys())

        for version_key in relevant_versions:
            if event.affects_version(version_key):
                logger.info(
                    'New revision for version: {}'.format(version_key))
                # One pass covers every running instance, so stop at the
                # first affected version.
                yield self._enforce_instance_details()
                break
class MappingKernelManager(MultiKernelManager):
    """A KernelManager that handles
    - File mapping
    - HTTP error handling
    - Kernel message filtering
    """

    @default("kernel_manager_class")
    def _default_kernel_manager_class(self):
        return "jupyter_client.ioloop.IOLoopKernelManager"

    kernel_argv = List(Unicode())

    root_dir = Unicode(config=True)

    # kernel_id -> number of connections reported through
    # notify_connect() / notify_disconnect()
    _kernel_connections = Dict()

    _culler_callback = None

    _initialized_culler = False

    @default("root_dir")
    def _default_root_dir(self):
        """Use the parent's root_dir, or the current directory without one."""
        try:
            return self.parent.root_dir
        except AttributeError:
            return getcwd()

    @validate("root_dir")
    def _update_root_dir(self, proposal):
        """Do a bit of validation of the root dir."""
        value = proposal["value"]
        if not os.path.isabs(value):
            # If we receive a non-absolute path, make it absolute.
            value = os.path.abspath(value)
        if not exists(value) or not os.path.isdir(value):
            raise TraitError("kernel root dir %r is not a directory" % value)
        return value

    cull_idle_timeout = Integer(
        0,
        config=True,
        help="""Timeout (in seconds) after which a kernel is considered idle and ready to be culled.
        Values of 0 or lower disable culling. Very short timeouts may result in kernels being culled
        for users with poor network connections.""",
    )

    cull_interval_default = 300  # 5 minutes
    cull_interval = Integer(
        cull_interval_default,
        config=True,
        help="""The interval (in seconds) on which to check for idle kernels exceeding the cull timeout value.""",
    )

    cull_connected = Bool(
        False,
        config=True,
        help="""Whether to consider culling kernels which have one or more connections.
        Only effective if cull_idle_timeout > 0.""",
    )

    cull_busy = Bool(
        False,
        config=True,
        help="""Whether to consider culling kernels which are busy.
        Only effective if cull_idle_timeout > 0.""",
    )

    buffer_offline_messages = Bool(
        True,
        config=True,
        help="""Whether messages from kernels whose frontends have disconnected should be buffered in-memory.

        When True (default), messages are buffered and replayed on reconnect,
        avoiding lost messages due to interrupted connectivity.

        Disable if long-running kernels will produce too much output while
        no frontends are connected.
        """,
    )

    kernel_info_timeout = Float(
        60,
        config=True,
        help="""Timeout for giving up on a kernel (in seconds).

        On starting and restarting kernels, we check whether the
        kernel is running and responsive by sending kernel_info_requests.
        This sets the timeout in seconds for how long the kernel can take
        before being presumed dead.
        This affects the MappingKernelManager (which handles kernel restarts)
        and the ZMQChannelsHandler (which handles the startup).
        """,
    )

    # kernel_id -> {"buffer", "session_key", "channels"}
    _kernel_buffers = Any()

    @default("_kernel_buffers")
    def _default_kernel_buffers(self):
        return defaultdict(
            lambda: {"buffer": [], "session_key": "", "channels": {}})

    last_kernel_activity = Instance(
        datetime,
        help="The last activity on any kernel, including shutting down a kernel",
    )

    def __init__(self, **kwargs):
        super(MappingKernelManager, self).__init__(**kwargs)
        self.last_kernel_activity = utcnow()

    allowed_message_types = List(
        trait=Unicode(),
        config=True,
        help="""White list of allowed kernel message types.
        When the list is empty, all message types are allowed.
        """,
    )

    # -------------------------------------------------------------------------
    # Methods for managing kernels and sessions
    # -------------------------------------------------------------------------

    def _handle_kernel_died(self, kernel_id):
        """notice that a kernel died"""
        self.log.warning("Kernel %s died, removing from map.", kernel_id)
        self.remove_kernel(kernel_id)

    def cwd_for_path(self, path):
        """Turn API path into absolute OS path."""
        os_path = to_os_path(path, self.root_dir)
        # in the case of documents and kernels not being on the same
        # filesystem, walk up to root_dir if the paths don't exist
        while not os.path.isdir(os_path) and os_path != self.root_dir:
            os_path = os.path.dirname(os_path)
        return os_path

    @gen.coroutine
    def start_kernel(self, kernel_id=None, path=None, **kwargs):
        """Start a kernel for a session and return its kernel_id.

        Parameters
        ----------
        kernel_id : uuid
            The uuid to associate the new kernel with. If this
            is not None, this kernel will be persistent whenever it is
            requested.
        path : API path
            The API path (unicode, '/' delimited) for the cwd.
            Will be transformed to an OS path relative to root_dir.
        kernel_name : str
            The name identifying which kernel spec to launch. This is ignored
            if an existing kernel is returned, but it may be checked in the
            future.
        """
        if kernel_id is None:
            if path is not None:
                kwargs["cwd"] = self.cwd_for_path(path)
            kernel_id = yield maybe_future(
                super(MappingKernelManager, self).start_kernel(**kwargs))
            self._kernel_connections[kernel_id] = 0
            self.start_watching_activity(kernel_id)
            self.log.info("Kernel started: %s" % kernel_id)
            self.log.debug("Kernel args: %r" % kwargs)
            # register callback for failed auto-restart
            self.add_restart_callback(
                kernel_id,
                lambda: self._handle_kernel_died(kernel_id),
                "dead",
            )

            # Increase the metric of number of kernels running
            # for the relevant kernel type by 1
            KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
                type=self._kernels[kernel_id].kernel_name).inc()
        else:
            self._check_kernel_id(kernel_id)
            self.log.info("Using existing kernel: %s" % kernel_id)

        # Initialize culling if not already
        if not self._initialized_culler:
            self.initialize_culler()

        # py2-compat
        raise gen.Return(kernel_id)

    def start_buffering(self, kernel_id, session_key, channels):
        """Start buffering messages for a kernel

        If buffer_offline_messages is disabled, the channels are closed and
        nothing is buffered.

        Parameters
        ----------
        kernel_id : str
            The id of the kernel to start buffering.
        session_key: str
            The session_key that owns the buffer. Only get_buffer() calls
            with a matching session_key get the buffer back.
        channels: dict({'channel': ZMQStream})
            The zmq channels whose messages should be buffered.
        """
        if not self.buffer_offline_messages:
            for stream in channels.values():
                stream.close()
            return

        self.log.info("Starting buffering for %s", session_key)
        self._check_kernel_id(kernel_id)
        # clear previous buffering state
        self.stop_buffering(kernel_id)
        buffer_info = self._kernel_buffers[kernel_id]
        # record the session key because only one session can buffer
        buffer_info["session_key"] = session_key
        # TODO: the buffer should likely be a memory bounded queue, we're
        # starting with a list to keep it simple
        buffer_info["buffer"] = []
        buffer_info["channels"] = channels

        # forward any future messages to the internal buffer
        def buffer_msg(channel, msg_parts):
            self.log.debug("Buffering msg on %s:%s", kernel_id, channel)
            buffer_info["buffer"].append((channel, msg_parts))

        for channel, stream in channels.items():
            stream.on_recv(partial(buffer_msg, channel))

    def get_buffer(self, kernel_id, session_key):
        """Get the buffer for a given kernel

        Returns the buffer info dict and removes it from the manager when
        session_key matches; otherwise the buffer is discarded and None is
        returned.

        Parameters
        ----------
        kernel_id : str
            The id of the kernel whose buffer is requested.
        session_key: str, optional
            The session_key, if any, that should get the buffer.
            If the session_key matches the current buffered session_key,
            the buffer will be returned.
        """
        self.log.debug("Getting buffer for %s", kernel_id)
        if kernel_id not in self._kernel_buffers:
            return

        buffer_info = self._kernel_buffers[kernel_id]
        if buffer_info["session_key"] == session_key:
            # remove buffer
            self._kernel_buffers.pop(kernel_id)
            # only return buffer_info if it's a match
            return buffer_info
        else:
            self.stop_buffering(kernel_id)

    def stop_buffering(self, kernel_id):
        """Stop buffering kernel messages

        Parameters
        ----------
        kernel_id : str
            The id of the kernel to stop buffering.
        """
        self.log.debug("Clearing buffer for %s", kernel_id)
        self._check_kernel_id(kernel_id)

        if kernel_id not in self._kernel_buffers:
            return
        buffer_info = self._kernel_buffers.pop(kernel_id)
        # close buffering streams
        for stream in buffer_info["channels"].values():
            if not stream.closed():
                stream.on_recv(None)
                stream.close()

        msg_buffer = buffer_info["buffer"]
        if msg_buffer:
            self.log.info(
                "Discarding %s buffered messages for %s",
                len(msg_buffer),
                buffer_info["session_key"],
            )

    def shutdown_kernel(self, kernel_id, now=False):
        """Shutdown a kernel by kernel_id"""
        self._check_kernel_id(kernel_id)
        kernel = self._kernels[kernel_id]
        if kernel._activity_stream:
            kernel._activity_stream.close()
            kernel._activity_stream = None
        self.stop_buffering(kernel_id)
        self._kernel_connections.pop(kernel_id, None)

        # Decrease the metric of number of kernels
        # running for the relevant kernel type by 1
        KERNEL_CURRENTLY_RUNNING_TOTAL.labels(
            type=self._kernels[kernel_id].kernel_name).dec()

        return super(MappingKernelManager, self).shutdown_kernel(
            kernel_id, now=now)

    @gen.coroutine
    def restart_kernel(self, kernel_id):
        """Restart a kernel by kernel_id

        The coroutine's result is itself a Future. That Future resolves with
        the kernel_info reply, or fails with gen.TimeoutError after
        kernel_info_timeout seconds, or with RuntimeError if the "dead"
        restart callback fires first.
        """
        self._check_kernel_id(kernel_id)
        yield maybe_future(
            super(MappingKernelManager, self).restart_kernel(kernel_id))
        kernel = self.get_kernel(kernel_id)
        # return a Future that will resolve when the kernel has successfully
        # restarted
        channel = kernel.connect_shell()
        future = Future()

        def finish():
            """Common cleanup when restart finishes/fails for any reason."""
            if not channel.closed():
                channel.close()
            loop.remove_timeout(timeout)
            kernel.remove_restart_callback(on_restart_failed, "dead")

        def on_reply(msg):
            self.log.debug("Kernel info reply received: %s", kernel_id)
            finish()
            if not future.done():
                future.set_result(msg)

        def on_timeout():
            self.log.warning("Timeout waiting for kernel_info_reply: %s",
                             kernel_id)
            finish()
            if not future.done():
                future.set_exception(
                    gen.TimeoutError("Timeout waiting for restart"))

        def on_restart_failed():
            self.log.warning("Restarting kernel failed: %s", kernel_id)
            finish()
            if not future.done():
                future.set_exception(RuntimeError("Restart failed"))

        kernel.add_restart_callback(on_restart_failed, "dead")
        kernel.session.send(channel, "kernel_info_request")
        channel.on_recv(on_reply)
        # loop and timeout are read by finish() above, which can only run
        # after these assignments.
        loop = IOLoop.current()
        timeout = loop.add_timeout(loop.time() + self.kernel_info_timeout,
                                   on_timeout)
        raise gen.Return(future)

    def notify_connect(self, kernel_id):
        """Notice a new connection to a kernel"""
        if kernel_id in self._kernel_connections:
            self._kernel_connections[kernel_id] += 1

    def notify_disconnect(self, kernel_id):
        """Notice a disconnection from a kernel"""
        if kernel_id in self._kernel_connections:
            self._kernel_connections[kernel_id] -= 1

    def kernel_model(self, kernel_id):
        """Return a JSON-safe dict representing a kernel

        For use in representing kernels in the JSON APIs.
        """
        self._check_kernel_id(kernel_id)
        kernel = self._kernels[kernel_id]

        model = {
            "id": kernel_id,
            "name": kernel.kernel_name,
            "last_activity": isoformat(kernel.last_activity),
            "execution_state": kernel.execution_state,
            # shutdown_kernel() drops the connection count before the kernel
            # itself goes away, so fall back to 0 rather than raising
            # KeyError (same default as cull_kernel_if_idle).
            "connections": self._kernel_connections.get(kernel_id, 0),
        }
        return model

    def list_kernels(self):
        """Returns a list of models (see kernel_model) of kernels running."""
        kernel_ids = super(MappingKernelManager, self).list_kernel_ids()
        return [self.kernel_model(kernel_id) for kernel_id in kernel_ids]

    # override _check_kernel_id to raise 404 instead of KeyError
    def _check_kernel_id(self, kernel_id):
        """Check that a kernel_id exists and raise 404 if not."""
        if kernel_id not in self:
            raise web.HTTPError(404, u"Kernel does not exist: %s" % kernel_id)

    # monitoring activity:

    def start_watching_activity(self, kernel_id):
        """Start watching IOPub messages on a kernel for activity.

        - update last_activity on every message
        - record execution_state from status messages
        """
        kernel = self._kernels[kernel_id]
        # add busy/activity markers:
        kernel.execution_state = "starting"
        kernel.last_activity = utcnow()
        kernel._activity_stream = kernel.connect_iopub()
        session = Session(
            config=kernel.session.config,
            key=kernel.session.key,
        )

        def record_activity(msg_list):
            """Record an IOPub message arriving from a kernel"""
            self.last_kernel_activity = kernel.last_activity = utcnow()

            idents, fed_msg_list = session.feed_identities(msg_list)
            msg = session.deserialize(fed_msg_list)

            msg_type = msg["header"]["msg_type"]
            if msg_type == "status":
                kernel.execution_state = msg["content"]["execution_state"]
                self.log.debug(
                    "activity on %s: %s (%s)",
                    kernel_id,
                    msg_type,
                    kernel.execution_state,
                )
            else:
                self.log.debug("activity on %s: %s", kernel_id, msg_type)

        kernel._activity_stream.on_recv(record_activity)

    def initialize_culler(self):
        """Start idle culler if 'cull_idle_timeout' is greater than zero.

        Regardless of that value, set flag that we've been here.
        """
        if not self._initialized_culler and self.cull_idle_timeout > 0:
            if self._culler_callback is None:
                if self.cull_interval <= 0:
                    # handle case where user set invalid value
                    self.log.warning(
                        "Invalid value for 'cull_interval' detected (%s) - using default value (%s).",
                        self.cull_interval,
                        self.cull_interval_default,
                    )
                    self.cull_interval = self.cull_interval_default
                self._culler_callback = PeriodicCallback(
                    self.cull_kernels, 1000 * self.cull_interval)
                self.log.info(
                    "Culling kernels with idle durations > %s seconds at %s second intervals ...",
                    self.cull_idle_timeout,
                    self.cull_interval,
                )
                if self.cull_busy:
                    self.log.info("Culling kernels even if busy")
                if self.cull_connected:
                    self.log.info(
                        "Culling kernels even with connected clients")
                self._culler_callback.start()

        self._initialized_culler = True

    def cull_kernels(self):
        """Check every kernel once and cull those that qualify as idle.

        A failure while checking one kernel is logged and does not stop the
        remaining kernels from being checked.
        """
        self.log.debug(
            "Polling every %s seconds for kernels idle > %s seconds...",
            self.cull_interval,
            self.cull_idle_timeout,
        )
        # Create a separate list of kernels to avoid conflicting updates
        # while iterating
        for kernel_id in list(self._kernels):
            try:
                self.cull_kernel_if_idle(kernel_id)
            except Exception as e:
                self.log.exception(
                    "The following exception was encountered while checking the idle duration of kernel %s: %s",
                    kernel_id,
                    e,
                )

    def cull_kernel_if_idle(self, kernel_id):
        """Shut the kernel down if it meets all idle criteria.

        The kernel must have been inactive for longer than cull_idle_timeout,
        must not be busy unless cull_busy is set, and must have no
        connections unless cull_connected is set.
        """
        kernel = self._kernels[kernel_id]
        self.log.debug(
            "kernel_id=%s, kernel_name=%s, last_activity=%s",
            kernel_id,
            kernel.kernel_name,
            kernel.last_activity,
        )
        if kernel.last_activity is not None:
            dt_now = utcnow()
            dt_idle = dt_now - kernel.last_activity
            # Compute idle properties
            is_idle_time = dt_idle > timedelta(seconds=self.cull_idle_timeout)
            is_idle_execute = self.cull_busy or (
                kernel.execution_state != "busy")
            connections = self._kernel_connections.get(kernel_id, 0)
            is_idle_connected = self.cull_connected or not connections
            # Cull the kernel if all three criteria are met
            if is_idle_time and is_idle_execute and is_idle_connected:
                idle_duration = int(dt_idle.total_seconds())
                self.log.warning(
                    "Culling '%s' kernel '%s' (%s) with %d connections due to %s seconds of inactivity.",
                    kernel.execution_state,
                    kernel.kernel_name,
                    kernel_id,
                    connections,
                    idle_duration,
                )
                self.shutdown_kernel(kernel_id)
def run(self):
    """Serve the configured application on this worker's sockets.

    Starts the watchdog and heartbeat periodic callbacks, wraps the
    application in a ``WSGIContainer`` unless it is a plain
    ``tornado.web.Application``, hooks request counting into the HTTP
    server, attaches every socket in ``self.sockets`` and finally starts
    the IOLoop.
    """
    self.ioloop = IOLoop.instance()
    self.alive = True
    self.server_alive = False

    # io_loop is only passed explicitly when IOLOOP_PARAMETER_REMOVED is
    # false; otherwise the keyword is left out entirely.
    loop_kwargs = {} if IOLOOP_PARAMETER_REMOVED else {"io_loop": self.ioloop}

    for periodic_task in (self.watchdog, self.heartbeat):
        PeriodicCallback(periodic_task, 1000, **loop_kwargs).start()

    # Assume the app is a WSGI callable if its not an instance of
    # tornado.web.Application or is an instance of
    # tornado.wsgi.WSGIApplication.
    app = self.wsgi
    is_tornado_app = isinstance(app, tornado.web.Application)
    if not is_tornado_app or isinstance(app, tornado.wsgi.WSGIApplication):
        app = WSGIContainer(app)

    # Count the requests handled by Tornado so that gunicorn can shut the
    # worker down once max_requests is exceeded. Depending on what the
    # installed tornado.httpserver exposes, this is done either by
    # monkey-patching HTTPConnection.finish or by subclassing HTTPServer.
    httpserver = sys.modules["tornado.httpserver"]
    if not hasattr(httpserver, 'HTTPConnection'):

        class _HTTPServer(tornado.httpserver.HTTPServer):

            def on_close(instance, server_conn):
                self.handle_request()
                super(_HTTPServer, instance).on_close(server_conn)

        server_class = _HTTPServer
    else:
        original_finish = httpserver.HTTPConnection.finish

        def finish(connection):
            self.handle_request()
            original_finish(connection)

        httpserver.HTTPConnection.finish = finish
        sys.modules["tornado.httpserver"] = httpserver
        server_class = tornado.httpserver.HTTPServer

    server_kwargs = dict(loop_kwargs)
    if self.cfg.is_ssl:
        _ssl_opt = copy.deepcopy(self.cfg.ssl_options)
        # tornado refuses initialization if ssl_options contains the
        # following options
        for rejected in ("do_handshake_on_connect", "suppress_ragged_eofs"):
            del _ssl_opt[rejected]
        server_kwargs["ssl_options"] = _ssl_opt
    server = server_class(app, **server_kwargs)

    self.server = server
    self.server_alive = True

    for sock in self.sockets:
        sock.setblocking(0)
        if hasattr(server, "add_socket"):  # tornado > 2.0
            server.add_socket(sock)
        elif hasattr(server, "_sockets"):  # tornado 2.0
            server._sockets[sock.fileno()] = sock

    server.no_keep_alive = self.cfg.keepalive <= 0
    server.start(num_processes=1)

    self.ioloop.start()
async def _await_init(self):
    """Connect, then begin the periodic keep-alive.

    Awaits ``self.connect()`` and afterwards starts a ``PeriodicCallback``
    that invokes ``self.keep_alive`` with a callback time of 20000 ms.
    """
    await self.connect()
    keep_alive_timer = PeriodicCallback(self.keep_alive, 20000)
    keep_alive_timer.start()
white_frame_rgb = cv2.cvtColor(white_frame, cv2.COLOR_BGR2RGB)
disp_black_frame = convert_rgb_to_bokehrbga(black_frame_rgb)
disp_white_frame = convert_rgb_to_bokehrbga(white_frame_rgb)


def session_health_check():
    """Push the next test frame into the session's document.

    On the first call (while ``stress_image`` is still ``None``) the image
    window is created and installed as the only root of the document.
    Every call then shows the black frame when ``count`` is even and the
    white frame when it is odd, and increments ``count``.
    """
    global stress_image, stress_data, session, count
    print("health check")

    document = session.document
    if stress_image is None:
        stress_image, stress_data = init_raw_image_window()
        document.clear()
        document.add_root(stress_image)
        document.title = "Now with live updating!"

    next_frame = disp_black_frame if count % 2 == 0 else disp_white_frame
    stress_data.data_source.data = {"image": [next_frame]}
    count += 1


# scheduler for health check
scheduler = PeriodicCallback(session_health_check, 1000.0)
scheduler.start()

# The session shares the IOLoop the scheduler runs on.
session = pull_session(
    url="http://localhost:5006/stress-test",
    session_id="test_session",
    io_loop=scheduler.io_loop,
)
scheduler.io_loop.start()
class BCSWebSocketHandler(tornado.websocket.WebSocketHandler):
    """WebSocket handler that relays terminal traffic to a BCS client.

    Incoming messages carry a one-character channel prefix: the resize
    channel changes the pty size, every other channel is forwarded to the
    BCS client as input. The handler also owns three optional periodic
    callbacks (idle tick, audit record upload, pod heartbeat) which are
    stopped in ``on_close``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Completed input lines waiting to be reported by periodic_record.
        self.input_record = []
        # Input received since the last line breaker.
        self.input_buffer = ''
        self.last_input_ts = IOLoop.current().time()
        self.record_callback = None
        self.tick_callback = None
        # Interval, in seconds, of the tick and record callbacks.
        self.record_interval = 10
        self.heartbeat_callback = None
        self.auditor = get_auditor()
        self.pod_life_cycle = PodLifeCycle()
        # Characters typed on the current line, used to detect "exit".
        self.exit_buffer = ''
        self.exit_command = 'exit'
        self.user_pod_name = None
        self.source = None

    def check_origin(self, origin):
        """Accept connections from any origin."""
        return True

    @authenticated
    def get(self, *args, **kwargs):
        """Delegate to the parent ``get``; overridden only to apply authentication."""
        return super().get(*args, **kwargs)

    def open(self, project_id, cluster_id, context):
        """Store the session context and create the BCS client.

        The terminal size is read from the ``rows`` and ``cols`` request
        arguments and normalised with ``utils.format_term_size``.
        """
        self.project_id = project_id
        self.cluster_id = cluster_id
        self.context = context
        self.user_pod_name = context['user_pod_name']
        self.source = self.get_argument('source')

        rows = self.get_argument('rows')
        rows = utils.format_term_size(rows, constants.DEFAULT_ROWS)
        cols = self.get_argument('cols')
        cols = utils.format_term_size(cols, constants.DEFAULT_COLS)

        mode = context.get('mode')
        self.bcs_client = bcs_client.factory.create(mode, self, context, rows, cols)

    def on_message(self, message):
        """Dispatch one client message according to its channel prefix.

        The first character of ``message`` is the channel number; the rest
        is the payload. A resize payload has the form ``"rows,cols"``.
        """
        self.last_input_ts = IOLoop.current().time()
        channel = int(message[0])
        message = message[1:]
        if channel == constants.RESIZE_CHANNEL:
            rows, cols = message.split(',')
            rows = int(rows)
            # Fixed: this used to be int(rows), so the terminal width was
            # always set to the row count.
            cols = int(cols)
            self.bcs_client.set_pty_size(rows, cols)
        else:
            if message == '\r':
                if self.exit_buffer.lstrip().startswith(self.exit_command):
                    self.write_message({
                        'data': "BCS Console 主动退出",
                        'type': "exit_message"
                    })
                # Fixed: this used to be a no-op comparison (==), so the
                # buffer was never cleared and kept growing across lines.
                self.exit_buffer = ''
            else:
                self.exit_buffer += message
            self.send_message(message)

    def on_close(self):
        """Send an exit to the shell and stop every running periodic callback."""
        self.send_exit()
        if self.tick_callback:
            logger.info('stop tick callback, %s', self.user_pod_name)
            self.tick_callback.stop()
        if self.record_callback:
            logger.info('stop record_callback, %s', self.user_pod_name)
            self.record_callback.stop()
        if self.heartbeat_callback:
            logger.info('stop heartbeat_callback, %s', self.user_pod_name)
            self.heartbeat_callback.stop()
        logger.info("on_close")

    def send_exit(self):
        """Forward an ``exit`` command to the BCS client."""
        exit_msg = '\nexit\n'
        self.send_message(exit_msg)

    def flush_input_record(self):
        """Return the accumulated input records and reset the list."""
        record = self.input_record[:]
        self.input_record = []
        return record

    def tick_timeout(self):
        """Start the periodic idle check (``periodic_tick``)."""
        self.tick_callback = PeriodicCallback(self.periodic_tick, self.record_interval * 1000)
        self.tick_callback.start()

    def tick_timeout2client(self):
        """Tell the client the session idled out, then exit the shell."""
        # Send the notice to the client.
        tick_timeout_min = constants.TICK_TIMEOUT // 60
        self.write_message({
            'data': f"BCS Console 已经{tick_timeout_min}分钟无操作",
            'type': "exit_message"
        })
        # Server side: leave bash with "exit".
        self.send_exit()

    def periodic_tick(self):
        """Close the session when neither input nor output happened recently.

        Idle time is measured from the later of the client's last output
        timestamp and this handler's last input timestamp.
        """
        now = IOLoop.current().time()
        idle_time = now - max(self.bcs_client.last_output_ts, self.last_input_ts)
        if idle_time > constants.TICK_TIMEOUT:
            self.tick_timeout2client()
            logger.info('tick timeout, close session %s, idle time, %.2f', self.user_pod_name, idle_time)
        # Logged on every tick, including one that just timed out.
        logger.info('tick active %s, idle time, %.2f', self.user_pod_name, idle_time)

    def heartbeat(self):
        """Start reporting a heartbeat for the user pod every 1000 ms."""
        self.heartbeat_callback = PeriodicCallback(
            lambda: self.pod_life_cycle.heartbeat(self.user_pod_name), 1000)
        self.heartbeat_callback.start()

    def start_record(self):
        """Start the periodic upload of audit records."""
        self.record_callback = PeriodicCallback(self.periodic_record, self.record_interval * 1000)
        self.record_callback.start()

    def periodic_record(self):
        """Emit the input/output records collected since the last call.

        Nothing is emitted when both record lists are empty.
        """
        input_record = self.flush_input_record()
        output_record = self.bcs_client.flush_output_record()
        if not input_record and not output_record:
            return

        # Payload handed to the auditor.
        data = {
            'input_record': '\r\n'.join(input_record),
            'output_record': '\r\n'.join(output_record),
            'session_id': self.context['session_id'],
            'context': self.context,
            'project_id': self.project_id,
            'cluster_id': self.cluster_id,
            'user_pod_name': self.user_pod_name,
            'username': self.context['username']
        }
        self.auditor.emit(data)
        logger.info(data)

    def send_message(self, message):
        """Record ``message`` for auditing and forward it to the BCS client.

        The message is dropped when the client's websocket is missing or
        its stream is closed. Errors raised while writing are logged, not
        propagated.
        """
        if not self.bcs_client.ws or self.bcs_client.ws.stream.closed():
            logger.info("session %s, close, message just ignore", self)
            return

        self.input_buffer += message
        if self.input_buffer.endswith(constants.INPUT_LINE_BREAKER):
            # line_msg = ['command', '']
            line_msg = self.input_buffer.split(constants.INPUT_LINE_BREAKER)
            for i in line_msg[:-1]:
                record = '%s: %s' % (arrow.now().strftime(
                    "%Y-%m-%d %H:%M:%S.%f"), clean_bash_escape(i))
                logger.debug(record)
                self.input_record.append(record)
            # empty input_buffer
            self.input_buffer = line_msg[-1]

        try:
            self.bcs_client.write_message(message)
        except Exception as e:
            logger.exception(e)
class NotificationHandler(WebSocketHandler):
    """Websocket handler to send messages to subscribed clients."""

    # Shared by all handlers: maps each open handler to the client type it
    # subscribed as (None until it subscribes).
    connections = {}
    # Shared by all handlers: maps each webapp handler to the timestamp it
    # reported in its latest "refreshed" message.
    webapp_export_timestamps = {}

    def __init__(self, application, request, **kwargs):
        super(NotificationHandler, self).__init__(application, request, **kwargs)
        self.last_pong = None
        self.timeout_callback = None

    def open(self, *args, **kwargs):
        """Register the connection and start the ping/timeout cycle."""
        self.connections[self] = None
        # Set last pong timestamp to current timestamp and ping client
        self.last_pong = IOLoop.current().time()
        self.ping(b"")
        # Start periodic callback checking time since last received pong
        self.timeout_callback = PeriodicCallback(
            self.timeout_check, WEBSOCKET_PING_INTERVAL * 1000)
        self.timeout_callback.start()

    def data_received(self, chunk):
        pass

    def on_message(self, message):
        """Handle a subscription, cache-invalidation or refresh message.

        ``invalidate-cache`` is only honoured from a "reposcan" client and
        ``refreshed <timestamp>`` only from a "webapp" client. Once every
        subscribed webapp has reported the same timestamp, listeners are
        sent "webapps-refreshed".
        """
        if message == "subscribe-webapp":
            self.connections[self] = "webapp"
        elif message == "subscribe-reposcan":
            self.connections[self] = "reposcan"
        elif message == "subscribe-listener":
            self.connections[self] = "listener"
        elif message == "invalidate-cache" and self.connections[self] == "reposcan":
            self.webapp_export_timestamps.clear()
            self.send_message("webapp", "refresh-cache")
        elif message.startswith("refreshed") and self.connections[self] == "webapp":
            _, timestamp = message.split()
            self.webapp_export_timestamps[self] = timestamp
            # All webapp connections are refreshed with same dump version
            webapp_count = sum(
                1 for client_type in self.connections.values()
                if client_type == "webapp")
            if (webapp_count == len(self.webapp_export_timestamps)
                    and len(set(self.webapp_export_timestamps.values())) == 1):
                self.send_message("listener", "webapps-refreshed")

    def on_close(self):
        """Stop the timeout check and drop all shared state for this handler."""
        # timeout_callback is still None if open() never ran.
        if self.timeout_callback is not None:
            self.timeout_callback.stop()
        self.connections.pop(self, None)
        # Fixed: the timestamp entry used to survive the disconnect, which
        # leaked the handler and made the webapp count comparison in
        # on_message unreliable for the remaining webapps.
        self.webapp_export_timestamps.pop(self, None)

    def timeout_check(self):
        """Check time since we received last pong. Send ping again."""
        now = IOLoop.current().time()
        if now - self.last_pong > WEBSOCKET_TIMEOUT:
            self.close(1000, "Connection timed out.")
            return
        self.ping(b"")

    def on_pong(self, data):
        """Pong received from client."""
        self.last_pong = IOLoop.current().time()

    @staticmethod
    def send_message(target_client_type, message):
        """Send message to selected group of connected clients."""
        for client, client_type in NotificationHandler.connections.items():
            if client_type == target_client_type:
                client.write_message(message)