def wait(self):
    """Supervise the watcher threads forever, restarting any that has died.

    Each pass joins every thread in ``self.thread_map`` for up to 30
    seconds.  A thread that is no longer alive afterwards is replaced by
    the result of ``spawn(self.watch_app_job_pods, name)`` for the same
    cluster name.  This method never returns.
    """
    while True:
        # Iterate over a snapshot because entries are replaced inside the loop.
        for name, t in list(self.thread_map.items()):
            t.join(30)
            # is_alive() is the supported spelling; the camelCase isAlive()
            # alias no longer exists on threading.Thread since Python 3.9.
            # NOTE(review): assumes `spawn` returns a threading.Thread-like
            # object -- confirm against its definition.
            if not t.is_alive():
                logger.info("cluster {}'s watcher thread crashed, restart it".format(name))
                self.thread_map[name] = spawn(self.watch_app_job_pods, name)
def _refresh_thread_func(self, *args, **kwargs):
    """Run ``self.refresh()`` in an endless loop, pausing between rounds.

    Any ``Exception`` raised by a refresh is logged with its traceback and
    the loop carries on; the pause of ``REFRESH_INTERVAL`` happens after
    every round, successful or not.  The positional and keyword arguments
    are accepted but not used.  This function never returns.
    """
    logger.info("starting sso refresher thread")

    while True:
        logger.debug("refresh sso cache.")

        try:
            self.refresh()
        except Exception:
            # A failed round must not kill the refresher; try again next time.
            logger.exception("error when refresh sso, retry...")

        time.sleep(REFRESH_INTERVAL)
def enter_pod(socket, appname):
    """Bridge a websocket to a shell opened inside a pod.

    Protocol, as implemented below:

    1. Read messages from ``socket`` until one parses with
       ``pod_entry_schema``.  Parse errors are reported back to the client
       as JSON and the next message is awaited.  A ``None`` message ends
       the handler.
    2. Look up the app named ``appname`` and check that ``g.user`` is
       granted to it; on failure send an error message and return.
    3. Open a shell via ``KubeApi.instance().exec_shell`` using the
       ``podname``, ``cluster``, ``namespace`` and optional ``container``
       fields of the payload.
    4. Spawn two greenlets -- one forwarding shell stdout/stderr to the
       socket, one sending periodic pings -- and forward every message
       received from the socket to the shell's stdin until either side
       stops.

    :param socket: websocket-like object providing ``receive()`` and ``send()``.
    :param appname: name of the app the target pod belongs to.
    """
    payload = None
    while True:
        message = socket.receive()
        if message is None:
            return
        try:
            payload = pod_entry_schema.loads(message)
            break
        except ValidationError as e:
            socket.send(json.dumps(e.messages))
        except JSONDecodeError as e:
            socket.send(json.dumps({'error': str(e)}))

    app = App.get_by_name(appname)
    if not app:
        socket.send(
            make_errmsg('app {} not found'.format(appname), jsonize=True))
        return

    if not g.user.granted_to_app(app):
        socket.send(
            make_errmsg(
                'You\'re not granted to this app, ask administrators for permission',
                jsonize=True))
        return

    args = payload.data
    podname = args['podname']
    cluster = args['cluster']
    namespace = args['namespace']
    container = args.get('container', None)
    # NOTE(review): `sh` is never explicitly closed in this function; confirm
    # whether the object returned by exec_shell needs a close() call.
    sh = KubeApi.instance().exec_shell(podname, namespace=namespace,
                                       cluster_name=cluster, container=container)

    # Shared stop flag: whichever of the three loops finishes first sets it,
    # and the other two poll it to know when to stop.
    need_exit = False

    def heartbeat_sender():
        """Ping the client periodically until asked to stop or the ping fails."""
        nonlocal need_exit
        # Ping slightly before the heartbeat timeout; fall back to the raw
        # timeout when subtracting the margin would give a non-positive value.
        interval = WS_HEARTBEAT_TIMEOUT - 3
        if interval <= 0:
            interval = WS_HEARTBEAT_TIMEOUT

        try:
            while need_exit is False:
                time.sleep(interval)
                try:
                    logger.debug("send PING")
                    send_ping(socket)
                except WebSocketError:
                    need_exit = True
                    return
        finally:
            logger.debug("pod entry heartbeat greenlet exit")

    def resp_sender():
        """Forward shell stdout/stderr to the client while the shell is open."""
        nonlocal need_exit
        try:
            while sh.is_open() and need_exit is False:
                sh.update(timeout=1)
                if sh.peek_stdout():
                    msg = sh.read_stdout()
                    logger.debug("STDOUT: %s", msg)
                    socket.send(msg)
                if sh.peek_stderr():
                    msg = sh.read_stderr()
                    logger.debug("STDERR: %s", msg)
                    socket.send(msg)
        except ProtocolError:
            logger.warning('kubernetes disconnect client after default 10m...')
        except WebSocketError:
            logger.warning('client socket is closed')
        except Exception as e:
            logger.warning("unknown exception: {}".format(str(e)))
        finally:
            need_exit = True
            logger.debug("exec output sender greenlet exit")

    gevent.spawn(resp_sender)
    gevent.spawn(heartbeat_sender)

    # to avoid lost mysql connection exception
    db.session.remove()

    try:
        while need_exit is False:
            # get command from client
            message = socket.receive()
            if message is None:
                logger.info("client socket closed")
                break
            sh.write_stdin(message)
    finally:
        need_exit = True
        logger.debug("pod entry greenlet exit")
def get_app_pods_events(socket, appname):
    """Stream pod events of an app to a websocket client.

    Protocol, as implemented below:

    1. Read messages from ``socket`` until one parses with
       ``cluster_canary_schema``.  Parse errors are reported back as JSON
       and the next message is awaited.  A ``None`` message ends the
       handler.
    2. Look up the app named ``appname`` and check that ``g.user`` is
       granted to it; on failure send an error message and return.
    3. Send every pod currently returned by ``get_app_pods`` as an
       ``"ADDED"`` event.
    4. Subscribe to the watcher channel for the cluster/app and forward
       each published message to the client until the client goes away.

    Two helper greenlets run alongside the forwarding loop: one detects the
    client closing the socket, the other sends a ping when nothing has been
    sent to the client for roughly one heartbeat interval.

    :param socket: websocket-like object providing ``receive()`` and ``send()``.
    :param appname: app name; ``"-canary"`` is appended when the payload's
        ``canary`` field is truthy.
    """
    payload = None
    # Time of the last send to the client; used to skip unnecessary pings.
    socket_active_ts = time.time()

    while True:
        message = socket.receive()
        if message is None:
            return
        try:
            payload = cluster_canary_schema.loads(message)
            break
        except ValidationError as e:
            socket.send(json.dumps(e.messages))
        except JSONDecodeError as e:
            socket.send(json.dumps({'error': str(e)}))

    args = payload.data
    cluster = args['cluster']
    canary = args['canary']
    name = "{}-canary".format(appname) if canary else appname
    channel = make_app_watcher_channel_name(cluster, name)
    ns = DEFAULT_APP_NS

    app = App.get_by_name(appname)
    if not app:
        socket.send(
            make_errmsg('app {} not found'.format(appname), jsonize=True))
        return

    if not g.user.granted_to_app(app):
        socket.send(
            make_errmsg(
                'You\'re not granted to this app, ask administrators for permission',
                jsonize=True))
        return

    # since this request may pend long time, so we remove the db session
    # otherwise we may get error like `sqlalchemy.exc.TimeoutError: QueuePool limit of size 50 overflow 10 reached, connection timed out`
    with session_removed():
        pod_list = KubeApi.instance().get_app_pods(name, cluster_name=cluster, namespace=ns)
        pods = pod_list.to_dict()
        for item in pods['items']:
            data = {
                'object': item,
                'action': "ADDED",
            }
            socket.send(json.dumps(data, cls=VersatileEncoder))

        pubsub = rds.pubsub()
        pubsub.subscribe(channel)

        # Shared stop flag polled by the forwarding loop and both greenlets.
        need_exit = False

        def check_client_socket():
            """Set the stop flag once the client side of the socket closes."""
            nonlocal need_exit
            while need_exit is False:
                if socket.receive() is None:
                    need_exit = True
                    break

        def heartbeat_sender():
            """Ping the client whenever the socket has been idle long enough."""
            nonlocal need_exit, socket_active_ts
            # Ping slightly before the heartbeat timeout; fall back to the raw
            # timeout when subtracting the margin gives a non-positive value.
            interval = WS_HEARTBEAT_TIMEOUT - 3
            if interval <= 0:
                interval = WS_HEARTBEAT_TIMEOUT

            while need_exit is False:
                now = time.time()
                if now - socket_active_ts <= (interval - 1):
                    # Something was sent recently; sleep out the rest of the
                    # interval before checking again.
                    time.sleep(interval - (now - socket_active_ts))
                else:
                    try:
                        send_ping(socket)
                        socket_active_ts = time.time()
                    except WebSocketError:
                        need_exit = True
                        return

        gevent.spawn(check_client_socket)
        gevent.spawn(heartbeat_sender)

        try:
            while need_exit is False:
                # The timeout bounds how long a set stop flag goes unnoticed.
                resp = pubsub.get_message(timeout=30)
                if resp is None:
                    continue

                if resp['type'] == 'message':
                    raw_content = resp['data']
                    # omit the initial message where resp['data'] is 1L
                    if not isinstance(raw_content, (bytes, str)):
                        continue
                    content = raw_content
                    if isinstance(content, bytes):
                        content = content.decode('utf-8')
                    socket.send(content)
                    socket_active_ts = time.time()
        finally:
            # Signal the helper greenlets first so they stop even if the
            # redis cleanup below raises.
            need_exit = True
            try:
                pubsub.unsubscribe()
            finally:
                # need close the connection created by PUB/SUB,
                # otherwise it will cause too many redis connections
                pubsub.close()
            logger.info("ws connection closed")
def start(self):
    """Spawn one watcher per cluster and record it in ``self.thread_map``.

    Cluster names come from ``KubeApi.instance().cluster_names``; each
    watcher runs ``self.watch_app_job_pods`` with the cluster name as its
    argument and is stored under that name.
    """
    cluster_names = KubeApi.instance().cluster_names
    for cluster_name in cluster_names:
        logger.info("create watcher thread for cluster {}".format(cluster_name))
        watcher = spawn(self.watch_app_job_pods, cluster_name)
        self.thread_map[cluster_name] = watcher
def start(self):
    """Spawn one watcher per cluster and record it in ``self.thread_map``.

    Cluster names come from ``get_cluster_names()``; each watcher runs
    ``self.watch_app_pods`` with the cluster name as its argument and is
    stored under that name.
    """
    cluster_names = get_cluster_names()
    for cluster_name in cluster_names:
        logger.info("create watcher thread for cluster {}".format(cluster_name))
        watcher = spawn(self.watch_app_pods, cluster_name)
        self.thread_map[cluster_name] = watcher