Example #1
0
 def wait(self):
     while True:
         for name, t in self.thread_map.items():
             t.join(30)
             if not t.isAlive():
                 logger.info("cluster {}'s watcher thread crashed, restart it".format(name))
                 self.thread_map[name] = spawn(self.watch_app_job_pods, name)
Example #2
0
 def _refresh_thread_func(self, *args, **kwargs):
     logger.info("starting sso refresher thread")
     while True:
         logger.debug("refresh sso cache.")
         try:
             self.refresh()
         except Exception:
             logger.exception("error when refresh sso, retry...")
         time.sleep(REFRESH_INTERVAL)
Example #3
0
def enter_pod(socket, appname):
    payload = None
    while True:
        message = socket.receive()
        if message is None:
            return
        try:
            payload = pod_entry_schema.loads(message)
            break
        except ValidationError as e:
            socket.send(json.dumps(e.messages))
        except JSONDecodeError as e:
            socket.send(json.dumps({'error': str(e)}))

    app = App.get_by_name(appname)
    if not app:
        socket.send(
            make_errmsg('app {} not found'.format(appname), jsonize=True))
        return

    if not g.user.granted_to_app(app):
        socket.send(
            make_errmsg(
                'You\'re not granted to this app, ask administrators for permission',
                jsonize=True))
        return

    args = payload.data
    podname = args['podname']
    cluster = args['cluster']
    namespace = args['namespace']
    container = args.get('container', None)
    sh = KubeApi.instance().exec_shell(podname,
                                       namespace=namespace,
                                       cluster_name=cluster,
                                       container=container)
    need_exit = False

    def heartbeat_sender():
        nonlocal need_exit
        interval = WS_HEARTBEAT_TIMEOUT - 3
        if interval <= 0:
            interval = WS_HEARTBEAT_TIMEOUT

        try:
            while need_exit is False:
                time.sleep(interval)
                try:
                    # send a null character to client
                    logger.debug("send PING")
                    send_ping(socket)
                except WebSocketError as e:
                    need_exit = True
                    return
        finally:
            logger.debug("pod entry heartbeat greenlet exit")

    def resp_sender():
        nonlocal need_exit
        try:
            while sh.is_open() and need_exit is False:
                sh.update(timeout=1)
                if sh.peek_stdout():
                    msg = sh.read_stdout()
                    logger.debug("STDOUT: %s" % msg)
                    socket.send(msg)
                if sh.peek_stderr():
                    msg = sh.read_stderr()
                    logger.debug("STDERR: %s" % msg)
                    socket.send(msg)
        except ProtocolError:
            logger.warn('kubernetes disconnect client after default 10m...')
        except WebSocketError as e:
            logger.warn('client socket is closed')
        except Exception as e:
            logger.warn("unknown exception: {}".format(str(e)))
        finally:
            need_exit = True
            logger.debug("exec output sender greenlet exit")

    gevent.spawn(resp_sender)
    gevent.spawn(heartbeat_sender)

    # to avoid lost mysql connection exception
    db.session.remove()
    try:
        while need_exit is False:
            # get command from client
            message = socket.receive()
            if message is None:
                logger.info("client socket closed")
                break
            sh.write_stdin(message)
            continue
    finally:
        need_exit = True
        logger.debug("pod entry greenlet exit")
Example #4
0
def get_app_pods_events(socket, appname):
    payload = None
    socket_active_ts = time.time()

    while True:
        message = socket.receive()
        if message is None:
            return
        try:
            payload = cluster_canary_schema.loads(message)
            break
        except ValidationError as e:
            socket.send(json.dumps(e.messages))
        except JSONDecodeError as e:
            socket.send(json.dumps({'error': str(e)}))

    args = payload.data
    cluster = args['cluster']
    canary = args['canary']
    name = "{}-canary".format(appname) if canary else appname
    channel = make_app_watcher_channel_name(cluster, name)
    ns = DEFAULT_APP_NS

    app = App.get_by_name(appname)
    if not app:
        socket.send(
            make_errmsg('app {} not found'.format(appname), jsonize=True))
        return

    if not g.user.granted_to_app(app):
        socket.send(
            make_errmsg(
                'You\'re not granted to this app, ask administrators for permission',
                jsonize=True))
        return

    # since this request may pend long time, so we remove the db session
    # otherwise we may get error like `sqlalchemy.exc.TimeoutError: QueuePool limit of size 50 overflow 10 reached, connection timed out`
    with session_removed():
        pod_list = KubeApi.instance().get_app_pods(name,
                                                   cluster_name=cluster,
                                                   namespace=ns)
        pods = pod_list.to_dict()
        for item in pods['items']:
            data = {
                'object': item,
                'action': "ADDED",
            }
            socket.send(json.dumps(data, cls=VersatileEncoder))

        pubsub = rds.pubsub()
        pubsub.subscribe(channel)
        need_exit = False

        def check_client_socket():
            nonlocal need_exit
            while need_exit is False:
                if socket.receive() is None:
                    need_exit = True
                    break

        def heartbeat_sender():
            nonlocal need_exit, socket_active_ts
            interval = WS_HEARTBEAT_TIMEOUT - 3
            if interval <= 0:
                interval = WS_HEARTBEAT_TIMEOUT

            while need_exit is False:
                now = time.time()
                if now - socket_active_ts <= (interval - 1):
                    time.sleep(interval - (now - socket_active_ts))
                else:
                    try:
                        send_ping(socket)
                        socket_active_ts = time.time()
                    except WebSocketError as e:
                        need_exit = True
                        return

        gevent.spawn(check_client_socket)
        gevent.spawn(heartbeat_sender)

        try:

            while need_exit is False:
                resp = pubsub.get_message(timeout=30)
                if resp is None:
                    continue

                if resp['type'] == 'message':
                    raw_content = resp['data']
                    # omit the initial message where resp['data'] is 1L
                    if not isinstance(raw_content, (bytes, str)):
                        continue
                    content = raw_content
                    if isinstance(content, bytes):
                        content = content.decode('utf-8')
                    socket.send(content)
                    socket_active_ts = time.time()
        finally:
            # need close the connection created by PUB/SUB,
            # otherwise it will cause too many redis connections
            pubsub.unsubscribe()
            pubsub.close()
            need_exit = True
    logger.info("ws connection closed")
Example #5
0
 def start(self):
     for name in KubeApi.instance().cluster_names:
         logger.info("create watcher thread for cluster {}".format(name))
         self.thread_map[name] = spawn(self.watch_app_job_pods, name)
Example #6
0
 def start(self):
     for name in get_cluster_names():
         logger.info("create watcher thread for cluster {}".format(name))
         self.thread_map[name] = spawn(self.watch_app_pods, name)