def watch_app_job_pods(self, cluster):
    """Watch app and job pods of ``cluster`` forever and fan out their events.

    Pods are selected with the ``kae-type in (app, job)`` label selector.

    * Events of pods carrying a ``kae-app-name`` label are serialized to JSON
      (``object`` + ``action``) and published on the channel returned by
      ``make_app_watcher_channel_name(cluster, appname)``.
    * Events of pods carrying a ``kae-job-name`` label are handed to
      ``handle_job_pod_event.delay``; ``DELETED`` events are skipped.

    This function never returns: whenever the watch ends or raises, it is
    re-opened, resuming from the last resource version that was seen.

    :param cluster: name of the cluster whose pods are watched.
    """
    last_seen_version = None
    label_selector = "kae-type in (app, job)"
    while True:
        try:
            watch_kwargs = {
                'cluster_name': cluster,
                'label_selector': label_selector,
            }
            # Only resume from a bookmark once at least one event was seen.
            if last_seen_version is not None:
                watch_kwargs['resource_version'] = last_seen_version
            watcher = KubeApi.instance().watch_pods(**watch_kwargs)

            for event in watcher:
                obj = event['object']
                labels = obj.metadata.labels or {}
                last_seen_version = obj.metadata.resource_version

                if 'kae-app-name' in labels:
                    appname = labels['kae-app-name']
                    channel = make_app_watcher_channel_name(cluster, appname)
                    data = {
                        'object': obj.to_dict(),
                        'action': event['type'],
                    }
                    rds.publish(
                        message=json.dumps(data, cls=VersatileEncoder),
                        channel=channel)
                elif 'kae-job-name' in labels:
                    if event['type'] == 'DELETED':
                        continue
                    jobname = labels['kae-job-name']
                    handle_job_pod_event.delay(jobname, event['raw_object'])
        except ProtocolError:
            # Expected: the connection is dropped periodically, just re-watch.
            logger.warning(
                'skip this error... because kubernetes disconnect client after default 10m...'
            )
        except Exception:
            # NOTE(review): the loop retries immediately with the same
            # resource version; if that version has become too old for the
            # API server this may fail repeatedly -- confirm how watch_pods
            # reports an expired version.
            logger.exception("watch pods workers error")
def delete(self):
    """Delete this object from the database and commit.

    If SQLAlchemy reports that the row is already gone
    (``ObjectDeletedError``), the session is rolled back and the situation is
    only logged; no exception is propagated in that case.
    """
    try:
        db.session.delete(self)
        db.session.commit()
    except sqlalchemy.orm.exc.ObjectDeletedError:
        db.session.rollback()
        logger.warning('Error during deleting: Object %s already deleted', self)
def build_image(self, appname, git_tag):
    """Build the image of release ``git_tag`` of ``appname`` and stream progress.

    Every message produced by ``build_image_helper`` is forwarded through
    ``self.stream_output``. A ``BuildError`` is reported by streaming its
    ``data``; a ``SoftTimeLimitExceeded`` is reported by streaming a timeout
    error message. Neither exception is re-raised.

    :param appname: name of the app to build.
    :param git_tag: tag identifying the release to build.
    """
    release = Release.get_by_app_and_tag(appname, git_tag)
    try:
        for msg in build_image_helper(appname, release):
            self.stream_output(msg)
    except BuildError as e:
        self.stream_output(e.data)
    except SoftTimeLimitExceeded:
        logger.warning("build timeout.")
        self.stream_output(
            make_errmsg(
                'build timeout, please test in local environment and contact administrator'
            ))
def create(cls, app, cluster, content, comment=''):
    """Create and commit a new config record for ``app`` on ``cluster``.

    :param app: an App instance; its ``id`` is stored and its ``name`` is used
        for logging.
    :param cluster: cluster the config belongs to.
    :param content: config content; a ``dict`` is serialized to JSON first.
    :param comment: optional free-text comment.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    appname = app.name
    if isinstance(content, dict):
        content = json.dumps(content)
    try:
        new_cfg = cls(app_id=app.id, cluster=cluster, content=content, comment=comment)
        db.session.add(new_cfg)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create AppConfig %s, duplicate', appname)
        db.session.rollback()
        raise
    return new_cfg
def create(cls, name, app, specs_text, comment=''):
    """Create and commit a new named yaml record for ``app``.

    ``specs_text`` is parsed and validated against ``app_specs_schema`` before
    anything is written; only the validation side effect is used, the text is
    stored unchanged.

    :param name: name of the yaml record.
    :param app: an App instance; its ``id`` is stored and its ``name`` is used
        for logging.
    :param specs_text: YAML text of the app specs.
    :param comment: optional free-text comment.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    appname = app.name
    # check the format of specs text (the loaded result is ignored)
    app_specs_schema.load(yaml.safe_load(specs_text))
    try:
        new_yaml = cls(name=name, app_id=app.id, specs_text=specs_text, comment=comment)
        db.session.add(new_yaml)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create AppYaml %s %s, duplicate', appname, name)
        db.session.rollback()
        raise
    return new_yaml
def update(self, specs_text, image=None, build_status=False, branch='', author='', commit_message=''):
    """Validate ``specs_text`` and store it with the build info on this release.

    The update is best-effort: if the underlying ``update`` fails, the error
    is logged with its traceback, the session is rolled back and ``self`` is
    still returned (the exception is deliberately not re-raised).

    :param specs_text: YAML text of the app specs; validated against
        ``app_specs_schema`` before the update.
    :param image: image value to store.
    :param build_status: build status flag to store.
    :param branch: accepted for interface compatibility; not used here.
    :param author: stored in the JSON ``misc`` field.
    :param commit_message: stored in the JSON ``misc`` field.
    :returns: ``self``.
    """
    # check the format of specs text (the loaded result is ignored)
    app_specs_schema.load(yaml.safe_load(specs_text))
    misc = {
        'author': author,
        'commit_message': commit_message,
        'git': self.git,
    }
    try:
        super(Release, self).update(
            specs_text=specs_text,
            image=image,
            build_status=build_status,
            misc=json.dumps(misc))
    except Exception:
        # Keep swallowing the failure as before, but record why it happened.
        logger.warning('Fail to update Release %s %s', self.appname, self.tag, exc_info=True)
        db.session.rollback()
    return self
def celery_task_stream_response(celery_task_ids, timeout=0, exit_when_timeout=True):
    """Yield the progress messages published by one or more celery tasks.

    Subscribes to the ``TASK_PUBSUB_CHANNEL`` of every task id and yields each
    text message as ``str``. A message starting with ``CELERY_TASK_DONE``
    marks the end of one task: its channel is unsubscribed and the message is
    not yielded. The generator ends once no subscription is left.

    :param celery_task_ids: a single task id (``str``) or an iterable of ids.
    :param timeout: timeout passed to ``pubsub.get_message``.
    :param exit_when_timeout: when true, stop as soon as ``get_message``
        returns nothing; otherwise keep polling.

    NOTE(review): on timeout the generator simply ends -- it never yields
    ``None`` -- so a consumer cannot tell a timeout from normal completion.
    """
    if isinstance(celery_task_ids, str):
        celery_task_ids = celery_task_ids,

    task_progress_channels = [
        TASK_PUBSUB_CHANNEL.format(task_id=id_) for id_ in celery_task_ids
    ]
    pubsub = rds.pubsub()
    pubsub.subscribe(task_progress_channels)
    try:
        while pubsub.subscribed:
            resp = pubsub.get_message(timeout=timeout)
            if resp is None:
                if exit_when_timeout:
                    logger.warning("pubsub timeout %s", celery_task_ids)
                    return None
                continue

            raw_content = resp['data']
            # skip non-text payloads such as the integer sent on (un)subscribe
            if not isinstance(raw_content, (bytes, str)):
                continue
            content = raw_content
            if isinstance(content, bytes):
                content = content.decode('utf-8')
            logger.debug('Got pubsub message: %s', content)

            # a task publishes the done marker at success or failure
            if content.startswith('CELERY_TASK_DONE'):
                # everything after the first ':' is the finished task id
                finished_task_id = content.split(':', 1)[-1]
                finished_task_channel = TASK_PUBSUB_CHANNEL.format(
                    task_id=finished_task_id)
                logger.debug(
                    'Task %s finished, break celery_task_stream_response',
                    finished_task_id)
                pubsub.unsubscribe(finished_task_channel)
            else:
                yield content
    finally:
        logger.debug("celery stream response exit ************")
        pubsub.unsubscribe()
        pubsub.close()
def create(cls, app, tag, specs_text):
    """Create and commit a new record for ``app`` at ``tag``.

    :param app: an App instance; its ``id`` is stored and its ``name`` is used
        for logging.
    :param tag: tag of the new record.
    :param specs_text: the specs, given as a ``Dict`` (dumped through
        ``to_dict()``), a plain ``dict`` (dumped to YAML) or YAML text. Only
        YAML text is validated against ``app_specs_schema``.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    if isinstance(specs_text, Dict):
        specs_text = yaml.dump(specs_text.to_dict())
    elif isinstance(specs_text, dict):
        specs_text = yaml.dump(specs_text)
    else:
        # check the format of specs text (the loaded result is ignored).
        # safe_load: the text only needs plain YAML, and ``yaml.load``
        # without an explicit Loader can construct arbitrary objects.
        app_specs_schema.load(yaml.safe_load(specs_text))
    try:
        new_release = cls(tag=tag, app_id=app.id, specs_text=specs_text)
        db.session.add(new_release)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create SpecVersion %s %s, duplicate', app.name, tag)
        db.session.rollback()
        raise
    return new_release
def create(cls, name, apps, actions, clusters=None):
    """Create and commit a new role.

    :param name: role name.
    :param apps: passed through unchanged as the ``apps`` attribute.
    :param actions: iterable of enum-like members; their ``.value``s are
        stored as a JSON list. Left as ``None`` when empty.
    :param clusters: optional clusters value, stored as JSON. Left as ``None``
        when empty.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    actions_txt, clusters_txt = None, None
    if actions:
        actions_txt = json.dumps([act.value for act in actions])
    if clusters:
        clusters_txt = json.dumps(clusters)

    r = cls(name=name, apps=apps, actions=actions_txt, clusters=clusters_txt)
    try:
        db.session.add(r)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create role %s', name)
        db.session.rollback()
        raise
    return r
def create(cls, app, tag, yaml_name, specs_text, parent_id, cluster, config_id=None):
    """Create and commit a new version record for ``app`` at ``tag``.

    :param app: an App instance; its ``id`` is stored and its ``name`` is used
        for logging.
    :param tag: tag of the new record.
    :param yaml_name: stored as ``yaml_name``.
    :param specs_text: the specs, given as a ``Dict`` (dumped through
        ``to_dict()``), a plain ``dict`` (dumped to YAML) or YAML text. Only
        YAML text is validated against ``app_specs_schema``.
    :param parent_id: stored as ``parent_id``.
    :param cluster: stored as ``cluster``.
    :param config_id: optional, stored as ``config_id``.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    if isinstance(specs_text, Dict):
        specs_text = yaml.dump(specs_text.to_dict())
    elif isinstance(specs_text, dict):
        specs_text = yaml.dump(specs_text)
    else:
        # check the format of specs text (the loaded result is ignored)
        app_specs_schema.load(yaml.safe_load(specs_text))
    try:
        ver = cls(
            tag=tag,
            app_id=app.id,
            parent_id=parent_id,
            cluster=cluster,
            config_id=config_id,
            yaml_name=yaml_name,
            specs_text=specs_text)
        db.session.add(ver)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create SpecVersion %s %s, duplicate', app.name, tag)
        db.session.rollback()
        raise
    return ver
def create(cls, app, tag, specs_text, image=None, build_status=False, branch='', author='', commit_message=''):
    """Create and commit a new release of ``app`` at ``tag``.

    ``specs_text`` is parsed and validated against ``app_specs_schema`` before
    anything is written; the text itself is stored unchanged.

    :param app: an App instance; its ``id`` and ``git`` are stored and its
        ``name`` is used for logging.
    :param tag: tag of the release.
    :param specs_text: YAML text of the app specs.
    :param image: image value to store.
    :param build_status: build status flag to store.
    :param branch: accepted for interface compatibility; not used here.
    :param author: stored in the JSON ``misc`` field.
    :param commit_message: stored in the JSON ``misc`` field.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    appname = app.name
    # check the format of specs text (the loaded result is ignored)
    app_specs_schema.load(yaml.safe_load(specs_text))
    misc = {
        'author': author,
        'commit_message': commit_message,
        'git': app.git,
    }
    try:
        new_release = cls(
            tag=tag,
            app_id=app.id,
            image=image,
            build_status=build_status,
            specs_text=specs_text,
            misc=json.dumps(misc))
        db.session.add(new_release)
        db.session.commit()
    except IntegrityError:
        logger.warning('Fail to create Release %s %s, duplicate', appname, tag)
        db.session.rollback()
        raise
    return new_release
def create(cls, name, git=None, branch=None, commit=None, specs_text=None, comment=None, status=None):
    """Create and commit a new job owned by the current user.

    The job's ``nickname`` is taken from ``g.user.nickname``.

    :param name: job name.
    :param git: stored as ``git``.
    :param branch: stored as ``branch``.
    :param commit: stored as ``commit``.
    :param specs_text: stored as ``specs_text``.
    :param comment: stored as ``comment``.
    :param status: stored as ``status``.
    :returns: the newly created instance.
    :raises IntegrityError: re-raised after the session has been rolled back.
    """
    try:
        job = cls(
            name=name,
            git=git,
            branch=branch,
            commit=commit,
            specs_text=specs_text,
            nickname=g.user.nickname,
            comment=comment,
            status=status)
        db.session.add(job)
        db.session.commit()
    except IntegrityError:
        # One placeholder for the one argument: the previous format string
        # had two, which made the log call itself fail to format.
        logger.warning('Fail to create Job %s, duplicate', name)
        db.session.rollback()
        raise
    return job
def resp_sender(): nonlocal need_exit try: while sh.is_open() and need_exit is False: sh.update(timeout=1) if sh.peek_stdout(): msg = sh.read_stdout() logger.debug("STDOUT: %s" % msg) socket.send(msg) if sh.peek_stderr(): msg = sh.read_stderr() logger.debug("STDERR: %s" % msg) socket.send(msg) except ProtocolError: logger.warn('kubernetes disconnect client after default 10m...') except WebSocketError as e: logger.warn('client socket is closed') except Exception as e: logger.warn("unknown exception: {}".format(str(e))) finally: need_exit = True logger.debug("exec output sender greenlet exit")
def delete(self):
    """Log the deletion, then delegate to the parent class ``delete``.

    :returns: whatever the parent ``delete`` returns.
    """
    logger.warning('Deleting release %s', self)
    return super(SpecVersion, self).delete()
def build_app(socket, appname):
    """Build an image for the specified release.
    ---
    definitions:
      BuildArgs:
        type: object
        properties:
          tag:
            type: object
    parameters:
      - name: appname
        in: path
        type: string
        required: true
      - name: build_args
        in: body
        required: true
        schema:
          $ref: '#/definitions/BuildArgs'
    responses:
      200:
        description: multiple stream messages
        schema:
          $ref: '#/definitions/StreamMessage'
      400:
        description: Error information
        schema:
          $ref: '#/definitions/Error'
        examples:
          error: "xxx"
    """
    # Flow: read build args from the websocket, check app / permission /
    # release, then either start a build task (when the per-app lock is
    # acquired) and stream its output, or attach to the output of the build
    # task that already holds the lock. After our own build exits, the
    # result is sent by email and to bearychat.
    payload = None
    total_msg = []  # human-readable build log, used for the email body
    client_closed = False
    phase = ""

    def handle_msg(ss):
        """Record one build message in ``total_msg``.

        Returns False when the message is unparsable or reports an error
        (the caller then stops streaming), True otherwise.
        """
        nonlocal phase
        try:
            m = json.loads(ss)
        except (TypeError, ValueError):
            return False
        if m['success'] is False:
            total_msg.append(m['error'])
            return False
        if phase != m['phase']:
            phase = m['phase']
            total_msg.append("***** PHASE {}".format(m['phase']))

        raw_data = m.get('raw_data', None)
        if raw_data is None:
            raw_data = {}
        if raw_data.get('error', None):
            total_msg.append((str(raw_data)))
            return False

        if phase.lower() == "pushing":
            if len(raw_data) == 1 and 'status' in raw_data:
                total_msg.append(raw_data['status'])
            elif 'id' in raw_data and 'status' in raw_data:
                # TODO: make the output like docker push
                total_msg.append("{}:{}".format(raw_data['id'], raw_data['status']))
            elif 'digest' in raw_data:
                total_msg.append("{}: digest: {} size: {}".format(
                    raw_data.get('status'), raw_data['digest'],
                    raw_data.get('size')))
            else:
                total_msg.append(str(m))
        else:
            total_msg.append(m['msg'])
        return True

    # Keep reading until the client sends valid build args (or disconnects).
    while True:
        message = socket.receive()
        if message is None:
            return
        try:
            payload = build_args_schema.loads(message)
            break
        except ValidationError as e:
            socket.send(json.dumps(e.messages))
        except JSONDecodeError as e:
            socket.send(json.dumps({'error': str(e)}))

    args = payload.data
    tag = args["tag"]
    block = args['block']

    app = App.get_by_name(appname)
    if not app:
        socket.send(
            make_errmsg('app {} not found'.format(appname), jsonize=True))
        return

    if not g.user.granted_to_app(app):
        socket.send(
            make_errmsg(
                'You\'re not granted to this app, ask administrators for permission',
                jsonize=True))
        return

    release = app.get_release_by_tag(tag)
    if not release:
        socket.send(
            make_errmsg('release {} not found.'.format(tag), jsonize=True))
        return

    if release.build_status:
        socket.send(make_msg("Finished", msg="already built", jsonize=True))
        return

    def heartbeat_sender():
        """Ping the client periodically until the socket is found closed."""
        nonlocal client_closed
        interval = WS_HEARTBEAT_TIMEOUT - 3
        if interval <= 0:
            interval = WS_HEARTBEAT_TIMEOUT
        while client_closed is False:
            try:
                time.sleep(interval)
                send_ping(socket)
            except WebSocketError:
                client_closed = True
                return

    gevent.spawn(heartbeat_sender)

    app_redis_key = make_app_redis_key(appname)
    # don't allow multiple build tasks for single app
    lock_name = "__app_lock_{}_build_aaa".format(appname)
    lck = redis_lock.Lock(rds, lock_name, expire=30, auto_renewal=True)
    with gevent.Timeout(APP_BUILD_TIMEOUT, False):
        if lck.acquire(blocking=block):
            async_result = build_image.delay(appname, tag)
            rds.hset(app_redis_key, "build-task-id", async_result.task_id)

            db.session.remove()
            try:
                for m in celery_task_stream_response(async_result.task_id, 900):
                    # no output message within the 900s (15 minutes) timeout:
                    # consider the build task stuck and terminate it.
                    # NOTE(review): presumably only reached if the stream
                    # yields None on timeout -- confirm against the generator.
                    if m is None:
                        async_result.revoke(terminate=True)
                        socket.send(
                            make_errmsg(
                                "doesn't receive any messages in last 15 minutes, build task for app {} seems to be stuck"
                                .format(appname),
                                jsonize=True))
                        break
                    try:
                        if client_closed is False:
                            socket.send(m)
                    except WebSocketError as e:
                        # keep consuming so the build log is still recorded
                        client_closed = True
                        logger.warning("Can't send build msg to client: %s", e)
                    if handle_msg(m) is False:
                        break
            except gevent.Timeout:
                async_result.revoke(terminate=True)
                logger.debug("********* build gevent timeout")
                socket.send(
                    make_errmsg("timeout when build app {}".format(appname),
                                jsonize=True))
            except Exception as e:
                async_result.revoke(terminate=True)
                socket.send(
                    make_errmsg("error when build app {}: {}".format(
                        appname, str(e)),
                                jsonize=True))
            finally:
                lck.release()
                rds.hdel(app_redis_key, "build-task-id")
                logger.debug("************ terminate task")
                # after build exit, we send an email to the user
                if phase.lower() != "finished":
                    subject = "KAE: Failed to build {}:{}".format(appname, tag)
                    bearychat_msg = "KAE: Failed to build **{}:{}**".format(
                        appname, tag)
                    text_title = '<h2 style="color: #ff6161;"> Build Failed </h2>'
                    build_result_text = '<strong style="color:#ff6161;"> build terminates prematurely.</strong>'
                else:
                    release.update_build_status(True)
                    subject = 'KAE: build {}:{} successfully'.format(
                        appname, tag)
                    bearychat_msg = 'KAE: build **{}:{}** successfully'.format(
                        appname, tag)
                    text_title = '<h2 style="color: #00d600;"> Build Success </h2>'
                    build_result_text = '<strong style="color:#00d600; font-weight: 600">Build %s %s done.</strong>' % (
                        appname, tag)
                email_text_tpl = (
                    '<div> <div>{}</div> '
                    '<div style="background:#000; padding: 15px; color: #c4c4c4;"> '
                    '<pre>{}</pre> </div> </div>')
                # entries may be non-string values taken from the build
                # messages, so coerce them before joining
                build_log = "\n".join(str(line) for line in total_msg)
                email_text = email_text_tpl.format(
                    text_title,
                    html.escape(build_log) + '\n' + build_result_text)
                email_list = [u.email for u in app.users]
                send_email(email_list, subject, email_text)
                bearychat_sendmsg(BEARYCHAT_CHANNEL, bearychat_msg)
        else:
            # Someone else holds the build lock: attach to that task's output.
            socket.send(
                make_msg(
                    "Unknown",
                    msg=
                    "there seems exist another build task, try to fetch output",
                    jsonize=True))
            build_task_id = rds.hget(app_redis_key, "build-task-id")
            if not build_task_id:
                socket.send(
                    make_errmsg("can't get build task id", jsonize=True))
                return
            if isinstance(build_task_id, bytes):
                build_task_id = build_task_id.decode('utf8')
            for m in celery_task_stream_response(build_task_id, 900):
                # no output message within the 900s (15 minutes) timeout:
                # report the task as stuck and stop following it
                try:
                    if m is None:
                        socket.send(
                            make_errmsg(
                                "doesn't receive any messages in last 15 minutes, build task for app {} seems to be stuck"
                                .format(appname),
                                jsonize=True))
                        break
                    if handle_msg(m) is False:
                        break
                    if client_closed is False:
                        socket.send(m)
                except WebSocketError:
                    client_closed = True
                    break
def delete(self):
    """Log the deletion, then delegate to the parent class ``delete``.

    :returns: whatever the parent ``delete`` returns.
    """
    logger.warning('Deleting DeployVersion %s', self)
    return super(DeployVersion, self).delete()
def _inner(*args, **kwargs):
    """Call the wrapped ``f`` and log, instead of raise, a websocket failure.

    :returns: the result of ``f``, or ``None`` when a ``WebSocketError`` was
        raised and logged.
    """
    try:
        return f(*args, **kwargs)
    except WebSocketError as e:
        logger.warning("send failed: %s", e)
def delete(self):
    """Log the deletion, then delegate to the parent class ``delete``.

    :returns: whatever the parent ``delete`` returns.
    """
    logger.warning('Deleting release %s', self)
    return super(Release, self).delete()