def k8s_project_update():
    """API endpoint: schedule a k8s deployment update for project/version.

    Expects a JSON body carrying ``project``, ``version``, ``access_token``
    and ``context`` (``replicas`` optional).  A valid, unexpired token gets
    the update scheduled asynchronously; every failure is reported to the
    ops dingding channel.  Always answers ``{'result': 'ok'}``.
    """
    db_token = db_op.platform_token
    params = request.get_json()
    new_replicas = None
    msg = None
    project = None
    version = None
    try:
        if not params:
            msg = '传递参数不能为空!'
        elif not {'project', 'version', 'access_token', 'context'}.issubset(params):
            msg = '传递参数错误!'
        else:
            token = params['access_token']
            project = params['project']
            version = params['version']
            context = params['context']
            new_image = "%s/%s:%s" % (docker_registry[context],
                                      project.split('.')[0], version)
            if 'replicas' in params:
                new_replicas = params['replicas']
            # A token is valid only while its expiry date lies in the future.
            today = time.strftime('%Y-%m-%d', time.localtime())
            matched = db_token.query.filter(
                and_(db_token.token == token,
                     db_token.expire > today)).all()
            if not matched:
                msg = '授权验证不通过!'
            else:
                # Hand the actual rollout to a background scheduler thread.
                redis_key = 'op_k8s_update_%s' % time.strftime(
                    '%Y%m%d%H%M%S', time.localtime())
                worker = produce.SchedulerPublish().Scheduler_mem(
                    k8s_resource.object_update,
                    [context, new_image, new_replicas, version, redis_key, 'api'])
                worker.start()
    except Exception as e:
        msg = e
    finally:
        if msg:
            # Any failure path ends up here and is pushed to dingding.
            ops_token = app.config.get('OPS_TOKEN')
            text = ['**容器平台自动上线:**',
                    "项目:%s" % project,
                    "版本:%s" % version,
                    "Error:%s" % msg,
                    '**请确认请求正确!**']
            tools.dingding_msg(text, ops_token)
        return jsonify({'result': 'ok'})
def k8s_health_check():
    """Health-check every configured k8s cluster context.

    Per context: (1) alert on nodes whose last condition type is not
    'Ready'; (2) TCP-probe private-network endpoint addresses in the
    ``default`` namespace and alert on unreachable ports.

    Fix: the inner bare ``except:`` clauses were narrowed to
    ``except Exception`` so SystemExit/KeyboardInterrupt are no longer
    swallowed; per-item skip behavior is unchanged.
    """
    for context in contexts:
        config.load_kube_config(config_file, context=context)
        v1 = client.CoreV1Api()
        # --- node health ---
        try:
            ret = v1.list_node(watch=False)
            for i in ret.items:
                if 'node-role.kubernetes.io/master' in i.metadata.labels:
                    node_type = 'master'
                else:
                    node_type = 'node'
                # NOTE(review): this reads the *type* of the last condition,
                # not its status value — it relies on 'Ready' being the last
                # condition reported; confirm this is the intended check.
                status = i.status.conditions[-1].type
                if status != 'Ready':
                    text = ['**容器平台NODE报警:%s**' % i.metadata.name,
                            '节点类型:%s' % node_type,
                            '节点状态:%s' % status,
                            '需及时处理!']
                    tools.dingding_msg(text, token=ops_token)
        except Exception as e:
            logging.error(e)
        # --- endpoint health ---
        try:
            ret = v1.list_namespaced_endpoints('default')
            for i in ret.items:
                try:
                    for infos in i.subsets:
                        try:
                            for info in infos.addresses:
                                try:
                                    # Only probe private/in-cluster addresses
                                    # whose first two octets are 172.16 or 10.10.
                                    ip_header = '.'.join(str(info.ip).split('.')[:2])
                                    if '{}.'.format(ip_header) in ('172.16.', '10.10.'):
                                        if not tcpping(host=info.ip,
                                                       port=infos.ports[0].port,
                                                       timeout=5):
                                            text = ['**容器平台endpoints报警:**',
                                                    'IP:%s' % info.ip,
                                                    '服务端口:%s' % infos.ports[0].port,
                                                    '服务端口不可用,需及时处理!']
                                            tools.dingding_msg(text)
                                except Exception:
                                    continue
                        except Exception:
                            continue
                except Exception:
                    continue
        except Exception as e:
            logging.error(e)
def object_update(args):
    """Roll a k8s deployment to a new image (and optionally new replica
    count), verify the rollout, persist the result, and roll back on
    failure.  Progress is streamed through a redis list (``redis_key``);
    when ``channel == 'api'`` a dingding summary is sent at the end.

    args: (new_image, new_replicas, version, redis_key, channel)

    Fixes:
    - the docker_run query result was read via ``eval(run_args[0][0])`` /
      ``eval(run_args[0][1])`` although ``run_args`` is still None at that
      point — the result is in ``vals``;
    - ``text`` could be unbound in ``finally`` (NameError) when the
      deliberate abort path fires with ``channel == 'api'``; it is now
      initialized to None and the final send is guarded.
    """
    try:
        namespace = "default"
        mounts = None
        healthcheck = None
        sidecar = None
        run_args = None
        text = None
        new_image, new_replicas, version, redis_key, channel = args
        if new_image and redis_key:
            db_k8s = db_op.k8s_deploy
            db_docker_run = db_op.docker_run
            # deployment name is the image name without registry/tag
            dm_name = new_image.split('/')[-1].split(':')[0]
            # Latest non-deleted deploy record supplies the baseline config.
            values = db_k8s.query.with_entities(
                db_k8s.project, db_k8s.container_port, db_k8s.image,
                db_k8s.war, db_k8s.replicas, db_k8s.re_requests,
                db_k8s.re_limits).filter(
                    and_(db_k8s.deployment == dm_name,
                         db_k8s.action != 'delete')).order_by(
                             desc(db_k8s.id)).limit(1).all()
            (project, container_port, image, war, replicas,
             re_requests, re_limits) = values[0]
            vals = db_docker_run.query.with_entities(
                db_docker_run.run_args, db_docker_run.side_car).filter(
                    db_docker_run.deployment == dm_name).all()
            if vals:
                # BUG FIX: originally read eval(run_args[0][0]) /
                # eval(run_args[0][1]) while run_args was still None.
                run_args = eval(vals[0][0])
                sidecar = eval(vals[0][1])
            war = download_war(dm_name, version, run_args, redis_key)
            if not war:
                _flow_log("params error,update fail!")
                # HACK (preserved): raising lpush()'s int return value
                # yields a TypeError whose message contains 'BaseException';
                # the outer handler keys off that marker to abort silently
                # after the message has been queued.
                raise Redis.lpush(redis_key, "params error,update fail!")
            if not make_image(new_image, redis_key):
                _flow_log("image record not exists,update fail!")
                raise Redis.lpush(redis_key, "image record not exists,update fail!")
            try:
                _flow_log('start deploy image %s ......' % new_image)
                Redis.lpush(redis_key, 'start deploy image %s ......' % new_image)
                re_requests = eval(re_requests)
                re_limits = eval(re_limits)
                container_port = container_port.split(',')
                k8s = k8s_object(dm_name, image, container_port, replicas,
                                 mounts, healthcheck, sidecar, re_requests,
                                 re_limits)
                deployment = k8s.export_deployment()
                # Update container image (and replicas when requested).
                deployment.spec.template.spec.containers[0].image = new_image
                if new_replicas:
                    deployment.spec.replicas = int(new_replicas)
                    replicas = new_replicas
                # Patch the live deployment.
                try:
                    api_instance = client.CoreV1Api()
                    ret = api_instance.list_namespaced_pod(namespace=namespace)
                    # remember pre-update pods so check_pod can detect turnover
                    old_pos = [i.metadata.name for i in ret.items
                               if i.metadata.name.startswith(dm_name)]
                    api_instance = client.ExtensionsV1beta1Api()
                    api_instance.patch_namespaced_deployment(
                        name=dm_name, namespace=namespace, body=deployment)
                except Exception as e:
                    logging.error(e)
                    _flow_log('deployment parameter fail!')
                    Redis.lpush(redis_key, 'deployment parameter fail!')
                else:
                    _flow_log('开始进行更新后的结果验证......')
                    Redis.lpush(redis_key, '开始进行更新后的结果验证......')
                    if check_pod(dm_name, replicas, old_pos, redis_key):
                        # Rollout verified: persist the new deploy record.
                        v = db_k8s(
                            project=project, deployment=dm_name,
                            image=new_image, war=war,
                            container_port=container_port, replicas=replicas,
                            re_requests=str(re_requests).replace("'", '"'),
                            re_limits=str(re_limits).replace("'", '"'),
                            action='update',
                            update_date=time.strftime('%Y-%m-%d', time.localtime()),
                            update_time=time.strftime('%H:%M:%S', time.localtime()))
                        db_op.DB.session.add(v)
                        db_op.DB.session.commit()
                        _flow_log('%s 镜像更新成功!' % new_image)
                        Redis.lpush(redis_key, '%s 镜像更新成功!' % new_image)
                        if channel == 'api':
                            text = ['**容器平台自动上线:**',
                                    "项目:%s" % project,
                                    "版本:%s" % version,
                                    "操作:更新成功",
                                    '**请关注业务健康状况!**']
                    else:
                        # Verification failed: restore the previous image.
                        deployment.spec.template.spec.containers[0].image = image
                        if image == new_image:
                            # same tag re-pushed: force pod recreation
                            delete_pod(dm_name)
                        api_instance = client.ExtensionsV1beta1Api()
                        api_instance.patch_namespaced_deployment(
                            name=dm_name, namespace=namespace, body=deployment)
                        _flow_log('%s 镜像更新失败并自动回滚!' % new_image)
                        Redis.lpush(redis_key, '%s 镜像更新失败并自动回滚!' % new_image)
                        if channel == 'api':
                            text = ['**容器平台自动上线:**',
                                    "项目:%s" % project,
                                    "版本:%s" % version,
                                    "操作:失败并回滚",
                                    '**需要手动处理!**']
            except Exception as e:
                logging.error(e)
                _flow_log('fail:%s' % e)
                Redis.lpush(redis_key, 'fail:%s' % e)
                if channel == 'api':
                    text = ['**容器平台自动上线:**',
                            "项目:%s" % project,
                            "版本:%s" % version,
                            "操作:更新未完成",
                            '**需要手动处理!**']
    except Exception as e:
        logging.error(e)
        # 'BaseException' in the message marks the deliberate abort above.
        if 'BaseException' not in str(e):
            _flow_log('fail:%s' % e)
            Redis.lpush(redis_key, 'fail:%s' % e)
            if channel == 'api':
                text = ['**容器平台自动上线:**',
                        "项目:%s" % project,
                        "版本:%s" % version,
                        "操作:更新未完成",
                        '**需要手动处理!**']
    finally:
        db_op.DB.session.remove()
        Redis.lpush(redis_key, '_End_')
        # FIX: guard on text — it is None on the silent-abort path.
        if channel == 'api' and text:
            tools.dingding_msg(text, ops_token)
def ensure_server_auth():
    """Server-access work-order management view.

    Handles three request shapes, then always renders the order list:
    1. a ``ticket`` (or ``action=activate`` + ``work_number``): accept the
       order (status -> 受理中) and bind it to the current ding user;
    2. an ``action`` in {complete, deny, agree} + ``work_number``: advance
       the order's status, notify the applicant by mail/dingding;
    3. ``action=query``: filter the rendered list to one order.

    NOTE(review): external names (``dt``, ``sender``, ``receiver``,
    ``work_token``, ``mapp``, ``mail``, ``g``) are module-level — not
    visible here; assumed configured by the enclosing app.
    """
    try:
        db_work_order = db_op.work_order
        db_server_auth = db_op.server_auth
        db_sso = db_op.user_sso
        msg = None
        source = 'ensure_server_auth'
        # redis set of freshly accepted order numbers for today
        Key = 'new_server_auth_work_number_%s' % dt
        actions = {'complete': '已完成', 'deny': '审批拒绝', 'agree': '审批通过'}
        action = tools.http_args(request, 'action')
        work_number = tools.http_args(request, 'work_number')
        # --- ticket validation / order acceptance ---
        ticket = tools.http_args(request, 'ticket')
        if ticket or (action == 'activate' and work_number):
            if ticket:
                # a ticket maps to the order number it was issued for
                work_number = Redis.get('work_order_ticket_%s' % ticket)
            if work_number:
                val = db_work_order.query.filter(
                    db_work_order.work_number == int(work_number)).all()
                if val:
                    # only orders already approved (审批通过) can be accepted
                    val = db_work_order.query.filter(
                        and_(db_work_order.work_number == int(work_number),
                             db_work_order.source == source,
                             db_work_order.status == '审批通过')).all()
                    if val:
                        db_work_order.query.filter(
                            and_(db_work_order.work_number == int(work_number),
                                 db_work_order.source == source)).update({
                                     db_work_order.dingid: g.dingId,
                                     db_work_order.status: '受理中'
                                 })
                        db_op.DB.session.commit()
                        Redis.sadd(Key, work_number)
                        # ticket is single-use
                        Redis.delete('work_order_ticket_%s' % ticket)
                        msg = "%s工单已受理!" % work_number
                    else:
                        msg = "工单暂时无法受理!"
                else:
                    msg = "无效的请求验证地址!"
            else:
                msg = "无效的请求验证地址!"
        # --- status transition (complete / deny / agree) ---
        if action and work_number:
            if action in actions:
                if action in ('deny', 'agree'):
                    # approval requires grade '0'; AssertionError jumps to
                    # the except handler, keeping msg as set here
                    if g.grade[0] != '0':
                        msg = '当前用户无审批权限!'
                        raise AssertionError
                val = db_work_order.query.filter(
                    and_(db_work_order.work_number == int(work_number),
                         db_work_order.source == source,
                         db_work_order.status.in_(('待审批', '受理中')))).all()
                if val:
                    db_work_order.query.filter(
                        and_(db_work_order.work_number == int(work_number),
                             db_work_order.source == source)).update(
                                 {db_work_order.status: actions[action]})
                    db_op.DB.session.commit()
                    Redis.srem(Key, work_number)
                    # notify the applicant by mail, if a mail body was staged
                    if Redis.exists('op_send_mail_html_%s' % work_number):
                        dingid = db_server_auth.query.with_entities(
                            db_server_auth.dingid).filter(
                                db_server_auth.work_number == work_number).all()
                        mailer = db_sso.query.with_entities(
                            db_sso.mail).filter(
                                db_sso.dingunionid == dingid[0][0]).all()
                        if mailer:
                            Msg = Message("%s工单进度通知" % work_number,
                                          sender=sender,
                                          recipients=[mailer[0][0]],
                                          cc=[receiver],
                                          charset='utf-8')
                            mail_html = Redis.get('op_send_mail_html_%s' % work_number)
                            alarm_html = '<p style="color:red">工单当前进度:%s</p>' % actions[
                                action]
                            Msg.html = '%s%s' % (mail_html, alarm_html)
                            if action == 'agree':
                                # approved orders also get the follow-up URL
                                Msg.html = '%s%s%s' % (
                                    mail_html, alarm_html,
                                    Redis.get('op_send_mail_url_%s' % work_number))
                            with mapp.app_context():
                                mail.send(Msg)
                    # dingding notification for approval decisions
                    if Redis.exists(
                            'op_send_dingding_msg_%s' % work_number) and action in ('deny', 'agree'):
                        # NOTE(review): eval of a redis-stored literal —
                        # trusted-internal data assumed; confirm
                        text = eval(
                            Redis.get('op_send_dingding_msg_%s' % work_number))
                        text.append("##### 审批结果:%s" % actions[action])
                        if action == 'agree':
                            text.append("##### %s" % Redis.get(
                                'op_send_dingding_url_%s' % work_number))
                        tools.dingding_msg(text, token=work_token)
                    msg = "%s工单当前状态:%s!" % (work_number, actions[action])
            else:
                msg = "无效操作!"
    except Exception as e:
        logging.error(e)
        if not msg:
            msg = "未知异常错误!"
    finally:
        # --- always render the (possibly filtered) order list ---
        tables = ('工单号', '日期', '申请人', '部门', '系统账号', '服务器列表',
                  '申请权限', '所属用途', '执行人', '详情', '状态', '操作')
        users = db_sso.query.with_entities(db_sso.dingunionid,
                                           db_sso.realName,
                                           db_sso.department).all()
        # dingunionid -> (realName, department)
        users = {info[0]: info[1:] for info in users}
        servers = db_server_auth.query.with_entities(
            db_server_auth.work_number, db_server_auth.date,
            db_server_auth.account, db_server_auth.servers,
            db_server_auth.auth_level, db_server_auth.purpose,
            db_server_auth.dingid).all()
        # work_number -> (date, account, servers, auth_level, purpose, dingid)
        servers = {info[0]: info[1:] for info in servers}
        work_orders = db_work_order.query.with_entities(
            db_work_order.work_number, db_work_order.dingid,
            db_work_order.status).filter(
                db_work_order.source == source).order_by(desc(
                    db_work_order.id)).all()
        if action and work_number:
            if action == 'query':
                # NOTE(review): unlike above, work_number is compared without
                # int() here — relies on implicit coercion; confirm
                work_orders = db_work_order.query.with_entities(
                    db_work_order.work_number, db_work_order.dingid,
                    db_work_order.status).filter(
                        and_(db_work_order.source == source,
                             db_work_order.work_number == work_number)).all()
        if work_orders:
            work_orders = [list(info) for info in work_orders]
            for info in work_orders:
                # append request details (minus applicant dingid) ...
                info.extend(servers[info[0]][:-1])
                # ... then insert applicant name and department at cols 4/5
                info.insert(4, users[servers[info[0]][-1]][0])
                info.insert(5, users[servers[info[0]][-1]][-1])
                # executor name, when an operator has accepted the order
                if info[1]:
                    info.append(users[info[1]][0])
                else:
                    info.append('')
        new_work_number = Redis.smembers(Key)
        return render_template('ensure_server_auth.html',
                               tables=tables,
                               work_orders=work_orders,
                               msg=msg,
                               new_work_number=new_work_number,
                               total='服务器权限工单管理')
def check_slave(info):
    """Verify that slave redis instances replicating (server_id, port) are
    in sync: a probe key present on the master but missing on the slave
    triggers a dingding alert.

    NOTE(review): ``db_redis``, ``server_ids``, ``Key``, ``blacklist``,
    ``S_Masters``, ``ops_token`` and ``redis_token`` are module-level names
    not visible here; assumed configured by the enclosing module.
    """
    # info: (server_id, port) of a master instance
    server_id, port = info
    # port 10080 is excluded from the sync check
    if int(port) not in [10080]:
        # ports of slaves replicating this master
        slave_ports = db_redis.query.with_entities(distinct(db_redis.port)).filter(
            and_(db_redis.Master_Host == server_id,
                 db_redis.Master_Port == port)).all()
        if slave_ports:
            slave_ports = [int(sport[0]) for sport in slave_ports]
            for slave_port in slave_ports:
                # slave instance records on that port
                redis_lists = db_redis.query.with_entities(
                    db_redis.server_id, db_redis.port,
                    db_redis.requirepass).filter(
                        and_(db_redis.slave == '是',
                             db_redis.port == slave_port)).all()
                # NOTE: loop variable deliberately shadows the ``info`` param
                for info in redis_lists:
                    text = None
                    slave_lists = []
                    server_id, sport, requirepass = info
                    try:
                        # map DB server id -> reachable IP
                        sip = server_ids[str(server_id)]
                    except:
                        continue
                    else:
                        try:
                            # connect to the slave (re-connect with password
                            # when one is configured)
                            RC = redis.StrictRedis(sip, int(sport),
                                                   decode_responses=True)
                            if requirepass:
                                RC = redis.StrictRedis(sip, int(sport),
                                                       password=requirepass,
                                                       decode_responses=True)
                        except:
                            continue
                        else:
                            # resolve this slave's master address
                            mvals = db_redis.query.with_entities(
                                db_redis.Master_Host,
                                db_redis.Master_Port).filter(
                                    and_(db_redis.server_id == server_id,
                                         db_redis.port == sport)).all()
                            mip, mport = mvals[0]
                            mip = server_ids[str(mip)]
                            # probe value as seen by the slave
                            val = RC.get(Key)
                            try:
                                # reconnect RC to the master
                                # NOTE(review): reuses the slave's
                                # requirepass for the master — assumes both
                                # share credentials; confirm
                                RC = redis.StrictRedis(mip, int(mport),
                                                       decode_responses=True)
                                if requirepass:
                                    RC = redis.StrictRedis(mip, int(mport),
                                                           password=requirepass,
                                                           decode_responses=True)
                            except:
                                continue
                            else:
                                if sip not in blacklist:
                                    mval = RC.get(Key)
                                    # master has the probe key, slave does
                                    # not -> replication lag/failure
                                    if mval and not val:
                                        text = ['**线上Redis同步报警:**',
                                                "同步Redis:%s:%s 验证数据:%s" % (mip, mport, mval),
                                                "延时Redis:%s:%s 验证数据:%s" % (sip, sport, val),
                                                "数据同步异常!",
                                                '**请及时进行处理!**']
                                    if text:
                                        alarm_info = '%s:%s' % (server_id, sport)
                                        # intermediate nodes (master of other
                                        # slaves) are exempt from alerting
                                        if alarm_info in S_Masters:
                                            vals = db_redis.query.with_entities(
                                                db_redis.server_id,
                                                db_redis.port).filter(
                                                    and_(db_redis.Master_Host == server_id,
                                                         db_redis.Master_Port == sport)).all()
                                            if vals:
                                                slave_lists.extend(
                                                    ['%s:%s' % val for val in vals])
                                        if alarm_info not in slave_lists:
                                            # dedicated token for selected ports
                                            token = ops_token
                                            if int(sport) in (8379, 6387, 17379):
                                                token = redis_token
                                            tools.dingding_msg(text, token=token)
def k8s_ingress_log():
    """Collect the last minute of k8s ingress access logs from ES and keep
    per-day redis counters: total PV, per-domain hit counts, per-domain
    status-code and response-time stats.  Domains whose HTTP-200 ratio
    drops below 99% repeatedly trigger an alert and an automatic restart
    (delete) of the backing pods.

    NOTE(review): ``RC``, ``es``, ``loging``, ``contexts`` etc. are
    module-level; ES response shape (``hits.total`` as a number) matches
    pre-7.x elasticsearch — confirm client version.
    """
    td = time.strftime('%Y-%m-%d', time.localtime())
    th = time.strftime('%H:%M', time.localtime())
    Key = 'op_k8s_ingress_log'
    stat_key = 'op_k8s_ingress_stat'
    rt_key = 'op_k8s_ingress_rt'
    k8s_domains_key = 'op_k8s_domains_%s' % td
    k8s_pv_key = 'op_k8s_pv_%s' % td
    now_date = datetime.datetime.now()
    # one-minute window, formatted with the +08:00 offset used in the logs
    lte_date = now_date.strftime('%Y-%m-%dT%H:%M:%S+08:00')
    gte_date = now_date - datetime.timedelta(minutes=1)
    gte_date = gte_date.strftime('%Y-%m-%dT%H:%M:%S+08:00')
    Domains = []

    def auto_delete_pod(pod_name, text):
        # Delete every pod of the deployment, 30s apart, counting deletions
        # in redis; appends the handled-pod count to the alert text.
        # NOTE: reads ``delete_pod_key`` from the enclosing scope — it is
        # assigned in the alarm loop below before this closure is called.
        try:
            namespace = "default"
            api_instance = client.CoreV1Api()
            ret = api_instance.list_namespaced_pod(namespace=namespace)
            for i in ret.items:
                if i.metadata.name.startswith(pod_name):
                    RC.incr(delete_pod_key, 1)
                    api_instance.delete_namespaced_pod(
                        name=i.metadata.name, namespace=namespace,
                        body=client.V1DeleteOptions())
                    time.sleep(30)
        except Exception as e:
            logging.error(e)
        finally:
            counts = RC.get(delete_pod_key)
            RC.delete(delete_pod_key)
            text.append('**自动处理问题pod数量:{}**'.format(counts))
            return text

    try:
        loging.write('start %s ......' % k8s_ingress_log.__name__)
        # --- platform-wide concurrency (weighted by slow responses) ---
        try:
            body = {"query": {"range": {"time_iso8601": {"gte": "%s" % gte_date,
                                                         "lte": "%s" % lte_date}}},
                    "aggs": {"avg_resp": {"avg": {"field": "upstream_response_time"}}}}
            res = es.search(index='k8s-ingress-log-*', body=body)
            if res['hits']['total']:
                rt = float(res['aggregations']['avg_resp']['value'])
                counts = int(res['hits']['total'])
                if rt > 1:
                    # slow minute: weight the count by avg response time
                    RC.hset('%s_%s' % (Key, td), th, counts * rt)
                else:
                    RC.hset('%s_%s' % (Key, td), th, counts)
                RC.expire('%s_%s' % (Key, td), 864000)
                # cumulative page views for the day
                RC.incr(k8s_pv_key, counts)
                RC.expire(k8s_pv_key, 864000)
        except Exception as e:
            logging.error(e)
        # --- per-domain status-code stats for the last minute ---
        try:
            body = {'size': 0,
                    "query": {"bool": {"must": [{"range": {"time_iso8601": {"gte": gte_date,
                                                                            "lte": lte_date}}}]}},
                    "aggs": {"hosts": {"terms": {"field": "host.keyword", "size": 100},
                                       "aggs": {"counts": {"terms": {"field": "status",
                                                                     "size": 100}}}}}}
            res = es.search(index='k8s-ingress-log-*', body=body)
            for infos in res['aggregations']['hosts']['buckets']:
                try:
                    domain = infos['key']
                    Domains.append(domain)
                    counts = int(infos['doc_count'])
                    # daily set of seen domains
                    RC.sadd(k8s_domains_key, domain)
                    # per-domain hit count for this minute
                    RC.hset('%s_%s_%s' % (Key, domain, td), th, counts)
                    RC.expire('%s_%s_%s' % (Key, domain, td), 864000)
                    # status-code histogram for this minute
                    vals = {info['key']: info['doc_count']
                            for info in infos['counts']['buckets']}
                    RC.hset('%s_%s_%s' % (stat_key, domain, td), th, vals)
                    RC.expire('%s_%s_%s' % (stat_key, domain, td), 864000)
                except:
                    continue
        except Exception as e:
            logging.error(e)
        try:
            # --- per-domain average response time for the last minute ---
            body = {'size': 0,
                    "query": {"bool": {"must": [{"range": {"time_iso8601": {"gte": gte_date,
                                                                            "lte": lte_date}}}]}},
                    "aggs": {"hosts": {"terms": {"field": "host.keyword", "size": 100},
                                       "aggs": {"avg_resp": {"avg": {"field": "upstream_response_time"}}}}}}
            res = es.search(index='k8s-ingress-log-*', body=body)
            for infos in res['aggregations']['hosts']['buckets']:
                try:
                    domain = infos['key']
                    RC.hset('%s_%s_%s' % (rt_key, domain, td),
                            th, float('%.3f' % infos['avg_resp']['value']))
                    RC.expire('%s_%s_%s' % (rt_key, domain, td), 864000)
                except:
                    continue
        except Exception as e:
            logging.error(e)
        try:
            # --- availability alarms + automatic pod recycling ---
            for domain in Domains:
                text = ['**容器平台业务报警:%s**' % domain]
                stat_vals = 0.0
                # evaluate the minute just written above
                nd = now_date - datetime.timedelta(minutes=1)
                th = nd.strftime('%H:%M')
                vals = RC.hget('%s_%s_%s' % (stat_key, domain, td), th)
                if vals:
                    # NOTE(review): eval of redis-stored dict repr —
                    # trusted-internal data assumed
                    vals = eval(str(vals))
                    if 200 in vals:
                        stat_vals = vals[200]
                    if len(vals) > 1:
                        total_vals = reduce(lambda x, y: x + y, vals.values())
                    else:
                        total_vals = stat_vals
                    if stat_vals > 0:
                        # ratio of HTTP 200 to all responses
                        diff_vals = float(stat_vals) / float(total_vals)
                        rt_vals = RC.hget('%s_%s_%s' % (rt_key, domain, td), th)
                        if diff_vals < 0.99:
                            # availability below 99%: bump a short-lived
                            # counter; act only after >3 strikes in 180s
                            Key = 'op_k8s_project_alarm'
                            RC.incr(Key, 1)
                            RC.expire(Key, 180)
                            if int(RC.get(Key)) > 3:
                                db_project = db_op.project_list
                                project = db_project.query.with_entities(
                                    distinct(db_project.project)).filter(
                                        db_project.domain.like('%{}%'.format(domain))).all()
                                if project:
                                    db_k8s_deploy = db_op.k8s_deploy
                                    pod_name = db_k8s_deploy.query.with_entities(
                                        db_k8s_deploy.deployment).filter(
                                            db_k8s_deploy.project == project[0][0]).all()
                                    if pod_name:
                                        pod_name = pod_name[0][0]
                                        text.append("服务可用率:{}%".format(
                                            '%.2f' % (diff_vals * 100)))
                                        if rt_vals:
                                            text.append("服务响应时间:{}ms".format(
                                                int(float(rt_vals) * 1000)))
                                        # at most one auto-recycle per pod
                                        # per day
                                        delete_pod_key = 'op_auto_delete_pod_%s_%s' % (pod_name, td)
                                        if not RC.exists(delete_pod_key):
                                            text = auto_delete_pod(pod_name, text)
                                            tools.dingding_msg(text)
                                RC.delete(Key)
        except Exception as e:
            logging.error(e)
    except Exception as e:
        logging.error(e)
    finally:
        db_op.DB.session.remove()
        for key in (k8s_domains_key, k8s_pv_key):
            RC.expire(key, 864000)
        loging.write('complete %s !' % k8s_ingress_log.__name__)
def alarm_load():
    """Find servers with sustained high CPU load (zabbix snapshot confirmed
    by a 10-minute influxdb average), try to restart the offending tomcat
    service over ssh, and alert via dingding.

    Fixes:
    - the ssh-login-failure branch referenced ``text`` before assignment
      (NameError); an alert message is now built there;
    - the local assignment ``ops_token = None`` made ``ops_token``
      function-local, so the jar-project alert path raised
      UnboundLocalError; a separate ``msg_token`` local is used instead;
    - ``if server_id[0]`` raised IndexError on an empty query result; now
      ``if server_id``;
    - ``text`` is reset per host so a previous host's alert cannot leak
      into the next iteration.
    """
    try:
        loging.write("start %s ......" % alarm_load.__name__)
        whitelist = []
        dict_load = defaultdict()
        db_server = db_idc.idc_servers
        db_zabbix = db_idc.zabbix_info
        db_project = db_op.project_list
        db_project_other = db_op.project_other
        Influx_cli = InfluxDBClient(influxdb_host, influxdb_port,
                                    influxdb_user, influxdb_pw, 'zabbix_infos')
        # candidates: pingable hosts whose zabbix snapshot shows load > 100
        host_infos = db_zabbix.query.with_entities(
            db_zabbix.ip, db_zabbix.ssh_port, db_zabbix.hostname,
            db_zabbix.update_time).filter(
                and_(db_zabbix.cpu_load > 100,
                     db_zabbix.icmpping == 1)).all()
        Key = "op_alarm_load_whitelist"
        if RC_CLUSTER.exists(Key):
            whitelist = RC_CLUSTER.smembers(Key)
        # confirm each suspect against its 10-minute influxdb average
        for infos in host_infos:
            host, ssh_port, hostname, update_time = infos
            # only trust zabbix data refreshed today
            if time.strftime('%Y-%m-%d', time.localtime()) in update_time:
                try:
                    if not host.startswith('172.16.19.'):
                        now_time = datetime.datetime.now()
                        dt = now_time - datetime.timedelta(minutes=10)
                        dt = dt.strftime('%Y-%m-%dT%H:%M:%SZ')
                        cmd = "select mean(*) from server_infos where time >='%s' group by hostname" % dt
                        results = Influx_cli.query(cmd)
                        if results:
                            for key in results.keys():
                                if hostname == key[-1]['hostname']:
                                    for infos in results[key]:
                                        if infos['mean_cpu_load'] > 100:
                                            dict_load[hostname] = (
                                                host, ssh_port,
                                                int(infos['mean_cpu_load']))
                except Exception as e:
                    logging.error(e)
                    continue
        # restart / alert for every confirmed host
        if dict_load:
            for hostname in dict_load:
                host, ssh_port, cpu_load = dict_load[hostname]
                text = None          # FIX: reset per host
                msg_token = ops_token  # FIX: do not shadow the module token
                # can we log in at all?
                try:
                    Ssh = SSH.ssh(ip=host, ssh_port=ssh_port)
                except Exception as e:
                    logging.error(e)
                    if not hostname.startswith('nj'):
                        Ssh_Key = "op_ssh_login_fail_%s" % hostname
                        RC.incr(Ssh_Key, 1)
                        RC.expire(Ssh_Key, 350)
                        # FIX: original referenced an undefined ``text``
                        # here (NameError); message reconstructed — adjust
                        # wording as desired.
                        text = ['**线上服务器报警:%s**' % hostname,
                                "CPU持续平均使用率:{0}%".format(cpu_load),
                                'ssh登录失败,无法自动处理!',
                                '**请及时进行处理!**']
                        if int(RC.get(Ssh_Key)) > 5:
                            # escalate to the ops channel after 5 failures
                            tools.dingding_msg(text, token=ops_token)
                        else:
                            tools.dingding_msg(text)
                else:
                    try:
                        Key = 'op_alarm_load_%s' % hostname
                        Project = None
                        # the counter value doubles as "minutes in alarm"
                        RC_CLUSTER.incr(Key, 5)
                        RC_CLUSTER.expire(Key, 600)
                        ctime = int(RC_CLUSTER.get(Key))
                        if hostname not in whitelist:
                            # top CPU consumer on the host
                            results = Ssh.Run("ps -aux | sort -k3nr |head -n 1")
                            if results['stdout']:
                                results = results['stdout'][0].strip().split()
                                try:
                                    if results[-1].endswith('-rpc.jar'):
                                        pro_jar = results[-1]
                                        # only this jar is restart-eligible
                                        if pro_jar in ['moji-location-rpc.jar']:
                                            Project = pro_jar.split('.')[0]
                                    else:
                                        # tomcat: project name from catalina.home
                                        for line in results:
                                            if '-Dcatalina.home=' in line:
                                                Project = line.strip().split('/')[-1]
                                                break
                                except Exception as e:
                                    logging.error(e)
                                if Project:
                                    try:
                                        # tomcat project on this host?
                                        ret = db_project.query.filter(
                                            and_(db_project.ip == host,
                                                 db_project.ssh_port == ssh_port)).all()
                                        if ret:
                                            # restart the problem tomcat
                                            result = Ssh.Run("supervisorctl restart {0}".format(Project))
                                            if result['stderr']:
                                                text = ['**线上服务重启:%s**' % hostname,
                                                        "CPU持续{0}分钟平均使用率:{1}%".format(ctime, cpu_load),
                                                        "相关进程:{0}".format(Project),
                                                        '**服务重启失败,需手动处理!**']
                                            else:
                                                text = ['**线上服务重启:%s**' % hostname,
                                                        "CPU持续{0}分钟平均使用率:{1}%".format(ctime, cpu_load),
                                                        "相关进程:{0}".format(Project),
                                                        '**服务重启成功!**']
                                            # restart notices go without token
                                            msg_token = None
                                        else:
                                            # jar project: warn only
                                            server_id = db_server.query.with_entities(
                                                db_server.id).filter(
                                                    db_server.hostname == hostname).all()
                                            # FIX: was server_id[0] —
                                            # IndexError on empty result
                                            if server_id:
                                                ret = db_project_other.query.filter(
                                                    db_project_other.server_id == int(server_id[0][0])).all()
                                                if ret:
                                                    text = ['**线上服务器预警:%s**' % hostname,
                                                            "CPU持续{0}分钟平均使用率:{1}%".format(ctime, cpu_load),
                                                            "相关进程:{0}".format(Project),
                                                            '**请及时进行处理!**']
                                        if text and not hostname.startswith('nj'):
                                            tools.dingding_msg(text, msg_token)
                                    except Exception as e:
                                        logging.error(e)
                    finally:
                        Ssh.Close()
    finally:
        loging.write("%s complete!" % alarm_load.__name__)
        db_idc.DB.session.remove()
        db_op.DB.session.remove()
def object_update(args):
    """Update (or roll back) a k8s deployment to a new image across one or
    all cluster contexts, verify the rollout, persist the result, and roll
    back automatically on a failed update.  Progress streams through a
    redis list; ``channel == 'api'`` additionally sends a dingding summary.

    args: (context, new_image, version, rollback, redis_key, channel, user)

    Fix: the dingding texts in the success branch were swapped — a rollback
    reported "容器平台自动上线/更新成功" and an update reported
    "容器平台自动回滚/回滚成功".  The branches now match the adjacent log
    messages.  The final send is also guarded on ``text`` being set.
    """
    try:
        db_k8s = db_op.k8s_deploy
        db_docker_run = db_op.docker_run
        namespace = "default"
        mounts = None
        text = None
        labels = None
        allcontexts = []
        context, new_image, version, rollback, redis_key, channel, user = args
        # deployment name is the image name without registry/tag
        dm_name = new_image.split('/')[-1].split(':')[0]
        # latest non-deleted record supplies the baseline deployment config
        values = db_k8s.query.with_entities(
            db_k8s.project, db_k8s.container_port, db_k8s.image,
            db_k8s.war, db_k8s.replicas, db_k8s.re_requests,
            db_k8s.re_limits, db_k8s.healthcheck).filter(
                and_(db_k8s.deployment == dm_name,
                     db_k8s.action != 'delete')).order_by(
                         desc(db_k8s.id)).limit(1).all()
        (project, container_port, image, war, replicas, re_requests,
         re_limits, healthcheck) = values[0]
    except Exception as e:
        logging.error(e)
    else:
        try:
            if new_image and redis_key:
                try:
                    vals = db_docker_run.query.with_entities(
                        db_docker_run.dockerfile, db_docker_run.run_args,
                        db_docker_run.side_car).filter(
                            and_(db_docker_run.deployment == dm_name,
                                 db_docker_run.context == context)).all()
                    docker_args, run_args, sidecar = vals[0]
                    if docker_args:
                        docker_args = eval(docker_args)
                    if run_args:
                        run_args = eval(run_args)
                except Exception as e:
                    logging.error(e)
                else:
                    if not rollback:
                        # fresh update: fetch the artifact and build the image
                        war = download_war(dm_name, version, docker_args,
                                           run_args, redis_key)
                        if not war:
                            _flow_log("params error,update fail!")
                            # HACK (preserved): raising lpush()'s int return
                            # value yields a TypeError whose message contains
                            # 'BaseException'; the outer handler keys off that
                            # marker to abort silently.
                            raise Redis.lpush(redis_key, "params error,update fail!")
                        if not make_image(new_image, redis_key):
                            _flow_log("image record not exists,update fail!")
                            raise Redis.lpush(
                                redis_key, "image record not exists,update fail!")
                    try:
                        re_requests = eval(re_requests)
                        re_limits = eval(re_limits)
                        allcontexts.append(context)
                        # 'all-cluster' fans the rollout out to every context
                        if 'all-cluster' in context:
                            allcontexts = contexts
                        for context in allcontexts:
                            _flow_log('开始更新 %s image %s ......' % (context, new_image))
                            Redis.lpush(redis_key, '*' * 80)
                            Redis.lpush(redis_key,
                                        '开始更新 %s image %s ......' % (context, new_image))
                            k8s = k8s_object(context, dm_name, image,
                                             container_port.split(','),
                                             replicas, mounts, labels,
                                             healthcheck, sidecar,
                                             re_requests, re_limits)
                            deployment = k8s.export_deployment()
                            # Update container image
                            deployment.spec.template.spec.containers[0].image = new_image
                            # Update the deployment
                            try:
                                api_instance = client.CoreV1Api()
                                ret = api_instance.list_namespaced_pod(
                                    namespace=namespace)
                                # remember pre-update pods so check_pod can
                                # detect turnover
                                old_pos = [i.metadata.name for i in ret.items
                                           if i.metadata.name.startswith(dm_name)]
                                api_instance = client.ExtensionsV1beta1Api()
                                api_instance.patch_namespaced_deployment(
                                    name=dm_name, namespace=namespace,
                                    body=deployment)
                            except Exception as e:
                                logging.error(e)
                                _flow_log('deployment parameter fail!')
                                Redis.lpush(redis_key, 'deployment parameter fail!')
                            else:
                                if rollback:
                                    action = 'rollback'
                                    _flow_log('开始进行回滚后的结果验证......')
                                    Redis.lpush(redis_key, '开始进行回滚后的结果验证......')
                                else:
                                    action = 'update'
                                    _flow_log('开始进行更新后的结果验证......')
                                    Redis.lpush(redis_key, '开始进行更新后的结果验证......')
                                if check_pod(context, dm_name, replicas,
                                             old_pos, redis_key):
                                    # rollout verified: persist the record
                                    v = db_k8s(
                                        project=project, context=context,
                                        deployment=dm_name, image=new_image,
                                        war=war,
                                        container_port=container_port,
                                        replicas=replicas,
                                        re_requests=str(re_requests).replace("'", '"'),
                                        re_limits=str(re_limits).replace("'", '"'),
                                        action=action,
                                        healthcheck=healthcheck,
                                        update_date=time.strftime('%Y-%m-%d', time.localtime()),
                                        update_time=time.strftime('%H:%M:%S', time.localtime()),
                                        user=user)
                                    db_op.DB.session.add(v)
                                    db_op.DB.session.commit()
                                    if rollback:
                                        _flow_log('%s 镜像回滚成功!' % new_image)
                                        Redis.lpush(redis_key, '%s 镜像回滚成功!' % new_image)
                                    else:
                                        _flow_log('%s 镜像更新成功!' % new_image)
                                        Redis.lpush(redis_key, '%s 镜像更新成功!' % new_image)
                                    if channel == 'api':
                                        # BUG FIX: these two branches were
                                        # swapped (rollback announced as
                                        # 上线/更新成功 and vice versa).
                                        if rollback:
                                            text = ['**容器平台自动回滚:**',
                                                    "项目:%s" % project,
                                                    "版本:%s" % version,
                                                    "操作:回滚成功",
                                                    '**请关注业务健康状况!**']
                                        else:
                                            text = ['**容器平台自动上线:**',
                                                    "项目:%s" % project,
                                                    "版本:%s" % version,
                                                    "操作:更新成功",
                                                    '**请关注业务健康状况!**']
                                else:
                                    if rollback:
                                        # a failed rollback cannot be retried
                                        # automatically
                                        _flow_log('%s 镜像回滚失败!' % new_image)
                                        Redis.lpush(redis_key, '%s 镜像回滚失败!' % new_image)
                                        if channel == 'api':
                                            text = ['**容器平台自动回滚:**',
                                                    "项目:%s" % project,
                                                    "版本:%s" % version,
                                                    "操作:回滚失败",
                                                    '**需要手动处理!**']
                                    else:
                                        # failed update: restore previous image
                                        deployment.spec.template.spec.containers[0].image = image
                                        if image == new_image:
                                            # same tag re-pushed: force pod
                                            # recreation
                                            delete_pod(context, dm_name)
                                        api_instance = client.ExtensionsV1beta1Api()
                                        api_instance.patch_namespaced_deployment(
                                            name=dm_name, namespace=namespace,
                                            body=deployment)
                                        _flow_log('%s 镜像更新失败并自动回滚!' % new_image)
                                        Redis.lpush(redis_key,
                                                    '%s 镜像更新失败并自动回滚!' % new_image)
                                        if channel == 'api':
                                            text = ['**容器平台自动上线:**',
                                                    "项目:%s" % project,
                                                    "版本:%s" % version,
                                                    "操作:失败并回滚",
                                                    '**需要手动处理!**']
                            Redis.lpush(redis_key, '*' * 80)
                    except Exception as e:
                        logging.error(e)
                        _flow_log('fail:%s' % e)
                        Redis.lpush(redis_key, 'fail:%s' % e)
                        if channel == 'api':
                            text = ['**容器平台自动上线:**',
                                    "项目:%s" % project,
                                    "版本:%s" % version,
                                    "操作:更新未完成",
                                    '**需要手动处理!**']
        except Exception as e:
            logging.error(e)
            # 'BaseException' in the message marks the deliberate abort above
            if 'BaseException' not in str(e):
                _flow_log('fail:%s' % e)
                Redis.lpush(redis_key, 'fail:%s' % e)
                if channel == 'api':
                    text = ['**容器平台自动上线:**',
                            "项目:%s" % project,
                            "版本:%s" % version,
                            "操作:更新未完成",
                            '**需要手动处理!**']
        finally:
            db_op.DB.session.remove()
            Redis.lpush(redis_key, '_End_')
            # FIX: guard on text — it stays None on the silent-abort path
            if channel == 'api' and text:
                tools.dingding_msg(text, ops_token)