def list_events(self):
    """Collect events from every namespace of the cluster.

    Builds two parallel results:
      * ``events`` — plain-dict snapshots of every (non-noise) event;
      * ``actions`` — Elasticsearch bulk ``index`` actions for the subset of
        events selected by the ES uid check.

    For selected events of type ``Warning`` a notification message is also
    pushed through :class:`MessageClient`.

    Returns:
        tuple[list[dict], list[dict]]: ``(events, actions)``.
    """
    response = self.api_instance.list_event_for_all_namespaces()
    # Monthly ES index named "<cluster>-<year>.<month>".
    now = datetime.now()
    es_index = "{}-{}.{}".format(self.cluster.name, now.year, now.month)
    client = log.es.get_es_client()

    events = []
    actions = []
    for item in response.items:
        # Filter out kubeapps-plus chartmuseum sync noise.
        if "apprepo-sync-chartmuseum" in item.metadata.name:
            continue

        source = item.source
        # Component: prefer event source, fall back to reporting_component.
        component = ''
        if source is not None and source.component is not None:
            component = source.component
        elif item.reporting_component is not None:
            component = item.reporting_component
        # Host: prefer event source, fall back to reporting_instance.
        host = ''
        if source is not None and source.host is not None:
            host = source.host
        elif item.reporting_instance is not None:
            host = item.reporting_instance

        # Timestamp preference: last_timestamp > event_time > creation time.
        occurred_at = item.last_timestamp
        if occurred_at is None:
            occurred_at = item.event_time
        if occurred_at is None:
            occurred_at = item.metadata.creation_timestamp

        event = Event(uid=item.metadata.uid, name=item.metadata.name,
                      type=item.type, cluster_name=self.cluster.name,
                      action=item.action, reason=item.reason,
                      count=item.count, host=host, component=component,
                      namespace=item.metadata.namespace,
                      message=item.message, last_timestamp=occurred_at,
                      first_timestamp=item.first_timestamp)
        events.append(event.__dict__)

        # Decide by uid whether this event should be written to ES.
        # NOTE(review): the helper is named get_event_uid_exist but the
        # True branch performs the indexing — confirm the helper's polarity
        # (it may return True for *new* uids).
        if log.es.get_event_uid_exist(client, es_index, item.metadata.uid):
            actions.append({
                '_op_type': 'index',
                '_index': es_index,
                '_type': 'event',
                '_source': event.__dict__,
            })
            if event.type == 'Warning':
                MessageClient().insert_message(self.get_event_message(event))
    return events, actions
def set_cluster_data(self):
    """Aggregate cluster-wide resource statistics and cache them in redis.

    Sums CPU/memory capacity and usage over all nodes, averages the usage
    ratios over the nodes that report non-zero usage, raises a warning
    message when any node exceeds 80% CPU or memory, and stores the
    resulting :class:`ClusterData` snapshot under the cluster name.

    Returns:
        The result of ``redis_cli.set`` for the serialized snapshot.
    """
    self.check_authorization(2)
    nodes = self.list_nodes()
    pods = self.list_pods()
    namespaces = self.list_namespaces()
    deployments = self.list_deployments()

    cpu_total = sum(float(n['cpu']) for n in nodes)
    cpu_usage = sum(float(n['cpu_usage']) for n in nodes)
    mem_total = sum(float(n['mem']) for n in nodes)
    mem_usage = sum(float(n['mem_usage']) for n in nodes)

    # Exclude abnormal nodes (zero cpu AND mem usage) from the average;
    # they contribute 0 to the sums so only the denominator changes.
    idle = [n for n in nodes
            if float(n['cpu_usage']) == 0 and float(n['mem_usage']) == 0]
    active_count = len(nodes) - len(idle)
    # Nodes above 80% on either resource trigger a usage warning.
    warn_nodes = [n for n in nodes
                  if float(n['cpu_usage']) > 0.8 or float(n['mem_usage']) > 0.8]

    if active_count > 0:
        cpu_usage = cpu_usage / active_count
        mem_usage = mem_usage / active_count
    if warn_nodes:
        MessageClient().insert_message(self.get_usage_message(warn_nodes))

    restart_pods = quick_sort_pods(self.restart_pods)
    error_pods = quick_sort_pods(self.error_pods)
    snapshot = ClusterData(cluster=self.cluster, token=self.token, pods=pods,
                           nodes=nodes, namespaces=namespaces,
                           deployments=deployments, cpu_usage=cpu_usage,
                           cpu_total=cpu_total, mem_total=mem_total,
                           mem_usage=mem_usage, restart_pods=restart_pods,
                           warn_containers=self.warn_containers,
                           error_loki_containers=[], error_pods=error_pods)
    return self.redis_cli.set(self.cluster.name, json.dumps(snapshot.__dict__))
def start(self):
    """Dispatch and execute a cluster deploy operation.

    Drives one operation (install / uninstall / bigip-config / upgrade /
    scale / add-worker / remove-worker / restore / backup) selected by
    ``self.operation``: fires the ``pre_deploy_execution_start`` signal,
    runs the matching ``on_*`` handler with the assembled extra vars,
    transitions the cluster status around the run, records an INFO/WARNING
    message, fires ``post_deploy_execution_start``, and returns the
    handler's result dict.
    """
    result = {"raw": {}, "summary": {}}
    pre_deploy_execution_start.send(self.__class__, execution=self)
    cluster = self.get_cluster()
    settings = Setting.get_db_settings()
    # Base ansible extra vars; DB settings and per-cluster configs override.
    # NOTE(review): ``cluster_doamin_suffix`` is misspelled but is the actual
    # attribute name on the model — do not "fix" without renaming the field.
    extra_vars = {
        "cluster_name": cluster.name,
        "cluster_domain": cluster.cluster_doamin_suffix
    }
    extra_vars.update(settings)
    extra_vars.update(cluster.configs)
    # ignore_errors: a failed run does NOT mark the cluster ERROR.
    # return_running: a failed run still resets status back to RUNNING.
    ignore_errors = False
    return_running = False
    message_client = MessageClient()
    message = {
        "item_id": cluster.item_id,
        "title": self.get_operation_name(),
        "content": "",
        "level": "INFO",
        "type": "SYSTEM"
    }
    try:
        if self.operation == "install":
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            cluster.change_status(Cluster.CLUSTER_STATUS_INSTALLING)
            result = self.on_install(extra_vars)
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'uninstall':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            cluster.change_status(Cluster.CLUSTER_STATUS_DELETING)
            result = self.on_uninstall(extra_vars)
            cluster.change_status(Cluster.CLUSTER_STATUS_READY)
            # Uninstall also drops the cached monitor data for this cluster.
            kubeops_api.cluster_monitor.delete_cluster_redis_data(cluster.name)
        elif self.operation == 'bigip-config':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            # F5 config failure must not flip the cluster into ERROR.
            ignore_errors = True
            result = self.on_f5_config(extra_vars)
        elif self.operation == 'upgrade':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            cluster.change_status(Cluster.CLUSTER_STATUS_UPGRADING)
            package_name = self.params.get('package', None)
            package = Package.objects.get(name=package_name)
            # Package metadata supplies upgrade-specific vars.
            extra_vars.update(package.meta.get('vars'))
            result = self.on_upgrade(extra_vars)
            # Only record the new package version when the run succeeded.
            if result.get('summary', {}).get('success', False):
                cluster.upgrade_package(package_name)
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'scale':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            ignore_errors = True
            return_running = True
            # NOTE(review): passes CLUSTER_DEPLOY_TYPE_SCALING (a deploy-type
            # constant) to change_status — confirm this is intentional and
            # not meant to be a CLUSTER_STATUS_* constant.
            cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
            result = self.on_scaling(extra_vars)
            cluster.exit_new_node()
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'add-worker':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            ignore_errors = True
            return_running = True
            cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
            result = self.on_add_worker(extra_vars)
            cluster.exit_new_node()
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'remove-worker':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            ignore_errors = True
            return_running = True
            cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
            result = self.on_remove_worker(extra_vars)
            if not result.get('summary', {}).get('success', False):
                # Removal failed: roll the node back out of the "new" state.
                cluster.exit_new_node()
            else:
                # Removal succeeded: delete the node record from the DB.
                node_name = self.params.get('node', None)
                cluster.change_to()
                node = Node.objects.get(name=node_name)
                node.delete()
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'restore':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            cluster.change_status(Cluster.CLUSTER_STATUS_RESTORING)
            cluster_backup_id = self.params.get('clusterBackupId', None)
            result = self.on_restore(extra_vars, cluster_backup_id)
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        elif self.operation == 'backup':
            logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
            cluster.change_status(Cluster.CLUSTER_STATUS_BACKUP)
            cluster_storage_id = self.params.get('backupStorageId', None)
            result = self.on_backup(extra_vars)
            self.on_upload_backup_file(cluster_storage_id)
            cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
        # Common post-run handling: build the notification message and,
        # on failure, apply the error/rollback status policy chosen above.
        if not result.get('summary', {}).get('success', False):
            message['content'] = self.get_content(False)
            message['level'] = 'WARNING'
            if not ignore_errors:
                cluster.change_status(Cluster.CLUSTER_STATUS_ERROR)
            if return_running:
                cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
            logger.error(msg=":cluster {} exec {} error".format(cluster, self.operation),
                         exc_info=True)
        else:
            message['content'] = self.get_content(True)
        message_client.insert_message(message)
    except Exception as e:
        # Any unexpected failure marks the cluster ERROR and sends a warning.
        logger.error(msg=":cluster {} exec {} error".format(cluster, self.operation),
                     exc_info=True)
        cluster.change_status(Cluster.CLUSTER_STATUS_ERROR)
        message['content'] = self.get_content(False)
        message['level'] = 'WARNING'
        message_client.insert_message(message)
    post_deploy_execution_start.send(self.__class__, execution=self, result=result,
                                     ignore_errors=ignore_errors)
    return result