Ejemplo n.º 1
0
    def list_events(self):
        """Collect cluster events and prepare Elasticsearch bulk actions for them.

        Events are fetched for all namespaces through ``self.api_instance``.
        Events whose name contains ``apprepo-sync-chartmuseum`` are skipped.
        Every remaining item is converted to an ``Event``; for items that pass
        the ``log.es.get_event_uid_exist`` check an index action is queued and,
        when the event type is ``Warning``, a message is inserted through
        ``MessageClient``.

        Returns:
            tuple: ``(events, actions)`` - ``events`` is a list with the
            ``__dict__`` of every kept event, ``actions`` is the list of bulk
            index actions targeting the ``<cluster name>-<year>.<month>`` index.
        """
        event_response = self.api_instance.list_event_for_all_namespaces()
        events = []
        actions = []
        # Read the clock once: two separate now() calls could straddle a
        # month/year rollover and yield an index name that mixes both.
        now = datetime.now()
        # Pass the cluster name as a format argument rather than as part of the
        # template, so braces inside the name cannot break the formatting.
        index = '{}-{}.{}'.format(self.cluster.name, now.year, now.month)
        es_client = log.es.get_es_client()
        for item in event_response.items:
            # Filter out the kubeapps-plus chart synchronisation events.
            if "apprepo-sync-chartmuseum" in item.metadata.name:
                continue

            # Prefer the values under item.source; fall back to the reporting_*
            # fields, and finally to an empty string.
            component, host = '', ''
            if item.source is not None and item.source.component is not None:
                component = item.source.component
            elif item.reporting_component is not None:
                component = item.reporting_component
            if item.source is not None and item.source.host is not None:
                host = item.source.host
            elif item.reporting_instance is not None:
                host = item.reporting_instance

            # Timestamp fallback chain: last_timestamp -> event_time ->
            # metadata.creation_timestamp.
            if item.last_timestamp is not None:
                last_timestamp = item.last_timestamp
            elif item.event_time is not None:
                last_timestamp = item.event_time
            else:
                last_timestamp = item.metadata.creation_timestamp

            event = Event(uid=item.metadata.uid,
                          name=item.metadata.name,
                          type=item.type,
                          cluster_name=self.cluster.name,
                          action=item.action,
                          reason=item.reason,
                          count=item.count,
                          host=host,
                          component=component,
                          namespace=item.metadata.namespace,
                          message=item.message,
                          last_timestamp=last_timestamp,
                          first_timestamp=item.first_timestamp)
            events.append(event.__dict__)
            # Use the uid to decide whether this event still has to be indexed.
            # NOTE(review): the helper name reads like "uid already exists",
            # yet the event is queued when it returns a truthy value - confirm
            # the helper's return convention.
            if log.es.get_event_uid_exist(es_client, index, item.metadata.uid):
                action = {
                    '_op_type': 'index',
                    '_index': index,
                    '_type': 'event',
                    '_source': event.__dict__
                }
                actions.append(action)
                # Warning events additionally produce a message.
                if event.type == 'Warning':
                    message_client = MessageClient()
                    message = self.get_event_message(event)
                    message_client.insert_message(message)
        return events, actions
Ejemplo n.º 2
0
    def set_cluster_data(self):
        """Aggregate node usage figures and store a cluster snapshot in Redis.

        Lists nodes, pods, namespaces and deployments, sums the CPU/memory
        totals and usages of all nodes, averages the usages over the nodes that
        report a non-zero usage, and inserts a message when any node exceeds
        the 0.8 usage threshold. The resulting ``ClusterData`` is serialised to
        JSON and written under the cluster name.

        Returns:
            Whatever ``self.redis_cli.set`` returns for the write.
        """
        self.check_authorization(2)
        node_list = self.list_nodes()
        pod_list = self.list_pods()
        namespace_list = self.list_namespaces()
        deployment_list = self.list_deployments()

        total_cpu = 0
        used_cpu = 0
        total_mem = 0
        used_mem = 0
        active_nodes = len(node_list)
        overloaded_nodes = []
        for node in node_list:
            node_cpu = float(node['cpu'])
            node_cpu_usage = float(node['cpu_usage'])
            node_mem = float(node['mem'])
            node_mem_usage = float(node['mem_usage'])

            total_cpu += node_cpu
            used_cpu += node_cpu_usage
            total_mem += node_mem
            used_mem += node_mem_usage

            if node_cpu_usage == 0 and node_mem_usage == 0:
                # Nodes reporting no usage at all are treated as abnormal and
                # left out of the averaging count.
                active_nodes -= 1
            elif node_cpu_usage > 0.8 or node_mem_usage > 0.8:
                overloaded_nodes.append(node)

        # Turn the usage sums into averages over the nodes that reported data.
        if active_nodes > 0:
            used_cpu = used_cpu / active_nodes
            used_mem = used_mem / active_nodes

        if overloaded_nodes:
            usage_message = self.get_usage_message(overloaded_nodes)
            MessageClient().insert_message(usage_message)

        sorted_restart_pods = quick_sort_pods(self.restart_pods)
        sorted_error_pods = quick_sort_pods(self.error_pods)

        snapshot = ClusterData(
            cluster=self.cluster,
            token=self.token,
            pods=pod_list,
            nodes=node_list,
            namespaces=namespace_list,
            deployments=deployment_list,
            cpu_usage=used_cpu,
            cpu_total=total_cpu,
            mem_total=total_mem,
            mem_usage=used_mem,
            restart_pods=sorted_restart_pods,
            warn_containers=self.warn_containers,
            error_loki_containers=[],
            error_pods=sorted_error_pods,
        )
        payload = json.dumps(snapshot.__dict__)
        return self.redis_cli.set(self.cluster.name, payload)
Ejemplo n.º 3
0
 def start(self):
     """Run the operation stored in ``self.operation`` against the cluster.

     Sends ``pre_deploy_execution_start``, builds the extra variables from
     the cluster name/domain, the DB settings and the cluster configs, then
     dispatches on ``self.operation`` to the matching ``on_*`` handler while
     moving the cluster through the corresponding statuses. A message
     describing the outcome is inserted through ``MessageClient`` and
     ``post_deploy_execution_start`` is sent at the end.

     Any exception raised while executing the operation is logged, the
     cluster is set to ``CLUSTER_STATUS_ERROR`` and a WARNING message is
     inserted; the exception is not re-raised.

     Returns:
         dict: the handler's result, or the initial
         ``{"raw": {}, "summary": {}}`` when no handler produced one.
     """
     result = {"raw": {}, "summary": {}}
     pre_deploy_execution_start.send(self.__class__, execution=self)
     cluster = self.get_cluster()
     settings = Setting.get_db_settings()
     extra_vars = {
         "cluster_name": cluster.name,
         "cluster_domain": cluster.cluster_doamin_suffix
     }
     # Later updates win: cluster configs override DB settings, which
     # override the two base entries above.
     extra_vars.update(settings)
     extra_vars.update(cluster.configs)
     # ignore_errors: a failed result does not put the cluster into ERROR.
     # return_running: a failed result puts the cluster back into RUNNING.
     ignore_errors = False
     return_running = False
     message_client = MessageClient()
     message = {
         "item_id": cluster.item_id,
         "title": self.get_operation_name(),
         "content": "",
         "level": "INFO",
         "type": "SYSTEM"
     }
     try:
         if self.operation == "install":
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             cluster.change_status(Cluster.CLUSTER_STATUS_INSTALLING)
             result = self.on_install(extra_vars)
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'uninstall':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             cluster.change_status(Cluster.CLUSTER_STATUS_DELETING)
             result = self.on_uninstall(extra_vars)
             cluster.change_status(Cluster.CLUSTER_STATUS_READY)
             kubeops_api.cluster_monitor.delete_cluster_redis_data(cluster.name)
         elif self.operation == 'bigip-config':
             # No status transition for this operation.
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             ignore_errors = True
             result = self.on_f5_config(extra_vars)
         elif self.operation == 'upgrade':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             cluster.change_status(Cluster.CLUSTER_STATUS_UPGRADING)
             package_name = self.params.get('package', None)
             package = Package.objects.get(name=package_name)
             extra_vars.update(package.meta.get('vars'))
             result = self.on_upgrade(extra_vars)
             # Only record the new package when the upgrade succeeded.
             if result.get('summary', {}).get('success', False):
                 cluster.upgrade_package(package_name)
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'scale':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             ignore_errors = True
             return_running = True
             # NOTE(review): a DEPLOY_TYPE constant is passed as a status
             # here and in the two worker branches below - confirm intended.
             cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
             result = self.on_scaling(extra_vars)
             cluster.exit_new_node()
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'add-worker':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             ignore_errors = True
             return_running = True
             cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
             result = self.on_add_worker(extra_vars)
             cluster.exit_new_node()
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'remove-worker':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             ignore_errors = True
             return_running = True
             cluster.change_status(Cluster.CLUSTER_DEPLOY_TYPE_SCALING)
             result = self.on_remove_worker(extra_vars)
             if not result.get('summary', {}).get('success', False):
                 cluster.exit_new_node()
             else:
                 # Removal succeeded: delete the node record as well.
                 node_name = self.params.get('node', None)
                 cluster.change_to()
                 node = Node.objects.get(name=node_name)
                 node.delete()
                 cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'restore':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             cluster.change_status(Cluster.CLUSTER_STATUS_RESTORING)
             cluster_backup_id = self.params.get('clusterBackupId', None)
             result = self.on_restore(extra_vars, cluster_backup_id)
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         elif self.operation == 'backup':
             logger.info(msg="cluster: {} exec: {} ".format(cluster, self.operation))
             cluster.change_status(Cluster.CLUSTER_STATUS_BACKUP)
             cluster_storage_id = self.params.get('backupStorageId', None)
             result = self.on_backup(extra_vars)
             self.on_upload_backup_file(cluster_storage_id)
             cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
         # A result without summary.success == True counts as a failure;
         # this includes the untouched default result of an unknown operation.
         if not result.get('summary', {}).get('success', False):
             message['content'] = self.get_content(False)
             message['level'] = 'WARNING'
             if not ignore_errors:
                 cluster.change_status(Cluster.CLUSTER_STATUS_ERROR)
             if return_running:
                 cluster.change_status(Cluster.CLUSTER_STATUS_RUNNING)
             # No exception is active on this path, so exc_info is not
             # requested: it would only append "NoneType: None" to the record.
             logger.error(msg=":cluster {} exec {} error".format(cluster, self.operation))
         else:
             message['content'] = self.get_content(True)
         message_client.insert_message(message)
     except Exception:
         # Boundary handler: record the traceback, flag the cluster as
         # failed and report it, then fall through to the post signal.
         logger.error(msg=":cluster {} exec {} error".format(cluster, self.operation), exc_info=True)
         cluster.change_status(Cluster.CLUSTER_STATUS_ERROR)
         message['content'] = self.get_content(False)
         message['level'] = 'WARNING'
         message_client.insert_message(message)
     post_deploy_execution_start.send(self.__class__, execution=self, result=result, ignore_errors=ignore_errors)
     return result