Exemple #1
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up a repair job for a single node.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to repair, stored as-is.
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.cluster_config, self.node_config = cluster_config, node_config

        # Relative path: presumably resolved against the current working
        # directory by common.load_yaml_file -- confirm before running
        # this from another location.
        repair_yaml = "k8sPaiLibrary/maintainconf/repair.yaml"
        self.maintain_config = common.load_yaml_file(repair_yaml)

        self.clean_flag = clean
        self.jobname = "repair"
Exemple #2
0
    def __init__(self, cluster_config, **kwargs):
        """Set up a deploy job for the cluster.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            **kwargs: Must contain ``clean``; it is stored as ``clean_flag``.

        Raises:
            KeyError: If ``clean`` is not supplied in ``kwargs``.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config = cluster_config

        # Relative path: presumably resolved against the current working
        # directory by common.load_yaml_file -- confirm.
        deploy_yaml = "k8sPaiLibrary/maintainconf/deploy.yaml"
        self.maintain_config = common.load_yaml_file(deploy_yaml)

        self.clean_flag = kwargs["clean"]
Exemple #3
0
    def __init__(self, cluster_config, **kwargs):
        """Set up a deploy job for the cluster.

        The job definition is read from ``../maintainconf/deploy.yaml``,
        joined onto the module-level ``package_directory_deploy``.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            **kwargs: Must contain ``clean``; it is stored as ``clean_flag``.

        Raises:
            KeyError: If ``clean`` is not supplied in ``kwargs``.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config = cluster_config

        deploy_yaml = os.path.join(package_directory_deploy,
                                   "../maintainconf/deploy.yaml")
        self.maintain_config = common.load_yaml_file(deploy_yaml)

        self.clean_flag = kwargs["clean"]
Exemple #4
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up a repair job for a single node.

        The job definition is read from ``../maintainconf/repair.yaml``,
        joined onto the module-level ``package_directory_repair``.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to repair, stored as-is.
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.cluster_config, self.node_config = cluster_config, node_config

        repair_yaml = os.path.join(package_directory_repair,
                                   "../maintainconf/repair.yaml")
        self.maintain_config = common.load_yaml_file(repair_yaml)

        self.clean_flag = clean
        self.jobname = "repair"
Exemple #5
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up a remove-node job for a single node.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to remove, stored as-is.
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config, self.node_config = cluster_config, node_config

        # Relative path: presumably resolved against the current working
        # directory by common.load_yaml_file -- confirm.
        remove_yaml = "k8sPaiLibrary/maintainconf/remove.yaml"
        self.maintain_config = common.load_yaml_file(remove_yaml)

        self.jobname = "remove-node"
        self.clean_flag = clean
Exemple #6
0
    def __init__(self, cluster_object_model, **kwargs):
        """Set up a clean job for the cluster.

        The job definition is read from ``../maintainconf/clean.yaml``,
        joined onto the module-level ``package_directory_clean``.

        Args:
            cluster_object_model: Cluster object model, stored as-is.
            **kwargs: Must contain ``clean`` and ``force``; they are stored
                as ``clean_flag`` and ``force_flag`` respectively.

        Raises:
            KeyError: If ``clean`` or ``force`` is missing from ``kwargs``.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_object_model = cluster_object_model

        clean_yaml = os.path.join(
            package_directory_clean, "../maintainconf/clean.yaml")
        self.maintain_config = common.load_yaml_file(clean_yaml)

        # Looked up in the same order as before so that a missing "clean"
        # is still reported ahead of a missing "force".
        self.clean_flag = kwargs["clean"]
        self.force_flag = kwargs["force"]

        self.jobname = "clean"
Exemple #7
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up a remove-node job for a single node.

        The job definition is read from ``../maintainconf/remove.yaml``,
        joined onto the module-level ``package_directory_remove``.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to remove, stored as-is.
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config, self.node_config = cluster_config, node_config

        remove_yaml = os.path.join(
            package_directory_remove, "../maintainconf/remove.yaml")
        self.maintain_config = common.load_yaml_file(remove_yaml)

        self.jobname = "remove-node"
        self.clean_flag = clean
Exemple #8
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up an etcd-fix job for the node with the broken etcd member.

        The job definition is read from ``../maintainconf/etcdfix.yaml``,
        joined onto the module-level ``package_directory_etcdfix``.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the broken node; must contain
                ``nodename``. Stored as ``bad_node_config``.
            clean: Stored as ``clean_flag``; it is not interpreted here.

        Raises:
            KeyError: If ``node_config`` has no ``nodename`` entry.
        """
        log = logging.getLogger(__name__)
        self.logger = log

        # Announce the target node before touching any other state.
        announcement = "Initialize class etcdfix to fix the broken etcd member on {0}".format(
            node_config["nodename"])
        log.info(announcement)
        log.debug("Node-configuration: {0}".format(str(node_config)))

        self.cluster_config, self.bad_node_config = cluster_config, node_config

        etcdfix_yaml = os.path.join(
            package_directory_etcdfix, "../maintainconf/etcdfix.yaml")
        self.maintain_config = common.load_yaml_file(etcdfix_yaml)

        self.clean_flag = clean
Exemple #9
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up an etcd-fix job for the node with the broken etcd member.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the broken node; must contain
                ``nodename``. Stored as ``bad_node_config``.
            clean: Stored as ``clean_flag``; it is not interpreted here.

        Raises:
            KeyError: If ``node_config`` has no ``nodename`` entry.
        """
        log = logging.getLogger(__name__)
        self.logger = log

        # Announce the target node before touching any other state.
        announcement = "Initialize class etcdfix to fix the broken etcd member on {0}".format(
            node_config["nodename"])
        log.info(announcement)
        log.debug("Node-configuration: {0}".format(str(node_config)))

        self.cluster_config, self.bad_node_config = cluster_config, node_config

        # Relative path: presumably resolved against the current working
        # directory by common.load_yaml_file -- confirm.
        etcdfix_yaml = "k8sPaiLibrary/maintainconf/etcdfix.yaml"
        self.maintain_config = common.load_yaml_file(etcdfix_yaml)

        self.clean_flag = clean
Exemple #10
0
    def __init__(self, cluster_config, node_config, clean):
        """Set up an add-node job for a single node.

        Only the ``worker`` role is supported. Any other ``k8s-role`` sets
        ``jobname`` to ``"error"`` and logs an error instead of raising.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to add; must contain
                ``k8s-role`` (and ``nodename`` for the error message).
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config, self.node_config = cluster_config, node_config

        # Relative path: presumably resolved against the current working
        # directory by common.load_yaml_file -- confirm.
        add_yaml = "k8sPaiLibrary/maintainconf/add.yaml"
        self.maintain_config = common.load_yaml_file(add_yaml)
        self.clean_flag = clean

        role = node_config['k8s-role']
        if role == 'worker':
            self.jobname = "add-worker-node"
            return

        # Unsupported role: flag the job as unusable and report why.
        self.jobname = "error"
        complaint = "[{0}] Error: {1} is an undefined role, quit add job in host [{2}]".format(
            time.asctime(), role, node_config['nodename'])
        self.logger.error(complaint)
Exemple #11
0
    def check(self):
        """Verify that the local kubectl configuration matches pai's configuration.

        Three checks run in order; the first failure is logged and ends
        the run:

        1. ``self.kube_conf_path`` exists.
        2. ``<kube_conf_path>/config`` is a regular file.
        3. The ``server`` of the first cluster entry in that file equals
           ``http://<api-servers-ip>:8080`` built from ``self.cluster_config``.

        Returns:
            bool: True when every check passes, False otherwise. Any
            exception raised while loading or comparing the addresses is
            logged and reported as False.
        """
        log = self.logger
        conf_dir = self.kube_conf_path

        log.info("Checking kubectl's configuration for paictl.")

        if not os.path.exists(conf_dir):
            log.warning(
                "CHECKING FAILED: The path {0} doesn't exist.".format(conf_dir))
            return False
        log.info("CHECKING PASS: The path {0} exists.".format(conf_dir))

        conf_file = "{0}/config".format(conf_dir)
        if not os.path.isfile(conf_file):
            log.warning(
                "CHECKING FAILED: The configuration file {0}/config doesn't exist."
                .format(conf_dir))
            return False
        log.info(
            "CHECKING PASS: The configuration file {0}/config exists.".format(conf_dir))

        try:
            kubectl_conf = common.load_yaml_file(conf_file)
            local_server = kubectl_conf['clusters'][0]['cluster']['server']

            api_ip = self.cluster_config['clusterinfo']['api-servers-ip']
            expected_server = "http://{0}:8080".format(api_ip)

            if local_server != expected_server:
                log.warning(
                    "CHECKING FAILED: The api_server_address in local configuration is different from the one in pai's configuration."
                )
                return False
        except Exception:
            # Covers an unreadable file as well as missing keys on either side.
            log.error(
                "CHECK FAILED:  Unable to compare api_server_address in the configuration."
            )
            return False

        log.info("Kubectl environment checking task is passed.")
        return True
Exemple #12
0
Fichier : add.py Projet : s2100/pai
    def __init__(self, cluster_config, node_config, clean):
        """Set up an add-node job for a single node.

        The job definition is read from ``../maintainconf/add.yaml``,
        joined onto the module-level ``package_directory_add``. The job
        name is picked from the node's ``k8s-role``: ``worker`` and
        ``master`` are supported. Any other role sets ``jobname`` to
        ``"error"`` and logs an error instead of raising.

        Args:
            cluster_config: Cluster-level configuration, stored as-is.
            node_config: Configuration of the node to add; must contain
                ``k8s-role`` (and ``nodename`` for the error message).
            clean: Stored as ``clean_flag``; it is not interpreted here.
        """
        self.logger = logging.getLogger(__name__)
        self.cluster_config, self.node_config = cluster_config, node_config

        add_yaml = os.path.join(
            package_directory_add, "../maintainconf/add.yaml")
        self.maintain_config = common.load_yaml_file(add_yaml)
        self.clean_flag = clean

        role = node_config['k8s-role']
        if role == 'worker':
            self.jobname = "add-worker-node"
            return
        if role == 'master':
            self.jobname = "add-master-node"
            return

        # Unsupported role: flag the job as unusable and report why.
        self.jobname = "error"
        complaint = "[{0}] Error: {1} is an undefined role, quit add job in host [{2}]".format(
            time.asctime(), role, node_config['nodename'])
        self.logger.error(complaint)
Exemple #13
0
def check_docker_daemon_status(outputFile, configFilePath):
    """Check over SSH whether docker is active on every listed machine.

    The cluster configuration is loaded from *configFilePath*; its
    ``machine-list`` is iterated and a shell command is run on each entry
    through ``common.ssh_shell_paramiko``. A node counts as failed when
    that call returns a falsy value or raises. Each failed node is
    reported through ``logger.error``. The aggregate
    ``docker_error_node_count`` line is always written to *outputFile*,
    and is also logged when it is non-zero.

    Args:
        outputFile: Writable file-like object that receives the aggregate
            ``docker_error_node_count`` line.
        configFilePath: Path of the YAML cluster configuration.
    """
    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    # Cluster-wide SSH defaults. Every value falls back to "" so that a
    # missing default can no longer leave ``port`` unbound. Previously the
    # resulting NameError was swallowed below and every node was reported
    # as failed.
    defaults = cluster_config.get("default-machine-properties") or {}
    username = defaults.get("username", "")
    password = defaults.get("password", "")
    port = defaults.get("sshport", "")

    # execute cmd to check health
    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"
    error_node_count = 0
    for node_config in node_configs:
        try:
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                # NOTE(review): the guard tests "sshport" but the fallback
                # is stored under "port" -- confirm which key
                # common.ssh_shell_paramiko actually reads.
                node_config["port"] = port

            if not common.ssh_shell_paramiko(node_config, cmd):
                error_node_count += 1
                # single node docker health
                logger.error(
                    "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                        node_config["hostip"], 1))
        except Exception:
            # Best-effort watchdog: one failing node must not abort the
            # sweep, but KeyboardInterrupt/SystemExit still propagate.
            for e in sys.exc_info():
                logger.error("watchdog error {}".format(e))
            error_node_count += 1
            # single node docker health
            logger.error(
                "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                    node_config["hostip"], 1))

    if error_node_count > 0:
        # aggregate all nodes docker health total count
        logger.error("docker_error_node_count {}\n".format(error_node_count))
    outputFile.write("docker_error_node_count {}\n".format(error_node_count))
Exemple #14
0
def collect_docker_daemon_status(configFilePath):
    """Collect docker-daemon health metrics for every listed machine.

    The cluster configuration is loaded from *configFilePath*; its
    ``machine-list`` is iterated and a shell command is run on each entry
    through ``common.ssh_shell_paramiko``. A node counts as failed when
    that call returns a falsy value or raises.

    Args:
        configFilePath: Path of the YAML cluster configuration.

    Returns:
        list: One ``node_current_docker_error`` ``Metric`` (labelled with
        the node's ``hostip``) per failed node, followed by a single
        ``docker_error_node_count`` ``Metric`` when at least one node
        failed. The list is empty when every node is healthy.

    Raises:
        KeyError: If a machine entry has no ``hostip``.
    """
    metrics = []

    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    # Cluster-wide SSH defaults. Every value falls back to "" so that a
    # missing default can no longer leave ``port`` unbound. Previously the
    # resulting NameError was caught below and every node was reported as
    # failed.
    defaults = cluster_config.get("default-machine-properties") or {}
    username = defaults.get("username", "")
    password = defaults.get("password", "")
    port = defaults.get("sshport", "")

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"
    error_node_count = 0

    for node_config in node_configs:
        ip = node_config["hostip"]
        label = {"instance": ip}

        try:
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                # NOTE(review): the guard tests "sshport" but the fallback
                # is stored under "port" -- confirm which key
                # common.ssh_shell_paramiko actually reads.
                node_config["port"] = port

            if not common.ssh_shell_paramiko(node_config, cmd):
                error_node_count += 1
                # single node docker health
                metrics.append(Metric("node_current_docker_error", label, 1))
        except Exception:
            # Best-effort sweep: record the node as failed and carry on.
            logger.exception("ssh to %s failed", ip)
            error_node_count += 1
            metrics.append(Metric("node_current_docker_error", label, 1))

    if error_node_count > 0:
        metrics.append(Metric("docker_error_node_count", {}, error_node_count))

    return metrics
Exemple #15
0
def load_machine_list(configFilePath):
    """Return the ``hosts`` entry of the cluster configuration at *configFilePath*.

    Raises:
        KeyError: If the loaded configuration has no ``hosts`` entry.
    """
    return common.load_yaml_file(configFilePath)['hosts']