Example #1
    def job_executer(self, node_config):

        self.logger.info("{0} job begins !".format(self.jobname))

        # sftp your script to remote host with paramiko.
        srcipt_package = "{0}.tar".format(self.jobname)
        src_local = "parcel-center/{0}".format(node_config["nodename"])
        dst_remote = "/home/{0}".format(node_config["username"])

        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                node_config) == False:
            return

        commandline = "tar -xvf {0}.tar".format(self.jobname,
                                                node_config['hostip'])
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to uncompress {0}.tar".format(
                self.jobname))
            return

        commandline = "sudo ./{0}/kubernetes-cleanup.sh".format(self.jobname)
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error(
                "Failed to cleanup the kubernetes deployment on {0}".format(
                    node_config['hostip']))
            return

        self.logger.info("Successfully running {0} job on node {1}".format(
            self.jobname, node_config["nodename"]))
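
Note: every example in this listing delegates the SSH/SFTP work to a common module whose implementation is not shown here. The sketch below is only an illustration of what common.sftp_paramiko and common.ssh_shell_paramiko might look like, assuming that node_config carries hostip, port, username, and password keys and that both helpers return True on success and False on failure.

import os
import paramiko


def sftp_paramiko(src_local, dst_remote, filename, node_config):
    # Hypothetical sketch: copy src_local/filename to dst_remote/filename.
    try:
        transport = paramiko.Transport((node_config["hostip"],
                                        int(node_config.get("port", 22))))
        transport.connect(username=node_config["username"],
                          password=node_config["password"])
        sftp = paramiko.SFTPClient.from_transport(transport)
        sftp.put(os.path.join(src_local, filename),
                 "{0}/{1}".format(dst_remote, filename))
        sftp.close()
        transport.close()
        return True
    except Exception:
        return False


def ssh_shell_paramiko(node_config, commandline):
    # Hypothetical sketch: run commandline remotely, True iff exit status 0.
    try:
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(hostname=node_config["hostip"],
                       port=int(node_config.get("port", 22)),
                       username=node_config["username"],
                       password=node_config["password"])
        _, stdout, _ = client.exec_command(commandline)
        exit_status = stdout.channel.recv_exit_status()
        client.close()
        return exit_status == 0
    except Exception:
        return False

With helpers of this shape, the "== False" checks in the examples simply test whether the file transfer or the remote command failed.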
Example #2
    def update_etcd_cluster(self, good_node_config, bad_node_config):

        self.prepare_package(good_node_config, "etcd-reconfiguration-update")

        self.logger.info(
            "Begin to execute the job : etcd-reconfiguration-update.")
        self.logger.info("Update etcd cluster on host [{0}].".format(
            good_node_config['nodename']))

        script_package = "etcd-reconfiguration-update.tar"
        src_local = "parcel-center/{0}".format(good_node_config["nodename"])
        dst_remote = "/home/{0}".format(good_node_config["username"])

        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                good_node_config) == False:
            return

        commandline = "tar -xvf {0}.tar && sudo ./{0}/{1}.sh {2} {3}".format(
            "etcd-reconfiguration-update", "update-etcd-cluster",
            bad_node_config['hostip'], bad_node_config['etcdid'])

        if common.ssh_shell_paramiko(good_node_config, commandline) == False:
            return

        self.logger.info(
            "Successfully update etcd cluster configuration on node {0}".
            format(bad_node_config["nodename"]))

        if self.clean_flag:
            self.delete_packege(good_node_config)
Example #3
    def stop_bad_etcd_server(self, bad_node_config):

        self.prepare_package(bad_node_config, "etcd-reconfiguration-stop")

        self.logger.info(
            "Begin to execute the job : etcd-reconfiguration-stop.")
        self.logger.info("Stop the bad etcd server on host [{0}]".format(
            bad_node_config['nodename']))

        script_package = "etcd-reconfiguration-stop.tar"
        src_local = "parcel-center/{0}".format(bad_node_config["nodename"])
        dst_remote = "/home/{0}".format(bad_node_config["username"])

        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                bad_node_config) == False:
            return

        commandline = "tar -xvf {0}.tar && sudo ./{0}/stop-etcd-server.sh".format(
            "etcd-reconfiguration-stop")

        if common.ssh_shell_paramiko(bad_node_config, commandline) == False:
            return

        self.logger.info(
            "Successfully stoping bad etcd server on node {0}".format(
                bad_node_config["nodename"]))

        if self.clean_flag:
            self.delete_packege(bad_node_config)
Example #4
    def job_executer(self, node_config):

        self.logger.info("{0} job begins !".format(self.jobname))

        # sftp your script to remote host with paramiko.
        srcipt_package = "{0}.tar".format(self.jobname)
        src_local = "parcel-center/{0}".format(node_config["nodename"])
        dst_remote = common.get_user_dir(node_config)
        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                node_config) == False:
            sys.exit(1)

        commandline = "tar -xvf {0}.tar".format(self.jobname,
                                                node_config['hostip'])
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to uncompress {0}.tar".format(
                self.jobname))
            sys.exit(1)

        commandline = "sudo /bin/bash {0}/kubernetes-cleanup.sh".format(
            self.jobname)
        if self.force_flag:
            commandline += " -f"
        if common.ssh_shell_with_password_input_paramiko(
                node_config, commandline) == False:
            self.logger.error(
                "Failed to cleanup the kubernetes deployment on {0}".format(
                    node_config['hostip']))
            sys.exit(1)

        self.logger.info("Successfully running {0} job on node {1}".format(
            self.jobname, node_config["nodename"]))
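
Examples #4, #5, and #12 call common.ssh_shell_with_password_input_paramiko rather than the plain variant, presumably because their sudo commands may prompt for a password. Its implementation is not shown either; the following is a minimal sketch under that assumption, reusing the same hypothetical node_config fields as above.

import paramiko


def ssh_shell_with_password_input_paramiko(node_config, commandline):
    # Hypothetical sketch: like ssh_shell_paramiko, but requests a PTY and
    # feeds the account password to a possible sudo prompt.
    try:
        client = paramiko.SSHClient()
        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        client.connect(hostname=node_config["hostip"],
                       port=int(node_config.get("port", 22)),
                       username=node_config["username"],
                       password=node_config["password"])
        stdin, stdout, _ = client.exec_command(commandline, get_pty=True)
        stdin.write(node_config["password"] + "\n")
        stdin.flush()
        exit_status = stdout.channel.recv_exit_status()
        client.close()
        return exit_status == 0
    except Exception:
        return False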
Example #5
    def job_executer_add_work_node(self):

        self.logger.info("{0} job begins !".format(self.jobname))

        # sftp your script to remote host with paramiko.
        srcipt_package = "{0}.tar".format(self.jobname)
        src_local = "parcel-center/{0}".format(self.node_config["nodename"])
        dst_remote = common.get_user_dir(self.node_config)

        if common.sftp_paramiko(src_local, dst_remote, script_package, self.node_config) == False:
            sys.exit(1)

        commandline = "tar -xvf {0}.tar".format(self.jobname, self.node_config['hostip'])
        if common.ssh_shell_paramiko(self.node_config, commandline) == False:
            self.logger.error("Failed to uncompress {0}.tar".format(self.jobname))
            sys.exit(1)

        commandline = "sudo ./{0}/hosts-check.sh {1}".format(self.jobname, self.node_config['hostip'])
        if common.ssh_shell_with_password_input_paramiko(self.node_config, commandline) == False:
            self.logger.error("Failed to update the /etc/hosts on {0}".format(self.node_config['hostip']))
            sys.exit(1)

        commandline = "sudo ./{0}/docker-ce-install.sh".format(self.jobname)
        if common.ssh_shell_with_password_input_paramiko(self.node_config, commandline) == False:
            self.logger.error("Failed to install docker-ce on {0}".format(self.node_config['hostip']))
            sys.exit(1)

        commandline = "sudo ./{0}/kubelet-start.sh {0}".format(self.jobname)
        if common.ssh_shell_with_password_input_paramiko(self.node_config, commandline) == False:
            self.logger.error("Failed to bootstrap kubelet on {0}".format(self.node_config['hostip']))
            sys.exit(1)

        self.logger.info("Successfully running {0} job on node {1}".format(self.jobname, self.node_config["nodename"]))
Example #6
    def job_executer(self, node_config, job_name):

        # sftp your script to remote host with paramiko.
        srcipt_package = "{0}.tar".format(job_name)
        src_local = "parcel-center/{0}".format(node_config["nodename"])
        dst_remote = "/home/{0}".format(node_config["username"])
        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                node_config) == False:
            return

        commandline = "tar -xvf {0}.tar".format(job_name,
                                                node_config['hostip'])
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to uncompress {0}.tar".format(job_name))
            return

        commandline = "sudo ./{0}/hosts-check.sh {1}".format(
            job_name, node_config['hostip'])
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to update the /etc/hosts on {0}".format(
                node_config['hostip']))
            return

        commandline = "sudo ./{0}/docker-ce-install.sh".format(job_name)
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to install docker-ce on {0}".format(
                node_config['hostip']))
            return

        commandline = "sudo ./{0}/kubelet-start.sh {0}".format(job_name)
        if common.ssh_shell_paramiko(node_config, commandline) == False:
            self.logger.error("Failed to bootstrap kubelet on {0}".format(
                node_config['hostip']))
            return

        self.logger.info("Successfully running {0} job on node {1}!".format(
            job_name, node_config['hostip']))
Example #7
def check_docker_daemon_status(outputFile, configFilePath):
    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']
    username = ""
    password = ""
    sshport = ""

    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            port = cluster_config["default-machine-properties"]["sshport"]
    # execute cmd to check health
    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"
    errorNodeCount = 0
    for node_config in node_configs:
        try:
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["port"] = port

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                logger.error(
                    "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                        node_config["hostip"], 1))
        except Exception:
            logger.exception("watchdog error")
            errorNodeCount += 1
            # single node docker health
            logger.error(
                "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                    node_config["hostip"], 1))

    if errorNodeCount > 0:
        # aggregate all nodes docker health total count
        logger.error("docker_error_node_count {}\n".format(errorNodeCount))
    outputFile.write("docker_error_node_count {}\n".format(errorNodeCount))
Example #8
    def job_executer(self):

        print "repair job begins !"

        # sftp your script to remote host with paramiko.
        srcipt_package = "repair.tar"
        src_local = "parcel-center/{0}".format(self.node_config["nodename"])
        dst_remote = "/home/{0}".format(self.node_config["username"])

        if common.sftp_paramiko(src_local, dst_remote, script_package, self.node_config) == False:
            return

        commandline = "tar -xvf repair.tar && sudo ./repair/repair-worker-node.sh"

        if common.ssh_shell_paramiko(self.node_config, commandline) == False:
            return

        print "Successfully running repair job on node {0}".format(self.node_config["nodename"])
Example #9
def collect_docker_daemon_status(configFilePath):
    metrics = []

    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']
    username = ""
    password = ""
    sshport = ""

    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            port = cluster_config["default-machine-properties"]["sshport"]

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"
    errorNodeCount = 0

    for node_config in node_configs:
        ip = node_config["hostip"]
        label = {"instance": ip}

        try:
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["port"] = port

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                metrics.append(Metric("node_current_docker_error", label, 1))
        except Exception:
            logger.exception("ssh to %s failed", ip)
            errorNodeCount += 1
            metrics.append(Metric("node_current_docker_error", label, 1))

    if errorNodeCount > 0:
        metrics.append(Metric("docker_error_node_count", {}, errorNodeCount))

    return metrics
Example #10
def collect_docker_daemon_status(hosts):
    metrics = []

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    for host in hosts:
        label = {"ip": host["hostip"], "error": "ok"}

        try:
            flag = common.ssh_shell_paramiko(host, cmd)
            if not flag:
                label["error"] = "config"  # configuration is not correct
        except Exception as e:
            label["error"] = str(e)
            logger.exception("ssh to %s failed", host["hostip"])

        metrics.append(Metric("docker_daemon_count", label, 1))

    return metrics
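
Examples #9 and #10 collect Metric objects instead of writing Prometheus exposition text directly as Example #7 does. The Metric class itself is not shown; below is a hypothetical stand-in together with a renderer that produces the same kind of lines that Example #7 writes by hand.

import collections

Metric = collections.namedtuple("Metric", ["name", "labels", "value"])


def metrics_to_text(metrics):
    # Render a list of Metric objects as Prometheus exposition-format lines.
    lines = []
    for m in metrics:
        if m.labels:
            label_str = ",".join(
                '{0}="{1}"'.format(k, v) for k, v in sorted(m.labels.items()))
            lines.append("{0}{{{1}}} {2}".format(m.name, label_str, m.value))
        else:
            lines.append("{0} {1}".format(m.name, m.value))
    return "\n".join(lines) + "\n"

For instance, metrics_to_text([Metric("docker_error_node_count", {}, 2)]) returns the single line docker_error_node_count 2.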
Example #11
    def restart_etcd_server(self, bad_node_config):

        self.logger.info(
            "Begin to execute the job : etcd-reconfiguration-restart.")
        self.logger.info("Restart etcd server on host [{0}].".format(
            bad_node_config['nodename']))

        new_etcd_cluster_ips_peer = self.get_etcd_peer_ip_list(bad_node_config)

        self.cluster_config['clusterinfo'][
            'etcd_cluster_ips_peer'] = new_etcd_cluster_ips_peer
        self.cluster_config['clusterinfo'][
            'etcd-initial-cluster-state'] = 'existing'

        self.prepare_package(bad_node_config, "etcd-reconfiguration-restart")

        script_package = "etcd-reconfiguration-restart.tar"
        src_local = "parcel-center/{0}".format(bad_node_config["nodename"])
        dst_remote = "/home/{0}".format(bad_node_config["username"])

        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                bad_node_config) == False:
            return

        commandline = "tar -xvf {0}.tar && sudo ./{0}/{1}.sh".format(
            "etcd-reconfiguration-restart", "restart-etcd-server")

        if common.ssh_shell_paramiko(bad_node_config, commandline) == False:
            return

        self.logger.info(
            "Successfully restarting bad etcd server on node {0}".format(
                bad_node_config["nodename"]))

        if self.clean_flag:
            self.delete_packege(bad_node_config)
Example #12
    def job_executer_clean_up_node(self):

        self.logger.info("{0} job begins !".format(self.jobname))

        commandline = "kubectl delete node {0}".format(
            self.node_config['nodename'])
        common.execute_shell(
            commandline,
            "Failed to delete  node {0}".format(self.node_config['nodename']))

        # sftp your script to remote host with paramiko.
        srcipt_package = "{0}.tar".format(self.jobname)
        src_local = "parcel-center/{0}".format(self.node_config["nodename"])
        dst_remote = common.get_user_dir(self.node_config)

        if common.sftp_paramiko(src_local, dst_remote, script_package,
                                self.node_config) == False:
            return

        commandline = "tar -xvf {0}.tar".format(self.jobname,
                                                self.node_config['hostip'])
        if common.ssh_shell_paramiko(self.node_config, commandline) == False:
            self.logger.error("Failed to uncompress {0}.tar".format(
                self.jobname))
            return

        commandline = "sudo ./{0}/kubernetes-cleanup.sh".format(self.jobname)
        if common.ssh_shell_with_password_input_paramiko(
                self.node_config, commandline) == False:
            self.logger.error(
                "Failed to cleanup the kubernetes deployment on {0}".format(
                    self.node_config['hostip']))
            return

        self.logger.info("Successfully running {0} job on node {1}".format(
            self.jobname, self.node_config["nodename"]))
Example #13
    def remote_host_cleaner(self, node_config):

        commandline = "sudo rm -rf {0}*".format(self.jobname)

        if common.ssh_shell_paramiko(node_config, commandline) == False:
            return