Python Kubectl Examples

Programming Language: Python

Namespace/Package Name: kubectl

Class/Type: Kubectl

Examples at hotexamples.com: 10

Python Kubectl - 10 examples found. These are the top-rated real-world Python examples of kubectl.Kubectl, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Kubectl(16)

top_pod(2)

run_kubectl(1)

Example #1

Show file

class WriteLog:
    """Gather per-pod resource figures through ``OC``/``Kubectl`` and log them.

    Callers set ``namespace``, ``app_name`` and ``app_type`` and then run the
    ``get_*`` methods (deployments, pods, limits, metrics, status).  Results
    accumulate in ``app_list``, shaped as
    ``{deployment_name: {pod_name: {field: value}}}``.
    """
    # Command wrappers are created once, when the class body executes.
    k = Kubectl()
    # Length in seconds of one sampling round in calculate_overlimit().
    wait_time = 30
    metric_item_list = ["cpu_value", "memory_value"]
    limit_item_list = ["pod_cpu_limits", "pod_memory_limits"]
    request_item_list = ["pod_cpu_requests", "pod_memory_requests"]
    app_list = {}
    app_name = ""
    namespace = ""
    cpu_limit = 0
    mem_limit = 0
    oc = OC()
    # One of "deployment", "deploymentconfig" or "statefulset".
    app_type = ""

    def __init__(self):
        pass

    def find_deploymentconfig_by_namespace(self, app_name):
        """Return the first column of every workload line containing *app_name*.

        Which listing is queried depends on ``self.app_type``; an unknown
        type yields an empty list.
        """
        if self.app_type == "deployment":
            output = self.oc.get_deployment(self.namespace)
        elif self.app_type == "deploymentconfig":
            output = self.oc.get_deploymentconfig(self.namespace)
        elif self.app_type == "statefulset":
            output = self.oc.get_statefulset(self.namespace)
        else:
            output = ""
        return [
            line.split()[0] for line in output.split("\n")
            if app_name in line
        ]

    def find_pod_by_namespace(self, app_name):
        """Return pod names containing *app_name*, skipping names with "build"."""
        pod_name_list = []
        output = self.oc.get_pods(self.namespace)
        for line in output.split("\n"):
            if app_name not in line:
                continue
            pod_name = line.split()[0]
            if "build" in pod_name:
                continue
            pod_name_list.append(pod_name)
        return pod_name_list

    def get_deploymentconfig(self):
        """Reset ``app_list`` to one empty entry per matching workload."""
        self.app_list = {}
        for deployment in self.find_deploymentconfig_by_namespace(
                self.app_name):
            self.app_list[deployment] = {}

    def get_pod_info(self):
        """Attach each matching pod to every workload whose name it contains."""
        for pod_name in self.find_pod_by_namespace(self.app_name):
            for deployment in self.app_list.keys():
                if deployment in pod_name:
                    self.app_list[deployment][pod_name] = {}

    def get_metrics(self):
        """Fill ``cpu_value``/``memory_value`` for every pod from ``top_pod``.

        All items in ``metric_item_list`` are first reset to 0 so a pod with
        no usage line still has the keys.
        """
        self.kubectl = Kubectl()
        for pods in self.app_list.values():
            for pod_info in pods.values():
                for metric_item in self.metric_item_list:
                    pod_info[metric_item] = 0
        for pods in self.app_list.values():
            for pod_name, pod_info in pods.items():
                output = self.kubectl.top_pod(pod_name, self.namespace)
                for line in output.split("\n"):
                    if pod_name not in line:
                        continue
                    fields = line.split()
                    # The last two columns are parsed as CPU ("<n>m") and
                    # memory ("<n>Mi").
                    cpu = int(fields[-2].strip("m"))  # mCore
                    memory = int(fields[-1].strip("Mi"))  # MB
                    pod_info["cpu_value"] = cpu
                    pod_info["memory_value"] = memory

    def get_pod_limit(self, pod_name):
        """Return ``(cpu_limit, memory_limit)`` of the pod's first container.

        CPU is reported in millicores and memory in the "M"-suffixed unit of
        the spec; 0 is returned for anything missing or unparsable.  A pod
        without ``resources``/``limits`` is treated as "no limit" rather than
        as a failure.
        """
        cpu_limit = 0
        memory_limit = 0
        output = self.oc.get_pod_json(pod_name, self.namespace)
        if not output:
            return cpu_limit, memory_limit
        try:
            pod = json.loads(output)
            containers = pod.get("spec", {}).get("containers") or [{}]
            resources = containers[0].get("resources") or {}
            limits = resources.get("limits") or {}
            cpu_limit_mcore = limits.get("cpu", "0m")
            memory_limit_mb = limits.get("memory", "0Mi")
            if cpu_limit_mcore and "m" in cpu_limit_mcore:
                cpu_limit = float(cpu_limit_mcore.split("m")[0])
            else:
                # A value without the "m" suffix is in whole cores.
                cpu_limit = float(cpu_limit_mcore) * 1000
            # NOTE(review): "G" values are scaled by 1000 (not 1024) and
            # other suffixes are left at 0, exactly as before - confirm
            # whether that is intended.
            if memory_limit_mb and "M" in memory_limit_mb:
                memory_limit = float(memory_limit_mb.split("M")[0])
            elif memory_limit_mb and "G" in memory_limit_mb:
                memory_limit = float(memory_limit_mb.split("G")[0]) * 1000
        except Exception as e:
            print("failed to get limits: %s" % str(e))
        return cpu_limit, memory_limit

    def get_limits(self):
        """Store the CPU/memory limit of every pod under ``limit_item_list`` keys.

        The pod spec is fetched once per pod and reused for every limit item.
        """
        for pods in self.app_list.values():
            for pod_name, pod_info in pods.items():
                cpu_limit, memory_limit = self.get_pod_limit(pod_name)
                for metric_item in self.limit_item_list:
                    if metric_item == "pod_cpu_limits":
                        pod_info[metric_item] = cpu_limit
                    else:
                        pod_info[metric_item] = memory_limit

    def get_pod_reason(self, pod_name):
        """Return the last termination reason of the pod's first container.

        The list is empty when the pod JSON is empty or carries no
        ``containerStatuses``/``lastState.terminated`` entry.
        """
        reason_list = []
        output = self.oc.get_pod_json(pod_name, self.namespace)
        if not output:
            return reason_list
        pod = json.loads(output)
        statuses = (pod.get("status") or {}).get("containerStatuses") or []
        if statuses:
            last_state = statuses[0].get("lastState") or {}
            terminated = last_state.get("terminated")
            if terminated:
                reason_list.append(terminated.get("reason"))
        return reason_list

    def get_status(self, is_reason=True):
        """Record ``status`` and ``restart`` (and ``reason``) for every pod.

        The values come from the third and fourth columns of the pod
        listing; when *is_reason* is true the termination reasons are
        fetched as well.
        """
        output = self.oc.get_pods(self.namespace)
        # Index the listing once instead of rescanning it for every pod.
        rows = {}
        for line in output.split("\n"):
            if self.app_name in line:
                fields = line.split()
                if fields:
                    rows[fields[0]] = fields
        for pods in self.app_list.values():
            for pod_name, pod_info in pods.items():
                fields = rows.get(pod_name)
                if fields is None:
                    continue
                pod_info["status"] = fields[2]
                pod_info["restart"] = int(fields[3])
                if is_reason:
                    pod_info["reason"] = self.get_pod_reason(pod_name)

    def get_node_status(self):
        """Return ``{node_name: {"status", "cpu", "memory"}}`` for all nodes.

        ``cpu``/``memory`` are only present when ``top_node`` printed a line
        containing the node name.
        """
        node_info = {}
        output = self.oc.get_nodes()
        for line in output.split("\n"):
            # Skip the header row and blank lines.
            if line.find("NAME") != -1 or not line:
                continue
            fields = line.split()
            node_name = fields[0]
            node_info[node_name] = {"status": fields[1]}
            usage_output = self.k.top_node(node_name)
            for usage_line in usage_output.split("\n"):
                if node_name in usage_line:
                    usage = usage_line.split()
                    cpu = int(usage[1].split("m")[0])
                    memory = int(usage[3].split("Mi")[0])
                    node_info[node_name]["cpu"] = cpu
                    node_info[node_name]["memory"] = memory
        return node_info

    def calculate_overlimit(self, algo, time_count):
        """Sample all pods for *time_count* minutes and append to metric logs.

        One round is taken every ``wait_time`` seconds.  For each workload a
        line is written through ``write_metric``; the over-limit, restart and
        terminated counters keep accumulating across rounds and workloads.

        :param algo: algorithm label; the part before the first "_" is shown
            in the progress messages and the full value names the log file.
        :param time_count: collection time in minutes.
        """
        cpu_count = 0
        memory_count = 0
        total_restart = 0
        total_terminated = 0
        interval = self.wait_time
        data_count = int(time_count * 60 / interval)
        algo_label = algo.split("_")[0]
        print("--- %s collect data and write to logs for %d minutes ---" % (
            algo_label.upper(), time_count))
        for i in range(data_count):
            start_time = time.time()
            self.get_deploymentconfig()
            self.get_pod_info()
            self.get_limits()
            self.get_metrics()
            self.get_status()
            # NOTE(review): data_interval is not defined inside this class;
            # presumably a module-level setting - confirm it exists.
            print("--- %s start to collect data at %d/%d interval(in 30 sec) ---" % (
                algo_label, i, data_interval * 2))
            for pods in self.app_list.values():
                total_cpu = 0
                total_memory = 0
                total_cpu_limit = 0
                total_memory_limit = 0
                # Defined up front so a workload without pods logs 0 instead
                # of an unbound or stale value.
                restart = 0
                for pod_info in pods.values():
                    # Read both limits for every pod; a missing or zero CPU
                    # limit must not inherit the previous pod's limits.
                    cpu_limit = pod_info.get("pod_cpu_limits", 0)
                    memory_limit = pod_info.get("pod_memory_limits", 0)
                    cpu = pod_info["cpu_value"]
                    memory = pod_info["memory_value"]
                    total_cpu += cpu
                    total_memory += memory
                    total_cpu_limit += cpu_limit
                    total_memory_limit += memory_limit
                    # A limit of 0 means "no limit set" and is never counted.
                    if cpu_limit != 0 and cpu >= cpu_limit:
                        cpu_count += 1
                    if memory_limit != 0 and memory >= memory_limit:
                        memory_count += 1
                    restart = pod_info.get("restart", 0)
                    total_restart += restart
                    total_terminated += len(pod_info.get("reason", []))
                num_replica = len(pods)
                print("%s total_cpu= %s m" % (self.app_name, total_cpu))
                print("%s total_memory= %s Mi" % (self.app_name, total_memory))
                print("%s current replica=%d" % (self.app_name, num_replica))
                print("%s overflow= %s times" % (self.app_name, cpu_count))
                print("%s oom= %s times" % (self.app_name, memory_count))
                print("%s restart= %s times" % (self.app_name, total_restart))
                print("%s terminated= %s times" % (
                    self.app_name, total_terminated))
                print("\n")
                total_status = 0
                algo_name = "%s-%s" % (self.app_name, algo)
                # NOTE(review): the restart column holds the last pod's
                # restart count, not total_restart - confirm this is wanted.
                data = [
                    algo_name, total_cpu, total_cpu_limit, total_memory,
                    total_memory_limit, cpu_count, memory_count, num_replica,
                    restart, total_status
                ]
                self.write_metric(data)
            # Pad the round to a full interval, checking every 5 seconds.
            while time.time() - start_time < interval:
                time.sleep(5)

    def write_metric(self, data):
        """Append *data* plus a Unix timestamp as one line to ``./metrics/<data[0]>``.

        The caller's list is left untouched; write failures are reported on
        stdout and otherwise ignored.
        """
        timestamp = str(int(time.time()))
        fields = list(data) + [timestamp]
        try:
            fn = "./metrics/%s" % fields[0]
            with open(fn, "a") as f:
                f.write("%s\n" % " ".join(str(elem) for elem in fields))
        except Exception as e:
            print("failed to write metrics:%s" % str(e))

Example #2

Show file

File: checks.py Project: laashub-soa/skuba

def check_system_pods_ready(conf, platform):
    """Return ``check_pods_ready`` for the ``kube-system`` namespace.

    A ``Kubectl`` built from *conf* and *platform* is passed to the check.
    """
    return check_pods_ready(Kubectl(conf, platform), namespace="kube-system")

Example #3

Show file

File: conftest.py Project: thehejik/skuba

def kubectl(conf):
    """Build and return a ``Kubectl`` for *conf*."""
    # NOTE(review): lives in conftest.py, so presumably a pytest fixture
    # whose decorator is not shown here - confirm.
    client = Kubectl(conf)
    return client

Example #4

Show file

File: zookeeper.py Project: prophetstor-ai/federatorai-demo-kafka

class Zookeeper(Client):
    """Log resource usage of the ``my-cluster-zookeeper`` statefulset.

    Collection is delegated to the class-level ``WriteLog`` instance ``w``,
    which is pointed at this application's namespace, name and type.
    """
    oc = OC()
    k = Kubectl()
    w = WriteLog()

    def __init__(self):
        super(Zookeeper, self).__init__()
        self.namespace = "myproject"
        self.app_name = "my-cluster-zookeeper"
        self.app_type = "statefulset"
        self.w.namespace = self.namespace
        self.w.app_name = self.app_name
        self.w.app_type = self.app_type

    def wait_time(self, value):
        """Sleep for *value* seconds."""
        time.sleep(value)

    def calculate_pod_info(self):
        """Sum usage, limits, restarts and status counts over all pods.

        :return: eight space-separated values followed by a trailing space:
            cpu, cpu limit, memory, memory limit, restarts, running,
            CrashLoopBackOff and OOMKilled counts.
        """
        app_cpu_value = 0
        app_memory_value = 0
        app_cpu_limit = 0
        app_memory_limit = 0
        app_restart = 0
        app_status_running = 0
        app_status_crashloopbackoff = 0
        app_status_oomkilled = 0

        for pod_info in self.w.app_list[self.app_name].values():
            # Fields a pod does not have contribute nothing.
            app_cpu_value += pod_info.get("cpu_value", 0)
            app_memory_value += pod_info.get("memory_value", 0)
            app_cpu_limit += pod_info.get("pod_cpu_limits", 0)
            app_memory_limit += pod_info.get("pod_memory_limits", 0)
            app_restart += pod_info.get("restart", 0)
            status = pod_info.get("status")
            if status == "Running":
                app_status_running += 1
            elif status == "CrashLoopBackOff":
                app_status_crashloopbackoff += 1
            elif status == "OOMKilled":
                app_status_oomkilled += 1

        print("- Zookeepers: CPU %s/%s mCore; Memory %s/%s Mi; Restart %s" % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart))
        output = "%s %s %s %s %s %s %s %s " % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart, app_status_running, app_status_crashloopbackoff,
            app_status_oomkilled)
        return output

    def calculate_overlimit(self):
        """Count pods at or above their CPU/memory limit.

        A limit of 0 means no limit is configured, so such pods are never
        counted (the same rule ``WriteLog.calculate_overlimit`` applies).

        :return: ``"<cpu over> <memory over> <replicas>"``.
        """
        app_cpu_overlimit = 0
        app_memory_overlimit = 0

        pods = self.w.app_list[self.app_name]
        for pod_info in pods.values():
            cpu_value = pod_info["cpu_value"]
            memory_value = pod_info["memory_value"]
            cpu_limit = pod_info["pod_cpu_limits"]
            memory_limit = pod_info["pod_memory_limits"]
            if cpu_limit != 0 and cpu_limit <= cpu_value:
                app_cpu_overlimit += 1
            if memory_limit != 0 and memory_limit <= memory_value:
                app_memory_overlimit += 1
        num_replica = len(pods)
        print("- Zookeepers: OverLimit %s; OOM: %s\n" % (
            app_cpu_overlimit, app_memory_overlimit))
        output = "%s %s %s" % (app_cpu_overlimit, app_memory_overlimit,
                               num_replica)
        return output

    def write_logs(self, algo_name):
        """Refresh the pod data and append one line to the zookeeper log.

        The line is ``<timestamp> <pod info> <overlimit info>`` and goes to
        ``<traffic_path>/<algo_name>_zookeeper_metrics``.

        :return: 0 on success, -1 when the file could not be written.
        """
        self.w.get_deploymentconfig()
        self.w.get_pod_info()
        self.w.get_limits()
        self.w.get_metrics()
        self.w.get_status()

        file_name = "%s/%s_zookeeper_metrics" % (traffic_path, algo_name)
        timestamp = int(time.time())
        line = "%s " % (timestamp)
        line += self.calculate_pod_info()
        line += self.calculate_overlimit()
        line += "\n"

        try:
            with open(file_name, "a") as f:
                f.write(line)
        except Exception as e:
            print("failed to write zookeeper logs(%s): %s" % (file_name,
                                                              str(e)))
            return -1

        return 0

Example #5

Show file

class Consumer(Client):
    """Log resource usage and consumer-group figures of the ``consumer`` deployment.

    Collection is delegated to the class-level ``WriteLog`` instance ``w``,
    which is pointed at this application's namespace, name and type.
    """
    oc = OC()
    k = Kubectl()
    w = WriteLog()

    def __init__(self):
        super(Consumer, self).__init__()
        self.namespace = "myproject"
        self.app_name = "consumer"
        self.app_type = "deployment"
        self.w.namespace = self.namespace
        self.w.app_name = self.app_name
        self.w.app_type = self.app_type

    def wait_time(self, value):
        """Sleep for *value* seconds."""
        time.sleep(value)

    def calculate_pod_info(self):
        """Sum usage, limits, restarts and status counts over all pods.

        OOM kills are counted from each pod's ``reason`` list.

        :return: eight space-separated values followed by a trailing space:
            cpu, cpu limit, memory, memory limit, restarts, running,
            CrashLoopBackOff and OOMKilled counts.
        """
        app_cpu_value = 0
        app_memory_value = 0
        app_cpu_limit = 0
        app_memory_limit = 0
        app_restart = 0
        app_status_running = 0
        app_status_crashloopbackoff = 0
        app_status_oomkilled = 0
        for pod_info in self.w.app_list[self.app_name].values():
            # Fields a pod does not have contribute nothing.
            app_cpu_value += pod_info.get("cpu_value", 0)
            app_memory_value += pod_info.get("memory_value", 0)
            app_cpu_limit += pod_info.get("pod_cpu_limits", 0)
            app_memory_limit += pod_info.get("pod_memory_limits", 0)
            app_restart += pod_info.get("restart", 0)
            status = pod_info.get("status")
            if status == "Running":
                app_status_running += 1
            elif status == "CrashLoopBackOff":
                app_status_crashloopbackoff += 1
            for reason in pod_info.get("reason", []):
                if reason == "OOMKilled":
                    app_status_oomkilled += 1
        print("- Consumers: CPU %s/%s mCore; Memory %s/%s Mi; Restart %s OOMKilled %s" % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart, app_status_oomkilled))
        output = "%s %s %s %s %s %s %s %s " % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart, app_status_running, app_status_crashloopbackoff,
            app_status_oomkilled)
        return output

    def calculate_overlimit(self):
        """Count pods at or above their CPU/memory limit.

        A limit of 0 means no limit is configured, so such pods are never
        counted (the same rule ``WriteLog.calculate_overlimit`` applies).

        :return: ``"<cpu over> <memory over> <replicas> "`` (trailing space).
        """
        app_cpu_overlimit = 0
        app_memory_overlimit = 0

        pods = self.w.app_list[self.app_name]
        for pod_info in pods.values():
            cpu_value = pod_info["cpu_value"]
            memory_value = pod_info["memory_value"]
            cpu_limit = pod_info["pod_cpu_limits"]
            memory_limit = pod_info["pod_memory_limits"]
            if cpu_limit != 0 and cpu_limit <= cpu_value:
                app_cpu_overlimit += 1
            if memory_limit != 0 and memory_limit <= memory_value:
                app_memory_overlimit += 1
        num_replica = len(pods)
        print("- Consumers: Replica: %s\n" % (num_replica))
        output = "%s %s %s " % (app_cpu_overlimit, app_memory_overlimit,
                                num_replica)
        return output

    def calculate_performance(self, group_name, topic_name):
        """Describe the consumer group three times and average the figures.

        Lag, offsets and the inactive-client count are averaged over the
        samples; the active-client figure is the number of distinct consumer
        ids seen, and partitions are counted once each.

        :return: eight space-separated values followed by a trailing space:
            group, topic, lag, active, inactive, log offset, current offset
            and partition count.
        """
        total_lag = 0
        total_log_offset = 0
        total_current_offset = 0
        inactive_client = 0
        partition_list = []
        active_client_list = []
        num_sample = 3
        for _ in range(num_sample):
            output = self.describe_consumer_group(group_name)
            print("===")
            print("%s" % output)
            print("===")
            for line in output.split("\n"):
                if not line or topic_name not in line or "Error" in line:
                    continue
                # Columns 2-6 are read as partition, current offset, log
                # offset, lag and consumer id.
                fields = line.split()
                partition = int(fields[2])
                if partition not in partition_list:
                    partition_list.append(partition)
                current_offset = int(fields[3])
                log_offset = int(fields[4])
                lag = int(fields[5])
                consumer_id = fields[6]
                total_log_offset += log_offset
                total_current_offset += current_offset
                total_lag += lag
                if "consumer-1" not in consumer_id:
                    inactive_client += 1
                if consumer_id not in active_client_list:
                    active_client_list.append(consumer_id)
        samples = float(num_sample)
        total_lag = total_lag / samples
        total_log_offset = total_log_offset / samples
        total_current_offset = total_current_offset / samples
        inactive_client = inactive_client / samples
        active_client = len(active_client_list)
        print("- Consumers: Log Offset %s; Current Offset %s; Lag %s;" % (
            total_log_offset, total_current_offset, total_lag))
        print("- Consumers: Active %s; Inactive %s" % (
            active_client, inactive_client))
        print("\n")
        output = "%s %s %s %s %s %s %s %s " % (
            group_name, topic_name, total_lag, active_client, inactive_client,
            total_log_offset, total_current_offset, len(partition_list))
        return output

    def write_logs(self, algo_name, group_name, topic_name):
        """Refresh the pod data and append one line to the consumer log.

        The line is ``<timestamp> <pod info> <overlimit info>`` and goes to
        ``<traffic_path>/<algo_name>_consumer_metrics``.  *group_name* and
        *topic_name* are currently unused because the performance figures
        are disabled.

        :return: 0 on success, -1 when the file could not be written.
        """
        self.w.get_deploymentconfig()
        self.w.get_pod_info()
        self.w.get_limits()
        self.w.get_metrics()
        self.w.get_status()

        file_name = "%s/%s_consumer_metrics" % (traffic_path, algo_name)
        timestamp = int(time.time())
        line = "%s " % (timestamp)
        line += self.calculate_pod_info()
        line += self.calculate_overlimit()
        # hungo test - block calculate (per maygy)
        #line += self.calculate_performance(group_name, topic_name)
        line += "\n"

        try:
            with open(file_name, "a") as f:
                f.write(line)
        except Exception as e:
            print("failed to write consumer logs(%s): %s" % (file_name, str(e)))
            return -1

        return 0

    def delete_all_consumer_groups(self):
        """Delete every group returned by ``list_consumer_group``."""
        for group in self.list_consumer_group():
            self.delete_consumer_group(group)

Example #6

Show file

def inhibit_kured(options):
    """Call ``inhibit_kured()`` on a ``Kubectl`` built from ``options.conf``."""
    client = Kubectl(options.conf)
    client.inhibit_kured()

Example #7

Show file

File: conftest.py Project: danielorf/skuba

def kubectl(conf, target):
    """Build and return a ``Kubectl`` for *conf* and *target*."""
    # NOTE(review): lives in conftest.py, so presumably a pytest fixture
    # whose decorator is not shown here - confirm.
    client = Kubectl(conf, target)
    return client

Example #8

Show file

File: train_traffic.py Project: prophetstor-ai/federatorai-demo-nginx

class Training:
    """Collect node, pod and connect-latency figures while traffic is running."""
    k = Kubectl()
    o = OC()
    n = Nginx()

    def __init__(self):
        # self.o.login("admin", "password")
        pass

    def get_node_list(self):
        """Return the first column of every non-header line of ``get_nodes``."""
        node_list = []
        output = self.o.get_nodes()
        for line in output.split("\n"):
            if line.find("NAME") == -1 and line:
                node_list.append(line.split()[0])
        return node_list

    def get_node_usage(self):
        """Return ``(max, average)`` CPU percentage over all nodes.

        Raises ``ZeroDivisionError`` when no node usage could be read.
        """
        # kubectl top node h5-135
        # NAME      CPU(cores)   CPU%      MEMORY(bytes)   MEMORY%
        # h5-135    655m         8%        5703Mi          17%
        node_usage = {"cpu": {}, "memory": {}}
        for node in self.get_node_list():
            output = self.k.top_node(node)
            for line in output.split("\n"):
                if line.find("NAME") == -1 and line:
                    fields = line.split()
                    cpu_usage = int(fields[2].split("%")[0])
                    memory_usage = int(fields[-1].split("%")[0])
                    node_usage["cpu"][node] = cpu_usage
                    node_usage["memory"][node] = memory_usage
        cpu_values = list(node_usage["cpu"].values())
        # The values are ints, so "/" truncates under Python 2.
        avg_node_usage = sum(cpu_values) / len(cpu_values)
        max_node_usage = max(cpu_values)
        return max_node_usage, avg_node_usage

    def get_pod_usage(self, app_name, app_namespace):
        """Return ``(max, average, pod count)`` CPU millicores for the app's pods.

        Raises ``ZeroDivisionError`` when no pod usage could be read.
        """
        pod_usage = {"cpu": {}, "memory": {}}
        pod_name_list = find_pod_name(app_name, app_namespace)
        for pod in pod_name_list:
            output = self.k.top_pod(pod, app_namespace)
            for line in output.split("\n"):
                if line.find("NAME") == -1 and line:
                    fields = line.split()
                    cpu_usage = int(fields[1].split("m")[0])
                    memory_usage = int(fields[-1].split("M")[0])
                    pod_usage["cpu"][pod] = cpu_usage
                    pod_usage["memory"][pod] = memory_usage
        cpu_values = list(pod_usage["cpu"].values())
        # The values are ints, so "/" truncates under Python 2.
        avg_pod_usage = sum(cpu_values) / len(cpu_values)
        max_pod_usage = max(cpu_values)
        num_pod = len(pod_name_list)
        return max_pod_usage, avg_pod_usage, num_pod

    def import_traffic(self, ratio, i):
        """Start ``run_ab.py`` in the background and return the ``os.system`` status.

        *i* is accepted for compatibility but not used.
        """
        cmd = "python ./run_ab.py %d %d &" % (0, ratio)
        ret = os.system(cmd)
        return ret

    def get_traffic_info(self):
        """Return the third field of every ``Connect:`` line under ``./traffic``."""
        dir_name = "./traffic"
        latency_list = []
        for traffic in os.listdir(dir_name):
            traffic_file = os.path.join(dir_name, traffic)
            if not os.path.exists(traffic_file):
                continue
            with open(traffic_file, "r") as f:
                for line in f.read().split("\n"):
                    if line.find("Connect:  ") != -1:
                        latency_list.append(int(line.split()[2]))
        return latency_list

    def collect_usage(self, app_namespace, app_name, timeout=120,
                      poll_interval=5):
        """Sample node and pod CPU usage for *timeout* seconds and average it.

        At least one sample is always taken.  Traffic files are read once,
        after sampling has finished; when they hold no latency lines the
        average connect latency is reported as 0 instead of failing.

        :param app_namespace: namespace of the application's pods.
        :param app_name: name used to find the application's pods.
        :param timeout: sampling duration in seconds.
        :param poll_interval: pause between samples in seconds.
        :return: dict with ``max_node``, ``avg_node``, ``max_pod``,
            ``avg_pod`` and ``avg_connect_latency``.
        """
        data = {}
        max_node_usage_list = []
        avg_node_usage_list = []
        max_pod_usage_list = []
        avg_pod_usage_list = []
        start_time = time.time()
        print("collect %ds resource usage" % timeout)
        while True:
            max_node_usage, avg_node_usage = self.get_node_usage()
            max_pod_usage, avg_pod_usage, num_pod = self.get_pod_usage(
                app_name, app_namespace)
            max_node_usage_list.append(max_node_usage)
            avg_node_usage_list.append(avg_node_usage)
            max_pod_usage_list.append(max_pod_usage)
            avg_pod_usage_list.append(avg_pod_usage)
            time.sleep(poll_interval)
            if time.time() - start_time > timeout:
                print("time is up to %ds..." % timeout)
                break
        connect_latency_list = self.get_traffic_info()
        max_node_usage = sum(max_node_usage_list) / len(max_node_usage_list)
        avg_node_usage = sum(avg_node_usage_list) / len(avg_node_usage_list)
        max_pod_usage = sum(max_pod_usage_list) / len(max_pod_usage_list)
        avg_pod_usage = sum(avg_pod_usage_list) / len(avg_pod_usage_list)
        if connect_latency_list:
            avg_connect_latency = sum(connect_latency_list) / len(
                connect_latency_list)
        else:
            # No traffic file has produced a latency line yet.
            avg_connect_latency = 0
        print("max. node = %s %%" % max_node_usage)
        print("avg. node = %s %%" % avg_node_usage)
        print("max. pod =  %s m" % max_pod_usage)
        print("avg. pod =  %s m" % avg_pod_usage)
        print("avg. connect latency =  %s ms" % avg_connect_latency)
        data["max_node"] = max_node_usage
        data["avg_node"] = avg_node_usage
        data["max_pod"] = max_pod_usage
        data["avg_pod"] = avg_pod_usage
        data["avg_connect_latency"] = avg_connect_latency
        return data

Example #9

Show file

File: client.py Project: prophetstor-ai/federatorai-demo-kafka

class Client(object):
    """Helper that locates Kafka-related services/pods and runs Kafka tools.

    The ``find_*`` methods scan the text listings returned by ``self.oc``;
    the remaining methods build Kafka command lines and run them inside a
    pod through ``self.oc.exec_cmd``.
    """
    # Class-level helper objects: created once and shared by all instances.
    oc = OC()
    kubectl = Kubectl()
    # Not assigned by any method visible in this class.
    zookeeper = ""

    def __init__(self):
        """Create a client; no per-instance state is set up."""
        pass

    def find_broker_ip(self):
        """Locate the Kafka broker service in the all-namespace service list.

        Scans ``self.oc.get_services_all_namespace()`` for lines that mention
        "my-cluster" but none of "bootstrap", "zookeeper" or "exporter". The
        first two whitespace-separated columns of the last matching line are
        used as namespace and address.

        Returns:
            tuple ``(ns, ip, port)``. On success ``port`` is the hard-coded
            9092. If scanning raises, the values collected so far are
            returned together with an empty-string port.
        """
        # The port is hard-coded; it is not parsed from the listing.
        broker_port = 9092
        ns = ""
        ip = ""
        output = self.oc.get_services_all_namespace()
        try:
            for line in output.split("\n"):
                if "my-cluster" not in line:
                    continue
                if "bootstrap" in line or "zookeeper" in line or "exporter" in line:
                    continue
                columns = line.split()
                ns = columns[0]
                ip = columns[1]
        except Exception as e:
            print "it cannot find broker ip: %s" % str(e)
            return ns, ip, ""
        # Log the port that is actually returned to the caller.
        print "find broker ip (%s:%s)" % (ip, broker_port)
        return ns, ip, broker_port

    def find_zookeeper_ip(self):
        """Locate the ZooKeeper client service in the service listing.

        A line qualifies when it contains "zookeeper-client" and does not
        contain "zookeeper-headless"; the last qualifying line wins.

        Returns:
            tuple ``(ns, ip, port)`` built from the first two columns of the
            matching line and the fixed port 2181. If scanning raises, the
            values collected so far are returned with an empty-string port.
        """
        namespace = ""
        address = ""
        listing = self.oc.get_services_all_namespace()
        try:
            for entry in listing.split("\n"):
                if "zookeeper-client" in entry and "zookeeper-headless" not in entry:
                    fields = entry.split()
                    namespace = fields[0]
                    address = fields[1]
        except Exception as e:
            print "it cannot find zookeeper ip: %s" % str(e)
            return namespace, address, ""
        # The port is fixed rather than read from the listing.
        return namespace, address, 2181

    def find_producer_pod(self):
        """Collect the running producer pods from the all-namespace pod list.

        A line qualifies when it contains both "producer" and "Running".

        Returns:
            tuple ``(ns, pod_list)``: the first column of the last qualifying
            line and the second column of every qualifying line. If scanning
            raises, whatever was collected so far is returned.
        """
        namespace = ""
        producers = []
        listing = self.oc.get_pods_all_namespace()
        try:
            for entry in listing.split("\n"):
                if "producer" not in entry or "Running" not in entry:
                    continue
                fields = entry.split()
                namespace = fields[0]
                producers.append(fields[1])
        except Exception as e:
            print "it cannot find producer pod: %s" % str(e)
        return namespace, producers

    def find_consumer_pod(self):
        """Collect the running consumer pods from the all-namespace pod list.

        A line qualifies when it contains both "consumer" and "Running".

        Returns:
            tuple ``(ns, pod_list)``: the first column of the last qualifying
            line and the second column of every qualifying line. If scanning
            raises, whatever was collected so far is returned.
        """
        namespace = ""
        consumers = []
        listing = self.oc.get_pods_all_namespace()
        try:
            for entry in listing.split("\n"):
                if "consumer" not in entry or "Running" not in entry:
                    continue
                fields = entry.split()
                namespace = fields[0]
                consumers.append(fields[1])
        except Exception as e:
            print "it cannot find consumer pod: %s" % str(e)
        return namespace, consumers

    def find_zookeeper_pod(self):
        ns = ""
        pod_list = []
        output = self.oc.get_pods_all_namespace()
        try:
            for line in output.split("\n"):
                if line.find("zookeeper-client") != -1:
                    ns = line.split()[0]
                    pod = line.split()[1]
                    pod_list.append(pod)
        except Exception as e:
            print "it cannot find consumer pod: %s" % str(e)
            return ns, pod_list
        print "find %s zookeepers in ns (%s)" % (len(pod_list), ns)
        return ns, pod_list

    def find_broker_pod(self):
        ns = ""
        pod_list = []
        output = self.oc.get_pods_all_namespace()
        try:
            for line in output.split("\n"):
                if line.find("my-cluster") != -1 and line.find("export") == -1 and line.find("operator") == -1 and line.find("zookeeper") == -1:
                    ns = line.split()[0]
                    pod = line.split()[1]
                    pod_list.append(pod)
        except Exception as e:
            print "it cannot find consumer pod: %s" % str(e)
            return ns, pod_list
        print "find %s brokers in ns (%s)" % (len(pod_list), ns)
        return ns, pod_list

    def list_topic(self):
        topic_list = []
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --bootstrap-server %s:%s --list" % (ip, port)
        output = self.oc.exec_cmd(ns, pod, cmd)
        if not output:
            print "there is no topics in %s" % pod
        else:
            for line in output.split("\n"):
                if line:
                    item = line.split()[0]
                    if item and item not in topic_list:
                        topic_list.append(item)
        print "current topics: %s" % ",".join(topic_list)
        return topic_list

    def describe_topic(self, topic_name):
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --bootstrap-server %s:%s --describe --topic %s" % (ip, port, topic_name)
        output = self.oc.exec_cmd(ns, pod, cmd)
        return output

    def create_topic(self, topic_name):
        # references: https://blog.csdn.net/u010886217/article/details/83119774
        # --replication-factor<=number of brokers
        # --partitions: 1x or 2x number of brokers
        ns, broker_list = self.find_broker_pod()
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        partition = len(broker_list)
        replication = len(broker_list)
        cmd = "/opt/kafka/bin/kafka-topics.sh --bootstrap-server %s:%s --topic %s --create --partitions %d --replication-factor %d" % (ip, port, topic_name, partition, replication)
        print cmd
        output = self.oc.exec_cmd(ns, pod, cmd)
        print output
        return output

    def delete_topic(self, topic_name):
        print "delete topic:", topic_name
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --delete --bootstrap-server %s:%s --topic %s delete.topic.enable=true" % (ip, port, topic_name)
        output = self.oc.exec_cmd(ns, pod, cmd)
        return output

    def modify_topic(self, topic_name, num_partition):
        print "modify topic:", topic_name
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-topics.sh --alter --bootstrap-server %s:%s --topic %s --partitions %s" % (ip, port, topic_name, num_partition)
        print cmd
        output = self.oc.exec_cmd(ns, pod, cmd)
        print output
        return output

    def list_consumer_group(self):
        # print "--- list consumer group ---"
        group_list = []
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        if not pod_list:
            raise Exception("consumer is not existed")
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-consumer-groups.sh --bootstrap-server %s:%s --list" % (ip, port)
        output = self.oc.exec_cmd(ns, pod, cmd)
        for group in output.split("\n"):
            if group and group.find("Note") == -1:
                group_list.append(group)
        return group_list

    def describe_consumer_group(self, consumer_group_name):
        # print "describe consumer group: ", consumer_group_name
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-consumer-groups.sh --bootstrap-server %s:%s --describe --group %s" % (ip, port, consumer_group_name)
        output = self.oc.exec_cmd(ns, pod, cmd)
        return output

    def delete_consumer_group(self, consumer_group_name):
        print "delete consumer group: ", consumer_group_name
        # only delete consumer group by zookeeper
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        pod = pod_list[0]
        cmd = "/opt/kafka/bin/kafka-consumer-groups.sh --bootstrap-server %s:%s --delete --group %s" % (ip, port, consumer_group_name)
        print cmd
        output = self.oc.exec_cmd(ns, pod, cmd)
        return output

    def producer_per_test(self, topic_name, message_count):
        # reference1: https://gist.github.com/ueokande/b96eadd798fff852551b80962862bfb3
        # reference2: https://blog.csdn.net/tom_fans/article/details/75517367
        # print "--- producer_per_test ---"
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_producer_pod()
        if not pod_list:
            raise Exception("producer is not existed")
        pod_info = {}
        record_size = message_size
        for pod in pod_list:
            pod_info[pod] = {}
            cmd = "/opt/kafka/bin/kafka-producer-perf-test.sh --topic %s --num-records %s --record-size %s --throughput 1000000 --producer-props bootstrap.servers=%s:%s" % (topic_name, message_count, record_size, ip, port)  
            print cmd
            output = self.oc.exec_cmd(ns, pod, cmd)
            #print "%s: " % pod, output
            if not output:
                raise Exception("failed to produces messages")
            print "%s produces %s messages for topic %s" % (pod, message_count, topic_name)
            try:
                for line in output.split("\n"):
                    if line and len(line.split()) > 20 and line.find("OpenJDK") == -1:
                        pod_info[pod]["record"] = int(line.split()[0])
                        pod_info[pod]["throughput"] = float(line.split()[3])
                        pod_info[pod]["avg_latency"] = float(line.split()[7])
                        pod_info[pod]["max_latency"] = float(line.split()[11])
                        pod_info[pod]["50th_latency"] = float(line.split()[15])
                        pod_info[pod]["95th_latency"] = float(line.split()[18])
                        pod_info[pod]["99th_latency"] = float(line.split()[21])
                        pod_info[pod]["99.9th_latency"] = float(line.split()[24])
            except Exception as e:
                print "failed to get producer metrics: %s" % str(e)
                pod_info[pod]["record"] = 0
                pod_info[pod]["throughput"] = 0
                pod_info[pod]["avg_latency"] = 0
                pod_info[pod]["max_latency"] = 0
                pod_info[pod]["50th_latency"] = 0
                pod_info[pod]["95th_latency"] = 0
                pod_info[pod]["99th_latency"] = 0
                pod_info[pod]["99.9th_latency"] = 0
        return pod_info
        
    def consumer_per_test(self, topic_name, message_count):
        # reference: https://gist.github.com/ueokande/b96eadd798fff852551b80962862bfb3
        print "--- consumer_per_test ---"
        ns, ip, port = self.find_zookeeper_ip()
        ns, pod_list = self.find_consumer_pod()
        if not pod_list:
            raise Exception("consumer is not existed")
        pod_info = {}
        for pod in pod_list:
            pod_info[pod] = {}
            cmd = "/opt/kafka/bin/kafka-consumer-perf-test.sh --topic %s --messages %s --zookeeper=%s:%s --threads 1" % (topic_name, message_count, ip, port)
            # cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.ConsumerPerformance --topic %s --messages %s --zookeeper=%s:%s --threads 1" % (topic_name, message_count, ip, port)
            print cmd
            output = self.oc.exec_cmd(ns, pod, cmd)
            print "%s receives %s messages for topic %s" % (pod, message_count, topic_name)
            # print output
            for line in output.split("\n"):
                if line and line.find("start.time") == -1:
                    pod_info[pod]["MB.sec"] = float(line.split()[-3].split(",")[0])
                    pod_info[pod]["nMsg.sec"] = float(line.split()[-1])
        return pod_info

    def get_topic_info(self, topic_name):
        num_partition = 0
        output = self.describe_topic(topic_name)
        try:
            num_partition = int(output.split()[1].split(":")[1])
        except Exception as e:
            num_partition = 0
        print "%s has %s partitions" % (topic_name, num_partition)
        return num_partition

    def get_consumer_group_info(self, topic_name, group_name):
        topic_info = {}
        output = self.describe_consumer_group(group_name)
        for line in output.split("\n"):
            if line.find(topic_name) != -1:
                partition = int(line.split()[1])
                current_offset = int(line.split()[2])
                topic_info[partition] = current_offset
        return topic_info

    def simple_consumer_shell(self, topic_name, max_messages):
        # reference: https://segmentfault.com/a/1190000016106045
        # print "--- simple-consumer-shell ---"
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_consumer_pod()
        num_partition = self.get_topic_info(topic_name)
        topic_info = self.get_consumer_group_info(topic_name, group_name)
        if num_partition == 0:
            raise Exception("%s is not existed" % (topic_name))
        for pod in pod_list:
            pod_id = pod_list.index(pod)
            partition = pod_id % num_partition
            current_offset = topic_info[partition]
            offset = current_offset
            cmd = "/opt/kafka/bin/kafka-simple-consumer-shell.sh --broker-list %s:%s --partition %s --offset %s --max-messages %s --topic %s --property group_id=%s" % (ip, port, partition, offset, max_messages, topic_name, group_name)
            print cmd
            output = self.oc.exec_cmd(ns, pod, cmd)
            print "consumer(%s) of group(%s) receives %s messages at offset(%s) in partition(%s) of topic(%s) " % (pod, group_name, max_messages, offset, partition, topic_name)
        return 0

    def console_consumer(self, topic_name):
        print "--- console-consumer ---"
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_consumer_pod()
        for pod in pod_list:
            cmd = "/opt/kafka/bin/kafka-console-consumer.sh --bootstrap-server %s:%s --topic %s --consumer-property group.id=test1" % (ip, port, topic_name)
            print cmd
            output = self.oc.exec_cmd(ns, pod, cmd)
        return 0

    def verify_consumer(self, group_name, topic_name):
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_consumer_pod()
        for pod in pod_list:
            cmd = "/opt/kafka/bin/kafka-verifiable-consumer.sh --broker-list %s:%s --group-id %s --topic %s" % (ip, port, group_name, topic_name)
            print cmd
            output = self.oc.exec_cmd(ns, pod, cmd)
            print "consumer(%s) of group(%s) receives messages for topic(%s) " % (pod, group_name, topic_name)
        return 0

    def verify_producer(self, topic_name):
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_consumer_pod()
        for pod in pod_list:
            cmd = "kafka-verifiable-producer.sh --broker-list %s:%s --max-messages %s --topic %s" % (ip, port, messages, topic_name)
            output = self.oc.exec_cmd(ns, pod, cmd)
            print "producer(%s) send %s messages to topic(%s) " % (pod, messages, topic_name)
        return 0

    def end_to_end_latency(self, topic_name, num_messages):
        print "--- latency from producer to broker and broker to consumer ---"
        ns, ip, port = self.find_broker_ip()
        ns, pod_list = self.find_consumer_pod()
        if not pod_list:
            raise Exception("consumer is not existed")
        pod_info = {}
        for pod in pod_list:
            pod_info[pod] = {}
        cmd = "/opt/kafka/bin/kafka-run-class.sh kafka.tools.EndToEndLatency %s:%s %s %s all 100" % (ip, port, topic_name, num_messages)
        output = self.oc.exec_cmd(ns, pod, cmd)
        return output


    def compute_avg_metrics(self, pod_info_list):
        """Average per-pod producer metrics over several test runs.

        Args:
            pod_info_list: list of ``{pod: {metric: value}}`` dicts, one per
                run. The metric names and the pod count are taken from the
                first run.

        Returns:
            dict ``{metric: average}``. ``record`` is the record total
            divided by the pod count and the number of runs. Every other
            metric is its record-weighted sum divided by the pod count and
            the record total, or 0 when no records were counted. An empty
            input (or an empty first run) yields ``{}``.
        """
        if not pod_info_list or not pod_info_list[0]:
            return {}
        first_run = pod_info_list[0]
        pod_num = len(first_run)
        metric_names = list(first_run[next(iter(first_run))])
        total_metric = dict.fromkeys(metric_names, 0)

        for pod_info in pod_info_list:
            for metrics in pod_info.values():
                for metric, value in metrics.items():
                    if not value:
                        continue
                    if metric == "record":
                        total_metric[metric] += value
                    else:
                        # Weight each metric by the pod's record count.
                        total_metric[metric] += value * metrics["record"]

        num_time = len(pod_info_list)
        total_record = total_metric.get("record", 0)
        avg_metric = {}
        for metric in metric_names:
            if metric == "record":
                avg_metric[metric] = total_metric[metric] / (pod_num * 1.0) / num_time
            elif total_record:
                # NOTE(review): the weighted sum is divided by the pod count
                # as well as by the record total -- confirm this is intended.
                avg_metric[metric] = total_metric[metric] / (pod_num * 1.0) / total_record
            else:
                # No records at all (e.g. every run failed): avoid dividing by 0.
                avg_metric[metric] = 0
        return avg_metric

    def calculate_consumer_rate(self):
        consumer_rate = 0
        ns, pod_list = self.find_consumer_pod()
        start_time = 0
        end_time = 0
        total_count = 0
        comsumer_rate = 0
        for pod in pod_list:
            output = self.oc.log_pod(ns, pod)
            for line in output.split("\n"):
                # print line
                if line and line.find("WARN") == -1:
                    line = json.loads(line)
                    timestamp = line.get("timestamp")
                    count = line.get("count")
                    if count:
                        start_time = timestamp
                        break
        for pod in pod_list:
            output = self.oc.log_pod(ns, pod)
            for line in output.split("\n"):
                if line and line.find("WARN") == -1:
                    line = json.loads(line)
                    timestamp = line.get("timestamp")
                    count = line.get("count")
                    if count:
                        total_count += count
                        end_time = timestamp
        time_diff = (end_time - start_time) * 1.0
        if time_diff:
            comsumer_rate = total_count / time_diff * 1000
        print "consumer process rate: ", comsumer_rate, total_count, time_diff
        return consumer_rate


    def delete_topic_data(self, topic_name):
        nfs_dir = "/data"
        data_list = os.listdir(nfs_dir)
        for broker in data_list:
            broker_dir = "%s/%s" % (nfs_dir, broker)
            broker_data_list = os.listdir(broker_dir)
            for log_dir in broker_data_list:
                broker_data = "%s/%s" % (broker_dir, log_dir)
                data_list = os.listdir(broker_data)
                for data in data_list:
                    if data.find(topic_name) != -1:
                        print data
                        break

Example #10

Show file

File: broker.py Project: prophetstor-ai/federatorai-demo-kafka

class Broker(Client):
    """Client specialisation that summarises Kafka broker pod metrics.

    The metrics are read from the ``app_list`` of the shared ``WriteLog``
    helper ``w`` and appended to a log file by ``write_logs``.
    """
    # Class-level helper objects: created once and shared by all instances.
    oc = OC()
    k = Kubectl()
    w = WriteLog()

    def __init__(self):
        """Set the target namespace/app and copy them onto the shared helper."""
        super(Broker, self).__init__()
        self.namespace = "myproject"
        self.app_name = "my-cluster-kafka"
        self.app_type = "statefulset"
        # `w` is a class attribute, so these assignments affect every Broker.
        self.w.namespace = self.namespace
        self.w.app_name = self.app_name
        self.w.app_type = self.app_type

    def wait_time(self, value):
        """Block the caller for ``value`` seconds."""
        seconds = value
        time.sleep(seconds)

    def calculate_pod_info(self):
        app_cpu_value = 0
        app_memory_value = 0
        app_cpu_limit = 0
        app_memory_limit = 0
        app_restart = 0
        app_status_running = 0
        app_status_crashloopbackoff = 0
        app_status_oomkilled = 0

        for pod in self.w.app_list[self.app_name].keys():
            if pod.find("zookeeper") != -1 or pod.find("exporter") != -1:
                continue
            for item in self.w.app_list[self.app_name][pod].keys():
                if item in ["cpu_value"]:
                    app_cpu_value += self.w.app_list[
                        self.app_name][pod]["cpu_value"]
                elif item in ["memory_value"]:
                    app_memory_value += self.w.app_list[
                        self.app_name][pod]["memory_value"]
                elif item in ["pod_cpu_limits"]:
                    app_cpu_limit += self.w.app_list[
                        self.app_name][pod]["pod_cpu_limits"]
                elif item in ["pod_memory_limits"]:
                    app_memory_limit += self.w.app_list[
                        self.app_name][pod]["pod_memory_limits"]
                elif item in ["restart"]:
                    app_restart += self.w.app_list[
                        self.app_name][pod]["restart"]
                elif item == "status":
                    status = self.w.app_list[self.app_name][pod]["status"]
                    if status in ["Running"]:
                        app_status_running += 1
                    if status in ["CrashLoopBackOff"]:
                        app_status_crashloopbackoff += 1
                    if status in ["OOMKilled"]:
                        app_status_oomkilled += 1
        print "- Brokers: CPU %s/%s mCore; Memory %s/%s Mi; Restart %s" % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart)
        output = "%s %s %s %s %s %s %s %s " % (
            app_cpu_value, app_cpu_limit, app_memory_value, app_memory_limit,
            app_restart, app_status_running, app_status_crashloopbackoff,
            app_status_oomkilled)
        return output

    def calculate_overlimit(self):
        app_cpu_overlimit = 0
        app_memory_overlimit = 0

        count = 0
        # calculate overlimit
        for pod in self.w.app_list[self.app_name].keys():
            if pod.find("zookeeper") != -1 or pod.find("exporter") != -1:
                continue
            cpu_value = self.w.app_list[self.app_name][pod]["cpu_value"]
            memory_value = self.w.app_list[self.app_name][pod]["memory_value"]
            cpu_limit = self.w.app_list[self.app_name][pod]["pod_cpu_limits"]
            memory_limit = self.w.app_list[
                self.app_name][pod]["pod_memory_limits"]
            if cpu_limit <= cpu_value:
                app_cpu_overlimit += 1
            if memory_limit <= memory_value:
                app_memory_overlimit += 1
            count += 1
        num_replica = count
        print "- Brokers: OverLimit %s; OOM: %s" % (app_cpu_overlimit,
                                                    app_memory_overlimit)
        output = "%s %s %s " % (app_cpu_overlimit, app_memory_overlimit,
                                num_replica)
        return output

    def calculate_performance(self):
        num_partition = 0
        output = self.describe_topic(topic_name)
        for line in output.split("\n"):
            if line and line.find("ReplicationFactor") == -1:
                if line.find("Isr") != -1:
                    num_partition += 1
        print "- Brokers: Partitions %s" % num_partition
        result = "%s " % num_partition
        return result

    def write_logs(self, algo_name):
        self.w.get_deploymentconfig()
        self.w.get_pod_info()
        self.w.get_limits()
        self.w.get_metrics()
        self.w.get_status()

        file_name = "%s/%s_broker_metrics" % (traffic_path, algo_name)
        timestamp = int(time.time())
        line = "%s " % (timestamp)
        line += self.calculate_pod_info()
        line += self.calculate_overlimit()
        line += self.calculate_performance()
        line += "\n"

        try:
            with open(file_name, "a") as f:
                f.write(line)
        except Exception as e:
            print "failed to write broker logs(%s): %s" % (file_name, str(e))
            return -1

        # print "success to write broker logs(%s)" % file_name
        return 0