Example #1
File: status.py  Project: benc-uk/pikube
def showNodeMetrics():
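  # NOTE: this snippet is a fragment; it assumes module-level setup elsewhere in the
  # project, e.g. `client` and `utils` from the kubernetes package, a CoreV1Api
  # instance bound to `v1`, the standard `json` module, and an `lcd` display object.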
  # Get usage metrics from raw API call to /apis/metrics.k8s.io/v1beta1/nodes
  api_client = client.ApiClient()
  raw_resp = api_client.call_api('/apis/metrics.k8s.io/v1beta1/nodes/', 'GET', _preload_content=False)
  # The raw response is not decoded automatically, so parse the JSON body by hand
  response_metrics = json.loads(raw_resp[0].data.decode('utf-8'))

  # Call list Nodes
  nodes = v1.list_node()
  count = 0
  for node in nodes.items:
    if count > 2: break
    # Get name and cpu/mem capacity 
    name = node.metadata.name
    mem_capacity = utils.parse_quantity(node.status.allocatable["memory"])
    cpu_capacity = utils.parse_quantity(node.status.allocatable["cpu"])

    # Search node metrics we grabbed before keyed on node name
    node_metrics = next(n for n in response_metrics["items"] if n["metadata"]["name"] == name)
    mem_usage = utils.parse_quantity(node_metrics["usage"]["memory"])
    cpu_usage = utils.parse_quantity(node_metrics["usage"]["cpu"])

    cpu_perc = round((cpu_usage/cpu_capacity)*100)
    mem_perc = round((mem_usage/mem_capacity)*100) 

    lcd.set_cursor_position(7, count)
    lcd.write(f"{cpu_perc: 3}%")
    lcd.set_cursor_position(12, count)
    lcd.write(f"{mem_perc: 3}%")    
    count = count + 1
Example #2
def get_kubernetes_node_info_from_API():
    config.load_kube_config()
    api_instance = client.CoreV1Api()

    # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CoreV1Api.md#list_node
    pretty = 'true'
    timeout_seconds = 56

    ret = dict()
    try:
        api_response = api_instance.list_node(pretty=pretty,
                                              timeout_seconds=timeout_seconds)
        for node in api_response.items:
            ret[node.metadata.name] = {
                "cpu-resource":
                int(parse_quantity(node.status.allocatable['cpu'])),
                "mem-resource":
                int(
                    parse_quantity(node.status.allocatable['memory']) / 1024 /
                    1024),
                "gpu-resource":
                int(parse_quantity(node.status.allocatable['nvidia.com/gpu'])),
            }
    except ApiException as e:
        logger.error("Exception when calling CoreV1Api->list_node: %s\n" % e)

    return ret
Example #3
    def test_parse(self):
        self.assertIsInstance(parse_quantity(2.2), Decimal)
        # input, expected output
        tests = [
            (0, 0),
            (2, 2),
            (2, Decimal("2")),
            (2., 2),
            (Decimal("2.2"), Decimal("2.2")),
            (2., Decimal(2)),
            (Decimal("2."), 2),
            ("123", 123),
            ("2", 2),
            ("2n", Decimal("2") * Decimal(1000)**-3),
            ("2u", Decimal("0.000002")),
            ("2m", Decimal("0.002")),
            ("0m", Decimal("0")),
            ("0M", Decimal("0")),
            ("223k", 223000),
            ("002M", 2 * 1000**2),
            ("2M", 2 * 1000**2),
            ("4123G", 4123 * 1000**3),
            ("2T", 2 * 1000**4),
            ("2P", 2 * 1000**5),
            ("2E", 2 * 1000**6),
            ("223Ki", 223 * 1024),
            ("002Mi", 2 * 1024**2),
            ("2Mi", 2 * 1024**2),
            ("2Gi", 2 * 1024**3),
            ("4123Gi", 4123 * 1024**3),
            ("2Ti", 2 * 1024**4),
            ("2Pi", 2 * 1024**5),
            ("2Ei", 2 * 1024**6),
            ("2.34n", Decimal("2.34") * Decimal(1000)**-3),
            ("2.34u", Decimal("2.34") * Decimal(1000)**-2),
            ("2.34m", Decimal("2.34") * Decimal(1000)**-1),
            ("2.34Ki", Decimal("2.34") * 1024),
            ("2.34", Decimal("2.34")),
            (".34", Decimal("0.34")),
            ("34.", 34),
            (".34M", Decimal("0.34") * 1000**2),
            ("2e2K", Decimal("2e2") * 1000),
            ("2e2Ki", Decimal("2e2") * 1024),
            ("2e-2Ki", Decimal("2e-2") * 1024),
            ("2.34E1", Decimal("2.34E1")),
            (".34e-2", Decimal("0.34e-2")),
        ]

        for inp, out in tests:
            self.assertEqual(parse_quantity(inp), out)
            if isinstance(inp, (int, float, Decimal)):
                self.assertEqual(parse_quantity(-1 * inp), -out)
            else:
                self.assertEqual(parse_quantity("-" + inp), -out)
                self.assertEqual(parse_quantity("+" + inp), out)
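The cases above pin down the unit semantics: plain SI suffixes scale by powers of 1000, the "i" suffixes by powers of 1024, and every result is a decimal.Decimal. A minimal standalone sketch of the same behavior (assuming the kubernetes package is installed):

from decimal import Decimal

from kubernetes.utils import parse_quantity

assert isinstance(parse_quantity("2.5"), Decimal)    # always returns Decimal
assert parse_quantity("500m") == Decimal("0.5")      # SI suffix: 500 milli-units
assert parse_quantity("128Mi") == 128 * 1024 ** 2    # binary suffix: bytes
assert parse_quantity("1G") == 1000 ** 3             # decimal suffix: bytes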
Example #4
def get_pod_requests(pod):
    ret = {
        "cpu-resource": 0,
        "mem-resource": 0,
    }
    for container in pod.spec.containers:
        if container.resources.requests is None:
            continue
        ret["cpu-resource"] += parse_quantity(
            container.resources.requests.get("cpu", 0))
        ret["mem-resource"] += parse_quantity(
            container.resources.requests.get("memory", 0)) / 1024 / 1024
    return ret
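A hedged usage sketch for the helper above; the namespace, the client setup, and the running totals are illustrative assumptions, not part of the original project:

from kubernetes import client, config

config.load_kube_config()
v1 = client.CoreV1Api()

total_cpu = 0
total_mem_mib = 0
for pod in v1.list_namespaced_pod("default").items:  # namespace chosen for illustration
    requests = get_pod_requests(pod)
    total_cpu += requests["cpu-resource"]
    total_mem_mib += requests["mem-resource"]
print(f"requested in total: {total_cpu} CPU cores, {total_mem_mib} MiB memory")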
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-l',
                        '--layout',
                        dest="layout",
                        required=True,
                        help="layout.yaml")
    parser.add_argument('-c',
                        '--config',
                        dest="config",
                        required=True,
                        help="cluster configuration")
    parser.add_argument('-o',
                        '--output',
                        dest="output",
                        required=True,
                        help="cluster configuration")
    args = parser.parse_args()

    output_path = os.path.expanduser(args.output)

    layout = load_yaml_config(args.layout)
    config = load_yaml_config(args.config)

    masters, workers = get_masters_workers_from_layout(layout)
    head_node = masters[0]

    # fill in cpu, memory, computing_device information in both masters and workers
    # we assume the layout file the user gives is correct
    all_machines = masters + workers
    for machine in all_machines:
        sku_info = layout['machine-sku'][machine['machine-type']]
        # use math.ceil to guarantee the memory volume
        # e.g. if the user sets 999.1MB, round up to 1000MB to avoid scheduling issues
        machine['memory_mb'] = math.ceil(
            parse_quantity(sku_info['mem']) / 1024 / 1024)
        machine['cpu_vcores'] = sku_info['cpu']['vcore']
        if 'computing-device' in sku_info:
            machine['computing_device'] = sku_info['computing-device']

    # add each machine to its computing device group
    computing_device_groups = defaultdict(list)
    for machine in all_machines:
        sku_info = layout['machine-sku'][machine['machine-type']]
        if 'computing-device' in sku_info:
            computing_device_groups[sku_info['computing-device']
                                    ['type']].append(machine['hostname'])

    environment = {
        'masters': masters,
        'workers': workers,
        'cfg': config,
        'head_node': head_node,
        'computing_device_groups': computing_device_groups,
    }

    map_table = {"env": environment}

    generate_template_file("quick-start/pre-check.yml.template",
                           "{0}/pre-check.yml".format(output_path), map_table)
Example #6
def get_prophet_daemon_resource_request(cfg):
    ret = {"cpu-resource": 0, "mem-resource": 0}
    if "qos-switch" not in cfg or cfg["qos-switch"] == "false":
        logger.info(
            "Skipping prophet daemon resource calculation since qos-switch is set to false"
        )
        return ret

    prophet_daemon_services = ["node-exporter", "job-exporter", "log-manager"]
    prophet_source_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "../../../src")

    # {%- if cluster_cfg['cluster']['common']['qos-switch'] == "true" %}
    start_match = r"{%-?\s*if\s*cluster_cfg\['cluster'\]\['common'\]\['qos-switch'\][^}]+%}"
    end_match = r"{%-?\s*endif\s*%}"  # {%- endif %}
    str_match = "{}(.*?){}".format(start_match, end_match)
    regex = re.compile(str_match, flags=re.DOTALL)

    for prophet_daemon in prophet_daemon_services:
        deploy_template_path = os.path.join(
            prophet_source_path,
            "{0}/deploy/{0}.yaml.template".format(prophet_daemon))
        if os.path.exists(deploy_template_path):
            template = read_template(deploy_template_path)
            match = regex.search(template)
            if not match:
                logger.warning(
                    "Could not find resource request for service %s",
                    prophet_daemon)
                continue
            resources = yaml.load(match.group(1), yaml.SafeLoader)["resources"]
            if "requests" in resources:
                ret["cpu-resource"] += parse_quantity(
                    resources["requests"].get("cpu", 0))
                ret["mem-resource"] += parse_quantity(
                    resources["requests"].get("memory", 0)) / 1024 / 1024
            elif "limits" in resources:
                ret["cpu-resource"] += parse_quantity(resources["limits"].get(
                    "cpu", 0))
                ret["mem-resource"] += parse_quantity(resources["limits"].get(
                    "memory", 0)) / 1024 / 1024
        else:
            logger.warning("Could not find resource request for PAI daemon %s",
                           prophet_daemon)
    return ret
Example #7
def k8s_container_resource_requirements(container) -> Dict[str, int]:
    """
    returns dict in format:
     {"req_cpu": 1, "req_mem": 1, "lim_cpu": 2, "lim_mem": 2}
    """
    try:
        cpu = 0
        memory = 0
        if not container.resources:
            return {
                "req_cpu": cpu,
                "req_mem": memory,
                "lim_cpu": cpu,
                "lim_mem": memory
            }
        limits = container.resources.limits
        lim_cpu = 0
        lim_mem = 0
        if limits and limits.get('cpu'):
            cpu = parse_quantity(limits['cpu'])
            lim_cpu = cpu
        if limits and limits.get('memory'):
            memory = parse_quantity(limits['memory'])
            lim_mem = memory
        requests = container.resources.requests
        req_cpu = 0
        req_mem = 0
        if requests and requests.get('cpu'):
            cpu = parse_quantity(requests['cpu'])
            req_cpu = cpu
        if requests and requests.get('memory'):
            memory = parse_quantity(requests['memory'])
            req_mem = memory
        return {
            "req_cpu": req_cpu,
            "req_mem": req_mem,
            "lim_cpu": lim_cpu,
            "lim_mem": lim_mem
            }
    except Exception:
        logger.exception('Error getting resource requirements for container')
Example #8
def get_k8s_cluster_info(working_dir, dns_prefix, location):
    kube_config_path = "{0}/_output/{1}/kubeconfig/kubeconfig.{2}.json".format(
        working_dir, dns_prefix, location)
    master_string = "opmaster"
    worker_string = "opworker"

    config.load_kube_config(config_file=kube_config_path)
    api_instance = client.CoreV1Api()
    pretty = 'true'
    timeout_seconds = 56

    master = dict()
    worker = dict()
    sku = None
    gpu_enable = False
    master_ip = None
    master_ip_internal = None

    worker_count = 0
    worker_with_gpu = 0

    try:
        api_response = api_instance.list_node(pretty=pretty,
                                              timeout_seconds=timeout_seconds)
        for node in api_response.items:
            gpu_resource = 0
            if 'nvidia.com/gpu' in node.status.allocatable:
                gpu_resource = int(
                    parse_quantity(node.status.allocatable['nvidia.com/gpu']))
            if master_string in node.metadata.name:
                master[node.metadata.name] = {
                    "cpu-resource":
                    int(parse_quantity(node.status.allocatable['cpu'])) - 2,
                    "mem-resource":
                    int(
                        parse_quantity(node.status.allocatable['memory']) /
                        1024 / 1024) - 8 * 1024,
                    "gpu-resource":
                    gpu_resource,
                }
                master[node.metadata.name]["hostname"] = node.metadata.name
                for address in node.status.addresses:
                    if address.type == "Hostname":
                        continue
                    if master_ip is None:
                        master_ip = address.address
                    if address.type == "ExternalIP":
                        master_ip = address.address
                    if address.type == "InternalIP":
                        master[node.metadata.name]["ip"] = address.address
                        master_ip_internal = address.address
            elif worker_string in node.metadata.name:
                worker[node.metadata.name] = {
                    "cpu-resource":
                    int(parse_quantity(node.status.allocatable['cpu'])) - 2,
                    "mem-resource":
                    int(
                        parse_quantity(node.status.allocatable['memory']) /
                        1024 / 1024) - 8 * 1024,
                    "gpu-resource":
                    gpu_resource,
                }
                if sku is None:
                    sku = dict()
                    if gpu_resource != 0:
                        sku["gpu_resource"] = worker[
                            node.metadata.name]["gpu-resource"]
                        sku["mem-unit"] = int(
                            worker[node.metadata.name]["mem-resource"] /
                            worker[node.metadata.name]["gpu-resource"])
                        sku["cpu-unit"] = int(
                            worker[node.metadata.name]["cpu-resource"] /
                            worker[node.metadata.name]["gpu-resource"])
                    else:
                        sku["cpu_resource"] = worker[
                            node.metadata.name]["cpu-resource"]
                        sku["mem-unit"] = int(
                            worker[node.metadata.name]["mem-resource"] /
                            worker[node.metadata.name]["cpu-resource"])
                worker_count = worker_count + 1
                if worker[node.metadata.name]["gpu-resource"] != 0:
                    worker_with_gpu = worker_with_gpu + 1
                    gpu_enable = True
                worker[node.metadata.name]["hostname"] = node.metadata.name
                for address in node.status.addresses:
                    if address.type == "Hostname":
                        continue
                    if address.type == "InternalIP":
                        worker[node.metadata.name]["ip"] = address.address

    except ApiException as e:
        logger.error("Exception when calling CoreV1Api->list_node: %s\n" % e)

    return {
        "master":
        master,
        "worker":
        worker,
        "sku":
        sku,
        "gpu":
        gpu_enable,
        "gpu-ready":
        worker_count == worker_with_gpu,
        "master_ip":
        master_ip,
        "master_internal":
        master_ip_internal,
        "working_dir":
        "{0}/{1}".format(working_dir, TEMPORARY_DIR_NAME),
        "kube_config":
        "{0}/_output/{1}/kubeconfig/kubeconfig.{2}.json".format(
            working_dir, dns_prefix, location)
    }
Example #9
def parse_kubernetes_value(val: str) -> str:
    return str(parse_quantity(val))
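For illustration, the wrapper simply renders the parsed quantity back as a plain decimal string (the calls below are illustrative, not from the original source):

print(parse_kubernetes_value("500Mi"))   # 524288000 -- Mi scales by 1024**2, result is bytes
print(parse_kubernetes_value("42"))      # 42 -- plain numbers pass through unchanged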
Example #10
File: mem-cpu.py  Project: benc-uk/pikube
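    # NOTE: this snippet is a fragment from inside a larger function; it assumes
    # `client` and `utils` from the kubernetes package, a CoreV1Api instance bound
    # to `v1`, the `unicorn` LED HAT library, a `width` constant, and the
    # drawBarRed/drawBarGreen helpers defined elsewhere in the project.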
    api_client = client.ApiClient()
    raw_resp = api_client.call_api('/apis/metrics.k8s.io/v1beta1/nodes/',
                                   'GET',
                                   _preload_content=False)
    # The raw response is not decoded automatically, so parse the JSON body by hand
    response_metrics = json.loads(raw_resp[0].data.decode('utf-8'))

    # Call list Nodes
    nodes = v1.list_node()

    node_index = 0
    unicorn.off()
    for node in nodes.items:
        # Get name and cpu/mem capacity
        name = node.metadata.name
        mem_capacity = utils.parse_quantity(node.status.allocatable["memory"])
        cpu_capacity = utils.parse_quantity(node.status.allocatable["cpu"])

        # Search node metrics we grabbed before keyed on node name
        node_metrics = next(n for n in response_metrics["items"]
                            if n["metadata"]["name"] == name)
        mem_usage = utils.parse_quantity(node_metrics["usage"]["memory"])
        cpu_usage = utils.parse_quantity(node_metrics["usage"]["cpu"])

        cpu_led_max = round((cpu_usage / cpu_capacity) * width)
        mem_led_max = round((mem_usage / mem_capacity) * width)
        drawBarRed(cpu_led_max, node_index)
        drawBarGreen(mem_led_max, node_index + 1)
        # Advance three rows so there is a gap between nodes
        # NOTE: this layout only works with three nodes
        node_index = node_index + 3
Example #11
def parse_cpu_quantity(q):
    if q is None:
        return None
    return parse_quantity(q) * 1000
Example #12
def parse_memory_quantity(q):
    if q is None:
        return None
    return parse_quantity(q) / MEBIBYTE
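Examples #11 and #12 together cover the usual unit conversions: CPU quantities to millicores and memory quantities to mebibytes. A minimal self-contained sketch of the same conversions, assuming MEBIBYTE is defined as 1024 * 1024 as example #12 implies:

from kubernetes.utils import parse_quantity

MEBIBYTE = 1024 * 1024  # assumed value of the constant used in example #12

# "250m" means 250 millicores; multiplying the parsed core count by 1000 recovers that.
assert parse_quantity("250m") * 1000 == 250
# "512Mi" parses to a byte count; dividing by MEBIBYTE yields MiB.
assert parse_quantity("512Mi") / MEBIBYTE == 512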