Example no. 1
0
def new_client_from_dict(conf: dict, context: str):
    """Build a Kubernetes ApiClient from an in-memory kubeconfig dict.

    conf: kubeconfig contents as a dict (same shape as parsed kubeconfig YAML).
    context: name of the kubeconfig context to activate.
    Returns an ApiClient bound to a fresh, non-persisted client configuration.
    """
    # type.__call__ constructs a brand-new Configuration instance directly.
    # NOTE(review): presumably this deliberately bypasses a metaclass __call__
    # on Configuration (older kubernetes clients make Configuration() return a
    # copy of the default) — confirm before simplifying to Configuration().
    client_config = type.__call__(Configuration)
    config.load_kube_config_from_dict(config_dict=conf,
                                      context=context,
                                      persist_config=False,  # never write the merged config back to disk
                                      client_configuration=client_config)
    return ApiClient(configuration=client_config)
async def _create_test_pod(model):
    """Launch a throwaway busybox pod, wait for it to leave the Pending
    phase, delete it, and return the last observed pod object.
    """
    # Fetch cluster credentials from the given model and activate them.
    kubeconfig = await _get_kubeconfig(model)
    load_kube_config_from_dict(kubeconfig)

    api = client.CoreV1Api()

    container_spec = {
        "image": "busybox",
        "name": "test",
        "args": ["echo", '"test"'],
    }
    pod_manifest = {
        "apiVersion": "v1",
        "kind": "Pod",
        "metadata": {"name": "test"},
        "spec": {"containers": [container_spec]},
    }

    log.info("Creating Test Pod")
    resp = api.create_namespaced_pod(body=pod_manifest, namespace="default")

    # Poll every 10 seconds, giving up after 30 polls (~5 minutes).
    for attempt in range(30):
        if resp.status.phase != "Pending":
            break
        log.info("pod pending {s} seconds...".format(s=attempt * 10))
        sleep(10)
        resp = api.read_namespaced_pod("test", namespace="default")

    api.delete_namespaced_pod("test", namespace="default")
    return resp
Example no. 3
0
    def waitForAutoscalerPodInRunningState(self,
                                           cluster_id,
                                           retries=5,
                                           interval=60):
        """Poll until the cluster-autoscaler pod reaches the Running phase.

        Sleeps `interval` seconds before each poll and gives up after
        `retries` polls.

        :param cluster_id: id of the Kubernetes cluster to inspect
        :param retries: maximum number of polls before giving up
        :param interval: seconds to sleep before each poll
        :return: True if the pod was observed Running, False otherwise
        """
        k8s_config = self.fetchKubernetesClusterConfig(cluster_id)
        cfg = yaml.safe_load(io.StringIO(k8s_config.configdata))
        # Adding this so we don't get certificate exceptions
        cfg['clusters'][0]['cluster']['insecure-skip-tls-verify'] = True
        config.load_kube_config_from_dict(cfg)
        v1 = client.CoreV1Api()

        while retries > 0:
            time.sleep(interval)
            pods = v1.list_pod_for_all_namespaces(
                watch=False, label_selector="app=cluster-autoscaler").items
            if not pods:
                self.debug("Autoscaler pod still not up")
                # Bug fix: the original `continue` skipped the decrement,
                # looping forever if the pod never appeared at all.
                retries = retries - 1
                continue
            pod = pods[0]
            if pod.status.phase == 'Running':
                self.debug("Autoscaler pod %s up and running!" %
                           pod.metadata.name)
                return True
            self.debug(
                "Autoscaler pod %s up but not running on retry %d. State is : %s"
                % (pod.metadata.name, retries, pod.status.phase))
            retries = retries - 1
        return False
Example no. 4
0
def test():
    """Smoke-test two clusters: load the DEV context, list default-namespace
    pods, switch to the QA context and list again, then re-use the first
    client to show it still answers.
    """
    def _print_pods(api, label):
        # List pods in the default namespace and print one name per line.
        print(label)
        for pod in api.list_namespaced_pod('default').items:
            print(pod.metadata.name)

    config.load_kube_config_from_dict(
        config_dict=DEV_KUBE_CONFIG,
        context='kubernetes-admin-c52cb6ad200314d608c83bc5c5edce7bd')
    v1 = client.CoreV1Api()
    _print_pods(v1, 'v1')

    config.load_kube_config_from_dict(
        config_dict=QA_KUBE_CONFIG,
        context='kubernetes-admin-c5af3042e70684dc7abac02e2340c2188')
    v2 = client.CoreV1Api()
    _print_pods(v2, 'v2')

    # The first client keeps its own configuration after the context switch.
    _print_pods(v1, 'v1')
Example no. 5
0
def load_auth(auth_type=None, token=None, **kwargs):
    """Configure the process-wide Kubernetes client for the caller.

    If neither auth_type nor token is given, both are pulled from the
    session of the request passed via kwargs. Supported auth types:
    'token' (bearer token set as the default client configuration) and
    'kube_config' (kubeconfig dict cached under the token key).
    """
    if not (auth_type or token):
        # Fall back to the values stored on the incoming request's session.
        request = kwargs.get('request')
        auth_type = request.session.get('auth_type')
        token = request.session.get('token')

    if auth_type == 'token':
        bearer = cache.get(token)
        cfg = client.Configuration()
        cfg.host = 'https://192.168.35.61:6443'
        cfg.ssl_ca_cert = Path(settings.BASE_DIR, 'static', 'ca.crt')
        cfg.verify_ssl = True
        cfg.api_key = {'authorization': 'Bearer {}'.format(bearer)}
        # Make this configuration the default for every new client.
        client.Configuration.set_default(cfg)

    elif auth_type == 'kube_config':
        # Deprecated authentication approach:
        # file_path = Path('kube_config', token)
        # config.load_kube_config(r'%s' % file_path)
        kube_yaml = cache.get(token)
        config.load_kube_config_from_dict(kube_yaml)
Example no. 6
0
def get_user_configuration(user):
    """
    Build and load a Kubernetes client-library configuration for *user*.

    The portal user's own credentials are used and the context is pinned
    to the user's service-account namespace.

    Returns None on error.
    """
    if not user.has_access_approved():
        logger.error(
            "Kubernetes API configuration for user unavailable, user is not approved."
        )
        return None

    if not settings.API_SERVER_EXTERNAL:
        logger.error(
            "Kubernetes API configuration for user unavailable, API_SERVER_EXTERNAL is not set."
        )
        return None

    username = user.username

    # Single cluster entry pointing at the externally reachable API server.
    cluster_entry = {
        'name': 'thecluster',
        'cluster': {
            'insecure-skip-tls-verify': True,
            'server': settings.API_SERVER_EXTERNAL
        }
    }
    # Context restricted to the user's own namespace.
    context_entry = {
        'name': username,
        'context': {
            'cluster': 'thecluster',
            'namespace': user.service_account.namespace.name,
            'user': username
        }
    }
    # Bearer-token credentials for the user's service account.
    user_entry = {
        'name': username,
        'user': {
            'token': get_token(user.service_account)
        }
    }

    config_data = {
        'current-context': username,
        'contexts': [context_entry],
        'clusters': [cluster_entry],
        'users': [user_entry]
    }

    return config.load_kube_config_from_dict(config_data)
def setup_gke():
    """Load the GKE kubeconfig shipped next to this module and activate it
    for the Kubernetes client library.
    """
    filename = os.path.join(CURR_DIR, "kubeconfig.yaml")
    # Use a context manager so the file handle is closed deterministically;
    # the original open(...).read() leaked it until garbage collection.
    # yaml.safe_load accepts a stream, so no intermediate read() is needed.
    with open(filename, "r") as fh:
        config_dict = yaml.safe_load(fh)
    config.load_kube_config_from_dict(config_dict)
Example no. 8
0
def k8s_apply():
    """Create the K8s Deployment + Service for the requested models and start
    the orchestration components.

    Reads the JSON request body (keys seen here: "models", "available_gpus",
    "tfs_image", optional "k8s_api_configuration"), updates the shared
    configs, generates the K8s manifests, applies them, waits for the
    service endpoints, builds the container list, then configures and
    starts the components.

    Returns a (dict, status_code) pair: {"endpoints": ...} with 200 on
    success, or an error payload with 400.
    """
    global status, configs, containers

    # set deployment config
    data = request.get_json()
    logging.info(type(data["models"]))
    # append uuid to model name
    append_models_uuid(data["models"])
    configs["containers_manager"].models = data["models"]
    # take the first initial_replicas
    configs["k8s_config"].initial_replicas = data["models"][0]["initial_replicas"]
    configs["k8s_config"].models = data["models"]
    logging.info("models: " + str(configs["k8s_config"].models))
    configs["k8s_config"].available_gpus = data["available_gpus"]
    configs["k8s_config"].tfs_image = data["tfs_image"]
    if "k8s_api_configuration" in data:
        configs["k8s_config"].k8s_api_configuration = data["k8s_api_configuration"]

    generate_k8s_deployment_service()

    if k8s_deployment and k8s_service and k8s_containers:
        # configure K8s API
        # Prefer a kubeconfig dict supplied in the request; otherwise fall
        # back to the default kubeconfig on disk.
        if configs["k8s_config"].k8s_api_configuration:
            logging.info("K8s API using config: " + str(configs["k8s_config"].k8s_api_configuration))
            config_k8s_api.load_kube_config_from_dict(configs["k8s_config"].k8s_api_configuration)
        else:
            logging.info("K8s API using default config")
            config_k8s_api.load_kube_config()

        # apply k8s deployment
        apps_api = client_k8s_api.AppsV1Api()
        try:
            resp = apps_api.create_namespaced_deployment(namespace="default", body=k8s_deployment)
            if resp and resp.metadata and resp.metadata.name:
                logging.info("Service created. status='%s'" % resp.metadata.name)
            else:
                # Treat an empty / metadata-less API response as a failure.
                raise client_k8s_api.exceptions.ApiException()
        except client_k8s_api.exceptions.ApiException:
            status = "error K8s deployment"
            logging.info(status)
            return {"result": "error during the creation of the K8s deployment"}, 400

        # apply k8s service
        try:
            apps_api = client_k8s_api.CoreV1Api()
            resp = apps_api.create_namespaced_service(namespace="default", body=k8s_service)
            if resp and resp.metadata and resp.metadata.name:
                logging.info("Service created. status='%s'" % resp.metadata.name)
            else:
                raise client_k8s_api.exceptions.ApiException()
        except client_k8s_api.exceptions.ApiException:
            status = "error K8s service"
            logging.info(status)
            return {"result": "error during the creation of the K8s service"}, 400

        # list node IPs
        # wait until the service is applied in k8s
        # Exponential backoff: sleep_time doubles each round until `timeout`
        # total seconds have been waited.
        sleep_time = 2
        waited_time = 0
        timeout = 100
        service_ok = False
        while not service_ok:
            logging.info("Waiting %ds, total waited %ds/%ds for K8s service..." % (sleep_time, waited_time, timeout))
            time.sleep(sleep_time)
            waited_time += sleep_time
            resp = apps_api.read_namespaced_endpoints(namespace="default", name="nodemanager-svc")
            # check if response
            if resp and resp.subsets and len(resp.subsets) > 0:
                for subset in resp.subsets:
                    # check if subset is ready
                    # A subset with no not-ready addresses is considered ready.
                    if not subset.not_ready_addresses:
                        service_ok = True
            sleep_time *= 2
            if waited_time > timeout:
                status = "error K8s service timeout"
                logging.info(status)
                return {"result": "error timeout waiting for K8s service"}, 400

        # Collect the IP of every endpoint address across all subsets.
        k8s_nodes = []
        for i, subset in enumerate(resp.subsets):
            if not resp.subsets[i].addresses:
                status = "error K8s service IPs not found"
                logging.info(status)
                return {"result": "error K8s service, node IPs not found, API returned: " + str(resp)}, 400
            if len(resp.subsets[i].addresses) == 0:
                status = "error K8s service IPs empty"
                logging.info(status)
                return {"result": "error K8s service, node IPs empty, API returned: " + str(resp)}, 400
            for j, address in enumerate(resp.subsets[i].addresses):
                k8s_nodes.append(address.ip)
        logging.info("Available node (IPs), node ips=" + str(k8s_nodes))

        # populate the container list
        # NOTE(review): `container = k8s_container` binds the same object, so
        # with several nodes each template's node is overwritten by the last
        # set_node() call and the same instance is appended multiple times —
        # confirm whether a copy was intended here.
        containers = []
        for node in k8s_nodes:
            for k8s_container in k8s_containers:
                container = k8s_container
                container.active = True
                container.set_node(node)
                containers.append(container)

        logging.info("+ %d CPU containers, not linked yet", len(list(filter(lambda m: m.device == Device.CPU, containers))))
        logging.info("+ %d GPU containers, not linked yet", len(list(filter(lambda m: m.device == Device.GPU, containers))))
        logging.info("containers: " + str([c.to_json() for c in containers]))

        # update containers
        configs["containers_manager"].containers = [c.to_json() for c in containers]

        # configure and start components
        status = "configuring components"
        logging.info(status)
        configured = configure_components()
        if configured:
            status = "starting components"
            logging.info(status)
            started = start_components()
            if started:
                status = "active"
                logging.info(status)
                # m["name"][:-37] strips the uuid suffix added by
                # append_models_uuid (presumably "-" + 36-char UUID — TODO
                # confirm) to recover the original model name for the endpoint key.
                endpoints = {m["name"][:-37]: configs["orchestrator"].dispatcher + "/predict/" + m["name"]
                             for m in configs["k8s_config"].models}
                return {"endpoints": endpoints}, 200
        return {"result": "error configuration"}, 400
    else:
        return {"result": "not configured yet"}, 400