def __init__(self):
    self.logger = logging.getLogger(__name__)
    try:
        config_file = os.path.expanduser(kubeconfig_filepath)
        config.load_kube_config(config_file=config_file)
    except Exception:
        self.logger.warning("unable to load kube-config")

    self.v1 = client.CoreV1Api()
    self.v1Beta1 = client.AppsV1beta1Api()
    self.extensionsV1Beta1 = client.ExtensionsV1beta1Api()
    self.autoscalingV1Api = client.AutoscalingV1Api()
    self.rbacApi = client.RbacAuthorizationV1beta1Api()
    self.batchV1Api = client.BatchV1Api()
    self.batchV2Api = client.BatchV2alpha1Api()
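# The constructors in this section assume the official `kubernetes` Python
# client. A minimal, hedged setup sketch: `kubeconfig_filepath` is defined
# elsewhere in the original, so the default below is illustrative only.
import logging
import os

from kubernetes import client, config

kubeconfig_filepath = "~/.kube/config"  # hypothetical default, for illustration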
def kube_apis(cli_arguments) -> KubeApis:
    """
    Set up kubernetes-client to operate in cluster.

    :param cli_arguments: a set of command-line arguments
    :return: KubeApis
    """
    context_name = cli_arguments['context']
    kubeconfig = cli_arguments['kubeconfig']
    config.load_kube_config(config_file=kubeconfig, context=context_name, persist_config=False)
    v1 = client.CoreV1Api()
    extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    rbac_v1_beta1 = client.RbacAuthorizationV1beta1Api()
    return KubeApis(v1, extensions_v1_beta1, rbac_v1_beta1)
def __init__(self): """ Load the kube config and create an instance of the Kubernetes API client. """ if environ.get('KUBERNETES_PORT'): config.load_incluster_config() else: config.load_kube_config() self.client = client self._apiClient = client.ApiClient() self.coreV1 = client.CoreV1Api(self._apiClient) self.rbacV1Beta1 = client.RbacAuthorizationV1beta1Api(self._apiClient) self.extV1Beta1 = client.ExtensionsV1beta1Api(self._apiClient) self.appsV1 = client.AppsV1beta1Api() self.StorageV1beta1Api = client.StorageV1beta1Api()
def api_client_from_version(api_version):
    return {
        "v1": kube_client.CoreV1Api(),
        "apps/v1": kube_client.AppsV1Api(),
        "batch/v1": kube_client.BatchV1Api(),
        "batch/v1beta1": kube_client.BatchV1beta1Api(),
        "extensions/v1beta1": kube_client.ExtensionsV1beta1Api(),
        "rbac.authorization.k8s.io/v1beta1": kube_client.RbacAuthorizationV1beta1Api(),
        "rbac.authorization.k8s.io/v1": kube_client.RbacAuthorizationV1Api(),
    }[api_version]
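# Hedged usage sketch for api_client_from_version(): dispatch on a manifest's
# apiVersion field. The manifest literal below is illustrative, not from the
# original source.
manifest = {"apiVersion": "rbac.authorization.k8s.io/v1", "kind": "ClusterRole"}
api = api_client_from_version(manifest["apiVersion"])  # -> RbacAuthorizationV1Api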
def create_kubeconfig(self, api_server_ip: str):
    with open("%s/manifests/kubeconfig/clusterrolebinding.yaml" % self.euid_path) as f:
        clusterrolebinding_manifest = yaml.safe_load(f)

    c = kubeclient.ApiClient(host="http://%s:%d" % (api_server_ip, self.ec.kubernetes_apiserver_insecure_port))
    rbac = kubeclient.RbacAuthorizationV1beta1Api(c)
    rbac.create_cluster_role_binding(clusterrolebinding_manifest)

    kube_config = {
        'preferences': {'colors': True},
        'users': [
            {
                'user': {
                    'client-key': '%s/kubernetes_kubelet.private_key' % self.test_certs_path,
                    'client-certificate': '%s/kubernetes_kubelet.certificate' % self.test_certs_path
                },
                'name': 'enjoliver.local'
            }
        ],
        'kind': 'Config',
        'apiVersion': 'v1',
        'clusters': [
            {
                'cluster': {
                    'server': "https://%s:6443" % api_server_ip,
                    'certificate-authority': '%s/kubernetes_kubelet.issuing_ca' % self.test_certs_path
                },
                'name': 'enjoliver'
            }
        ],
        'contexts': [
            {
                'name': 'e',
                'context': {
                    'cluster': 'enjoliver',
                    'namespace': 'kube-system',
                    'user': '******'
                }
            }
        ],
        'current-context': 'e'
    }
    with open(os.path.join(self.tests_path, "testing_kubeconfig.yaml"), "w") as kc:
        yaml.dump(kube_config, kc)
def deploy_unique_rbac_resources(self):
    """
    The cluster-wide RBAC resources (clusterrole/clusterrolebinding) are not
    namespaced, so they have to be handled specially to ensure they are unique
    amongst potentially multiple deployments of the agent in the same cluster.
    Basically just sticks the test namespace as a suffix to the resource names.
    """
    corev1 = kube_client.CoreV1Api()
    rbacv1beta1 = kube_client.RbacAuthorizationV1beta1Api()

    serviceaccount = corev1.create_namespaced_service_account(
        body=load_resource_yaml(AGENT_SERVICEACCOUNT_PATH),
        namespace=self.namespace)

    clusterrole_base = load_resource_yaml(AGENT_CLUSTERROLE_PATH)
    clusterrole_base["metadata"]["name"] = f"signalfx-agent-{self.namespace}"
    clusterrole = rbacv1beta1.create_cluster_role(body=clusterrole_base)

    crb_base = load_resource_yaml(AGENT_CLUSTERROLEBINDING_PATH)
    # Make the binding refer to our testing namespace's role and service account
    crb_base["metadata"]["name"] = f"signalfx-agent-{self.namespace}"
    crb_base["roleRef"]["name"] = clusterrole.metadata.name
    crb_base["subjects"][0]["namespace"] = self.namespace
    crb = rbacv1beta1.create_cluster_role_binding(body=crb_base)

    try:
        yield
    finally:
        delete_opts = kube_client.V1DeleteOptions(
            grace_period_seconds=0, propagation_policy="Background")

        rbacv1beta1.delete_cluster_role_binding(crb.metadata.name, body=delete_opts)
        rbacv1beta1.delete_cluster_role(clusterrole.metadata.name, body=delete_opts)
        corev1.delete_namespaced_service_account(
            serviceaccount.metadata.name, namespace=self.namespace, body=delete_opts)
        print("Deleted RBAC resources")
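# Because deploy_unique_rbac_resources() is a generator, it is intended to be
# consumed as a fixture/context manager. A hedged sketch, assuming `suite` is
# an instance of the test class that defines the method above:
from contextlib import contextmanager

with contextmanager(suite.deploy_unique_rbac_resources)():
    ...  # RBAC objects exist here and are cleaned up on exit, even on failure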
def kube_apis(cli_arguments) -> KubeApis:
    """
    Set up kubernetes-client to operate in cluster.

    :param cli_arguments: a set of command-line arguments
    :return: KubeApis
    """
    context_name = cli_arguments["context"]
    kubeconfig = cli_arguments["kubeconfig"]
    config.load_kube_config(config_file=kubeconfig, context=context_name, persist_config=False)
    v1 = client.CoreV1Api()
    extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    apps_v1_api = client.AppsV1Api()
    rbac_v1_beta1 = client.RbacAuthorizationV1beta1Api()
    api_extensions_v1_beta1 = client.ApiextensionsV1beta1Api()
    custom_objects = client.CustomObjectsApi()
    return KubeApis(v1, extensions_v1_beta1, apps_v1_api, rbac_v1_beta1,
                    api_extensions_v1_beta1, custom_objects)
def create_tiller(self, api_server_ip: str):
    c = kubeclient.ApiClient(host="http://%s:%d" % (api_server_ip, self.ec.kubernetes_apiserver_insecure_port))

    with open("%s/manifests/tiller/tiller-service.yaml" % self.euid_path) as f:
        service_manifest = yaml.safe_load(f)
    with open("%s/manifests/tiller/tiller-deploy.yaml" % self.euid_path) as f:
        deploy_manifest = yaml.safe_load(f)
    with open("%s/manifests/tiller/tiller-service-account.yaml" % self.euid_path) as f:
        serviceaccount_manifest = yaml.safe_load(f)
    with open("%s/manifests/tiller/clusterrolebinding.yaml" % self.euid_path) as f:
        clusterrolebinding_manifest = yaml.safe_load(f)

    core, beta = kubeclient.CoreV1Api(c), kubeclient.ExtensionsV1beta1Api(c)
    rbac = kubeclient.RbacAuthorizationV1beta1Api(c)

    core.create_namespaced_service("kube-system", service_manifest)
    rbac.create_cluster_role_binding(clusterrolebinding_manifest)
    core.create_namespaced_service_account("kube-system", serviceaccount_manifest)
    beta.create_namespaced_deployment("kube-system", deploy_manifest)
def create_tiller_service_accounts(api_client):
    logging.info("Creating service account for tiller.")
    api = k8s_client.CoreV1Api(api_client)
    body = yaml.safe_load("""apiVersion: v1
kind: ServiceAccount
metadata:
  name: tiller
  namespace: kube-system""")
    try:
        api.create_namespaced_service_account("kube-system", body)
    except rest.ApiException as e:
        if e.status == 409:
            logging.info("Service account tiller already exists.")
        else:
            raise
    body = yaml.safe_load("""apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: tiller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: tiller
  namespace: kube-system
""")
    rbac_api = k8s_client.RbacAuthorizationV1beta1Api(api_client)
    try:
        rbac_api.create_cluster_role_binding(body)
    except rest.ApiException as e:
        if e.status == 409:
            logging.info("Role binding for service account tiller already exists.")
        else:
            raise
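# The 409-tolerant creates above recur wherever setup must be idempotent. A
# small hedged helper sketch (the helper name is illustrative, not from the
# original source):
def create_if_absent(create_fn, *args, **kwargs):
    """Call a create_* API method, treating HTTP 409 (already exists) as success."""
    try:
        return create_fn(*args, **kwargs)
    except rest.ApiException as e:
        if e.status != 409:
            raise
        logging.info("%s: resource already exists, skipping.", create_fn.__name__)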
def __init__(self, **kwargs):
    """
    Set cluster and prepare clients for all used resource types.

    Args:
        **kwargs: Keyword arguments (cluster is required)
    """
    # load configuration
    try:
        self.cluster = kwargs['cluster']
    except KeyError:
        raise ValueError('Missing parameter cluster')
    logger.debug('Initialized KubernetesAPI for {}'.format(self.cluster))

    # set apis
    api_client = self.get_api_client()
    self.api_corev1 = client.CoreV1Api(api_client=api_client)
    self.api_extensionsv1beta1 = client.ExtensionsV1beta1Api(api_client=api_client)
    self.api_rbacauthorizationv1beta1 = client.RbacAuthorizationV1beta1Api(api_client=api_client)
    self.api_version = client.VersionApi(api_client=api_client)
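# Hedged usage sketch: the debug message above suggests the class is named
# KubernetesAPI, and get_api_client() is assumed to resolve the named cluster.
k8s = KubernetesAPI(cluster="staging")  # "staging" is illustrative
info = k8s.api_version.get_code()       # VersionApi.get_code() returns server version info
print(info.git_version)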
def create_rbac_api(self):
    cfg = self.get_kubecfg()
    return client.RbacAuthorizationV1beta1Api(cfg)
def perform_cloud_ops():
    # set GOOGLE_APPLICATION_CREDENTIALS env to credentials file
    # set GOOGLE_CLOUD_PROJECT env to project id
    domain = os.getenv('DOMAIN')
    assert domain
    logger.info(f'using domain: {domain}')

    static_ip = os.getenv('STATIC_IP')
    assert static_ip
    logger.info(f'using static IP: {static_ip}')

    admin_email = os.getenv('ADMIN_EMAIL')
    assert admin_email
    logger.info(f'using ACME admin email: {admin_email}')

    oauth_client_id = os.getenv('OAUTH_CLIENT_ID')
    assert oauth_client_id
    logger.info(f'using oauth client id: {oauth_client_id}')

    oauth_client_secret = os.getenv('OAUTH_CLIENT_SECRET')
    assert oauth_client_secret
    logger.info(f'using oauth client secret: {oauth_client_secret}')

    oauth_secret = os.getenv('OAUTH_SECRET')
    assert oauth_secret
    logger.info(f'using oauth secret: {oauth_secret}')

    oauth_domain = os.getenv('OAUTH_DOMAIN')
    assert oauth_domain
    logger.info(f'using oauth domain: {oauth_domain}')

    django_secret_key = os.getenv('DJANGO_SECRET_KEY')
    assert django_secret_key
    logger.info(f'using DJANGO_SECRET_KEY: {django_secret_key}')

    credentials, project = google.auth.default()
    gcloud_client = container_v1.ClusterManagerClient(credentials=credentials)
    scan_clusters(gcloud_client, project)

    # FIXME add the k8s cert to a trust store
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    auth_gcloud_k8s(credentials)

    api_core_v1 = client.CoreV1Api()
    api_apps_v1 = client.AppsV1Api()
    api_storage_v1 = client.StorageV1Api()
    api_custom = client.CustomObjectsApi()
    api_extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    api_ext_v1_beta1 = client.ApiextensionsV1beta1Api()
    api_rbac_auth_v1_b1 = client.RbacAuthorizationV1beta1Api()

    ensure_traefik(api_core_v1, api_ext_v1_beta1, api_apps_v1, api_custom,
                   api_rbac_auth_v1_b1, admin_email, domain, static_ip,
                   oauth_client_id, oauth_client_secret, oauth_domain, oauth_secret)

    with open(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'), 'rb') as f:
        gcloud_credentials_b64 = b64encode(f.read()).decode('UTF-8')

    ensure_secret(api=api_core_v1,
                  name='webui-credentials',
                  namespace='default',
                  secret=V1Secret(
                      metadata=client.V1ObjectMeta(name='webui-credentials'),
                      data={'gcloud-credentials': gcloud_credentials_b64}))

    webui_volume_paths = [
        ('data', '/opt/nipyapi/data', '20Gi', 'standard'),
    ]
    webui_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in webui_volume_paths
    ]
    webui_volume_mounts.append(
        V1VolumeMount(name='webui-credentials', mount_path='/root/webui', read_only=True))

    dind_volume_paths = [
        ('docker', '/var/lib/docker', '200Gi', 'standard'),
    ]
    dind_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in dind_volume_paths
    ]
    shared_volume_mounts = [
        V1VolumeMount(name='dind-socket', mount_path='/var/run-shared')
    ]

    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='admin',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(
                name='webui',
                image='aichrist/nipyapi-ds:latest',
                env=[
                    # FIXME use k8s secrets for these values
                    V1EnvVar(name='DOMAIN', value=domain),
                    V1EnvVar(name='STATIC_IP', value=static_ip),
                    V1EnvVar(name='ADMIN_EMAIL', value=admin_email),
                    V1EnvVar(name='OAUTH_CLIENT_ID', value=oauth_client_id),
                    V1EnvVar(name='OAUTH_CLIENT_SECRET', value=oauth_client_secret),
                    V1EnvVar(name='OAUTH_SECRET', value=oauth_secret),
                    V1EnvVar(name='OAUTH_DOMAIN', value=oauth_domain),
                    V1EnvVar(name='DJANGO_SECRET_KEY', value=django_secret_key),
                    V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='GOOGLE_CLOUD_PROJECT',
                             value=os.getenv('GOOGLE_CLOUD_PROJECT')),
                    V1EnvVar(name='DOCKER_HOST',
                             value='unix:///var/run-shared/docker.sock'),
                ],
                ports=[V1ContainerPort(container_port=8000)],
                volume_mounts=webui_volume_mounts + shared_volume_mounts),
            V1Container(
                name='dind',
                image='docker:19-dind',
                security_context=V1SecurityContext(privileged=True),
                command=['dockerd', '-H', 'unix:///var/run-shared/docker.sock'],
                volume_mounts=dind_volume_mounts + shared_volume_mounts)
        ],
        volumes=[
            V1Volume(name='dind-socket', empty_dir={}),
            V1Volume(name='webui-credentials',
                     projected=V1ProjectedVolumeSource(sources=[
                         V1VolumeProjection(secret=V1SecretProjection(
                             name='webui-credentials',
                             items=[
                                 V1KeyToPath(key='gcloud-credentials',
                                             path='gcloud_credentials.json')
                             ]))
                     ]))
        ],
        volume_paths=webui_volume_paths + dind_volume_paths)

    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='admin',
                              name='admin',
                              target_name='admin',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=8000)

    reg_volume_paths = [
        ('database', '/opt/nifi-registry/nifi-registry-current/database', '10Gi', 'standard'),
        ('flow-storage', '/opt/nifi-registry/nifi-registry-current/flow_storage', '20Gi', 'standard'),
    ]
    reg_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in reg_volume_paths
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='registry',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(name='registry',
                        image='apache/nifi-registry:latest',
                        env=[
                            V1EnvVar(name='NIFI_REGISTRY_WEB_HTTP_PORT', value='19090'),
                        ],
                        ports=[V1ContainerPort(container_port=19090)],
                        volume_mounts=reg_volume_mounts),
        ],
        init_containers=[
            V1Container(
                name='init-permissions',
                image='busybox',
                command=['sh', '-c',
                         'chown -R 1000:1000 /opt/nifi-registry/nifi-registry-current'],
                volume_mounts=[
                    V1VolumeMount(name=path[0], mount_path=path[1])
                    for path in reg_volume_paths
                ])
        ],
        volumes=[],
        volume_paths=reg_volume_paths)

    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='registry',
                              name='registry',
                              target_name='registry',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=19090)

    perform_nifi_ops(api_apps_v1, api_core_v1, api_custom, domain)
    perform_build_ops_bg()
    perform_mirror_ops_bg()
def create_gcloud(
    num_workers,
    release,
    kubernetes_version,
    machine_type,
    disk_size,
    num_cpus,
    num_gpus,
    gpu_type,
    zone,
    project,
    preemptible,
    custom_value,
):
    from google.cloud import container_v1
    import google.auth
    from google.auth.exceptions import DefaultCredentialsError
    from googleapiclient import discovery, http

    try:
        credentials, default_project = google.auth.default()
    except DefaultCredentialsError:
        raise click.UsageError(
            "Couldn't find gcloud credentials. Install the gcloud"
            " sdk ( https://cloud.google.com/sdk/docs/quickstart-linux ) and "
            "run 'gcloud auth application-default login' to login and create "
            "your credentials.")

    assert num_workers >= 2, "Number of workers should be at least 2"

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = "{}-{}".format(release, num_workers)
    name_path = "projects/{}/locations/{}/".format(project, zone)

    extraargs = {}
    if num_gpus > 0:
        extraargs["accelerators"] = [
            container_v1.types.AcceleratorConfig(accelerator_count=num_gpus,
                                                 accelerator_type=gpu_type)
        ]

    # delete existing firewall, if any
    firewalls = discovery.build("compute", "v1", cache_discovery=False).firewalls()
    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = "{}-firewall".format(name)

    if any(f["name"] == fw_name for f in existing_firewalls["items"]):
        response = {}
        while not hasattr(response, "status"):
            try:
                response = firewalls.delete(project=project, firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}

        while hasattr(response, "status") and response.status < response.DONE:
            response = gclient.get_operation(None, None, None, name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                "https://www.googleapis.com/auth/devstorage.full_control",
            ],
            **extraargs,
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(disabled=True),
            horizontal_pod_autoscaling=container_v1.types.HorizontalPodAutoscaling(disabled=True),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(disabled=True),
            network_policy_config=container_v1.types.NetworkPolicyConfig(disabled=False),
        ),
        logging_service=None,
        monitoring_service=None,
    )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(None, None, None,
                                         name=name_path + "/" + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError("Cluster creation failed!")

    cluster = gclient.get_cluster(None, None, None, name=name_path + "/" + name)

    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)
    configuration = client.Configuration()
    configuration.host = f"https://{cluster.endpoint}:443"
    configuration.verify_ssl = False
    configuration.api_key = {"authorization": "Bearer " + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            dep["spec"]["selector"] = {
                "matchLabels": dep["spec"]["template"]["metadata"]["labels"]
            }
            dep = client.ApiClient()._ApiClient__deserialize(dep, "V1DaemonSet")
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set("kube-system", body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        "kube-system",
        {
            "apiVersion": "v1",
            "kind": "ServiceAccount",
            "metadata": {
                "name": "tiller",
                "generateName": "tiller",
                "namespace": "kube-system",
            },
        },
    )
    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding({
        "apiVersion": "rbac.authorization.k8s.io/v1beta1",
        "kind": "ClusterRoleBinding",
        "metadata": {
            "name": "tiller"
        },
        "roleRef": {
            "apiGroup": "rbac.authorization.k8s.io",
            "kind": "ClusterRole",
            "name": "cluster-admin",
        },
        "subjects": [{
            "kind": "ServiceAccount",
            "name": "tiller",
            "namespace": "kube-system"
        }],
    })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service("kube-system", tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment("kube-system", tiller_dep)

    sleep(1)

    pods = client.CoreV1Api().list_namespaced_pod(namespace="kube-system",
                                                  label_selector="app=helm")
    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace="kube-system", name=tiller_pod.metadata.name)
        if resp.status.phase != "Pending":
            break
        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    # )

    with subprocess.Popen([
            "kubectl",
            "port-forward",
            "--namespace={}".format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name,
            "{0}:{0}".format(ports),
            "--server={}".format(configuration.host),
            "--token={}".format(credentials.token),
            "--insecure-skip-tls-verify=true",
    ]) as portforward:
        sleep(5)

        # install chart
        tiller = Tiller("localhost")
        chart = ChartBuilder({
            "name": "mlbench-helm",
            "source": {
                "type": "git",
                "location": "https://github.com/mlbench/mlbench-helm",
            },
        })

        values = {
            "limits": {
                "workers": num_workers - 1,
                "gpu": num_gpus,
                "cpu": num_cpus
            }
        }

        if custom_value:
            # merge custom values with values
            for cv in custom_value:
                key, v = cv.split("=", 1)
                current = values
                key_path = key.split(".")
                for k in key_path[:-1]:
                    if k not in current:
                        current[k] = {}
                    current = current[k]
                current[key_path[-1]] = v

        tiller.install_release(
            chart.get_helm_chart(),
            name=name,
            wait=True,
            dry_run=False,
            namespace="default",
            values=values,
        )

        portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        "sourceRanges": "0.0.0.0/0",
        "allowed": [{
            "IPProtocol": "tcp",
            "ports": [mlbench_client.port]
        }],
    }
    firewalls.insert(project=project, body=firewall_body).execute()

    config = get_config()
    config.set("general", "provider", "gke")
    config.set("gke", "cluster", cluster.endpoint)
    write_config(config)

    click.echo("MLBench successfully deployed")
def get_role_client():
    config_kube()
    role_client = client.RbacAuthorizationV1beta1Api()
    return role_client
def create_gcloud(num_workers, release, kubernetes_version, machine_type,
                  disk_size, num_cpus, num_gpus, gpu_type, zone, project,
                  preemptible, custom_value):
    from google.cloud import container_v1
    import google.auth
    from googleapiclient import discovery, http

    credentials, default_project = google.auth.default()

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = '{}-{}'.format(release, num_workers)
    name_path = 'projects/{}/locations/{}/'.format(project, zone)

    extraargs = {}
    if num_gpus > 0:
        extraargs['accelerators'] = [container_v1.types.AcceleratorConfig(
            accelerator_count=num_gpus, accelerator_type=gpu_type)]

    # delete existing firewall, if any
    firewalls = discovery.build('compute', 'v1', cache_discovery=False).firewalls()
    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = '{}-firewall'.format(name)

    if any(f['name'] == fw_name for f in existing_firewalls['items']):
        response = {}
        while not hasattr(response, 'status'):
            try:
                response = firewalls.delete(project=project, firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}

        while hasattr(response, 'status') and response.status < response.DONE:
            response = gclient.get_operation(None, None, None, name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                'https://www.googleapis.com/auth/devstorage.full_control',
            ],
            **extraargs
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(
                disabled=True,
            ),
            horizontal_pod_autoscaling=container_v1.types.HorizontalPodAutoscaling(
                disabled=True,
            ),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                disabled=True,
            ),
            network_policy_config=container_v1.types.NetworkPolicyConfig(
                disabled=False,
            ),
        ),
        logging_service=None,
        monitoring_service=None
    )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(
            None, None, None, name=name_path + '/' + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError('Cluster creation failed!')

    cluster = gclient.get_cluster(None, None, None, name=name_path + '/' + name)

    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)
    configuration = client.Configuration()
    configuration.host = f'https://{cluster.endpoint}:443'
    configuration.verify_ssl = False
    configuration.api_key = {'authorization': 'Bearer ' + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            dep['spec']['selector'] = {
                'matchLabels': dep['spec']['template']['metadata']['labels']
            }
            dep = client.ApiClient()._ApiClient__deserialize(dep, 'V1DaemonSet')
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set('kube-system', body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        'kube-system',
        {
            'apiVersion': 'v1',
            'kind': 'ServiceAccount',
            'metadata': {
                'name': 'tiller',
                'generateName': 'tiller',
                'namespace': 'kube-system',
            },
        })
    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding(
        {
            'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
            'kind': 'ClusterRoleBinding',
            'metadata': {
                'name': 'tiller'
            },
            'roleRef': {
                'apiGroup': 'rbac.authorization.k8s.io',
                'kind': 'ClusterRole',
                'name': 'cluster-admin'
            },
            'subjects': [
                {
                    'kind': 'ServiceAccount',
                    'name': 'tiller',
                    'namespace': 'kube-system'
                }
            ]
        })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service('kube-system', tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment('kube-system', tiller_dep)

    sleep(1)

    pods = client.CoreV1Api().list_namespaced_pod(
        namespace='kube-system', label_selector='app=helm')
    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace='kube-system', name=tiller_pod.metadata.name)
        if resp.status.phase != 'Pending':
            break
        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    # )

    with subprocess.Popen([
            'kubectl', 'port-forward',
            '--namespace={}'.format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name, '{0}:{0}'.format(ports),
            '--server={}'.format(configuration.host),
            '--token={}'.format(credentials.token),
            '--insecure-skip-tls-verify=true']) as portforward:
        sleep(5)

        # install chart
        tiller = Tiller('localhost')
        chart = ChartBuilder(
            {
                'name': 'mlbench-helm',
                'source': {
                    'type': 'git',
                    'location': 'https://github.com/mlbench/mlbench-helm'
                }
            })

        values = {
            'limits': {
                'workers': num_workers - 1,
                'gpu': num_gpus,
                'cpu': num_cpus
            }
        }

        if custom_value:
            # merge custom values with values
            for cv in custom_value:
                key, v = cv.split("=", 1)
                current = values
                key_path = key.split(".")
                for k in key_path[:-1]:
                    if k not in current:
                        current[k] = {}
                    current = current[k]
                current[key_path[-1]] = v

        tiller.install_release(
            chart.get_helm_chart(),
            name=name,
            wait=True,
            dry_run=False,
            namespace='default',
            values=values)

        portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        "sourceRanges": "0.0.0.0/0",
        "allowed": [
            {"IPProtocol": "tcp", "ports": [mlbench_client.port]}
        ]
    }
    firewalls.insert(project=project, body=firewall_body).execute()

    config = get_config()
    config.set('general', 'provider', 'gke')
    config.set('gke', 'cluster', cluster.endpoint)
    write_config(config)

    click.echo("MLBench successfully deployed")
def _get_client(self, context):
    conf = config.new_client_from_config(context=context)
    return (k8s_client.CoreV1Api(conf),
            k8s_client.RbacAuthorizationV1beta1Api(conf))
def __init__(self,
             cluster_name="default-cluster",
             kubernetes_proxy_addr=None,
             redis_ip=None,
             redis_port=6379,
             useInternalIP=False,
             namespace='default',
             service_types=None,
             create_namespace_if_not_exists=False):
    """
    Parameters
    ----------
    cluster_name : str
        A unique name for this Clipper cluster. This can be used to run multiple
        Clipper clusters on the same Kubernetes cluster without interfering with
        each other. The cluster name must follow the Kubernetes label value naming
        rule, namely: valid label values must be 63 characters or less and must be
        empty or begin and end with an alphanumeric character ([a-z0-9A-Z]) with
        dashes (-), underscores (_), dots (.), and alphanumerics between. See more at:
        https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
    kubernetes_proxy_addr : str, optional
        The proxy address if you are proxying connections locally using
        ``kubectl proxy``. If this argument is provided, Clipper will construct
        the appropriate proxy URLs for accessing Clipper's Kubernetes services,
        rather than using the API server address provided in your kube config.
    redis_ip : str, optional
        The address of a running Redis cluster. If set to None, Clipper will
        start a Redis deployment for you.
    redis_port : int, optional
        The Redis port. If ``redis_ip`` is set to None, Clipper will start Redis
        on this port. If ``redis_ip`` is provided, Clipper will connect to Redis
        on this port.
    useInternalIP : bool, optional
        Use the internal IP of the K8S nodes. If ``useInternalIP`` is set to
        False, Clipper will throw an exception if none of the nodes have
        ExternalDNS. If ``useInternalIP`` is set to True, Clipper will use the
        internal IP of the K8S node if no ExternalDNS exists for any of the nodes.
    namespace : str, optional
        The Kubernetes namespace to use. If this argument is provided, all Clipper
        artifacts and resources will be created in this k8s namespace. If not,
        the "default" namespace is used.
    service_types : dict, optional
        Specify what kind of Kubernetes service you want. You must use predefined
        'ServiceTypes' in Kubernetes as value. See more at:
        https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
        For example,
        service_types = {
            'redis': 'NodePort',
            'management': 'LoadBalancer',
            'query': 'LoadBalancer',
            'query-rpc': 'ClusterIP',
            'metric': 'LoadBalancer'
        }
    create_namespace_if_not_exists : bool, False
        Create a k8s namespace if the namespace doesn't already exist. If this
        argument is provided and the k8s namespace does not exist, a new k8s
        namespace will be created.

    Note
    ----
    Clipper stores all persistent configuration state (such as registered
    application and model information) in Redis. If you want Clipper to be durable
    and able to recover from failures, we recommend configuring your own persistent
    and replicated Redis cluster rather than letting Clipper launch one for you.
    """
    self.cluster_name = cluster_name

    if kubernetes_proxy_addr is not None:
        self.kubernetes_proxy_addr = kubernetes_proxy_addr
        self.use_k8s_proxy = True
    else:
        self.use_k8s_proxy = False

    self.redis_ip = redis_ip
    self.redis_port = redis_port
    self.useInternalIP = useInternalIP
    config.load_kube_config()
    configuration.assert_hostname = False
    self._k8s_v1 = client.CoreV1Api()
    self._k8s_beta = client.ExtensionsV1beta1Api()
    self._k8s_rbac = client.RbacAuthorizationV1beta1Api()

    # Create the template engine
    # Config: Any variable missing -> Error
    self.template_engine = jinja2.Environment(
        loader=jinja2.FileSystemLoader(cur_dir, followlinks=True),
        undefined=jinja2.StrictUndefined)

    # Check if the namespace exists; if the create flag is set, create it,
    # otherwise raise an error.
    namespaces = []
    for ns in self._k8s_v1.list_namespace().items:
        namespaces.append(ns.metadata.name)

    if namespace in namespaces:
        self.k8s_namespace = namespace
    elif create_namespace_if_not_exists:
        body = client.V1Namespace()
        body.metadata = client.V1ObjectMeta(name=namespace)
        try:
            self._k8s_v1.create_namespace(body)
        except ApiException as e:
            logging.error("Exception creating Kubernetes namespace: {}".format(e))
            raise ClipperException(
                "Could not create Kubernetes namespace. "
                "Reason: {}".format(e.reason))
        self.k8s_namespace = namespace
    else:
        msg = ("Error connecting to Kubernetes cluster: namespace does not exist. "
               "You can pass in "
               "KubernetesContainerManager(create_namespace_if_not_exists=True) "
               "to create this namespace.")
        logger.error(msg)
        raise ClipperException(msg)

    # Initialize logger with cluster identifier
    if self.k8s_namespace != "default":
        self.cluster_identifier = "{ns}-{cluster}".format(
            ns=self.k8s_namespace, cluster=self.cluster_name)
    else:
        self.cluster_identifier = "{cluster}".format(cluster=self.cluster_name)
    self.logger = ClusterAdapter(logger, {'cluster_name': self.cluster_identifier})

    self.service_types = self._determine_service_types(service_types)
def factory(self, kind):
    if kind == 'Service':
        api_instance = client.CoreV1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_service,
            'list_namespaced_resource': api_instance.list_namespaced_service,
            'create_namespaced_resource': api_instance.create_namespaced_service,
            'patch_namespaced_resource': api_instance.patch_namespaced_service
        })
    elif kind == 'ServiceAccount':
        api_instance = client.CoreV1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_service_account,
            'list_namespaced_resource': api_instance.list_namespaced_service_account,
            'create_namespaced_resource': api_instance.create_namespaced_service_account,
            'patch_namespaced_resource': api_instance.patch_namespaced_service_account
        })
    elif kind == 'Deployment':
        api_instance = client.AppsV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_deployment,
            'list_namespaced_resource': api_instance.list_namespaced_deployment,
            'create_namespaced_resource': api_instance.create_namespaced_deployment,
            'patch_namespaced_resource': api_instance.patch_namespaced_deployment
        })
    elif kind == 'ClusterRole':
        api_instance = client.RbacAuthorizationV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_resource': api_instance.read_cluster_role,
            'create_resource': api_instance.create_cluster_role,
            'patch_resource': api_instance.patch_cluster_role
        })
    elif kind == 'Role':
        api_instance = client.RbacAuthorizationV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_role,
            'create_namespaced_resource': api_instance.create_namespaced_role,
            'patch_namespaced_resource': api_instance.patch_namespaced_role
        })
    elif kind == 'RoleBinding':
        api_instance = client.RbacAuthorizationV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_role_binding,
            'create_namespaced_resource': api_instance.create_namespaced_role_binding,
            'patch_namespaced_resource': api_instance.patch_namespaced_role_binding
        })
    elif kind == 'ClusterRoleBinding':
        api_instance = client.RbacAuthorizationV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_resource': api_instance.read_cluster_role_binding,
            'create_resource': api_instance.create_cluster_role_binding,
            'patch_resource': api_instance.patch_cluster_role_binding
        })
    elif kind == 'StatefulSet':
        api_instance = client.AppsV1beta1Api()
        api_instance = Adapter(api_instance, **{
            'read_namespaced_resource': api_instance.read_namespaced_stateful_set,
            'list_namespaced_resource': api_instance.list_namespaced_stateful_set,
            'create_namespaced_resource': api_instance.create_namespaced_stateful_set,
            'patch_namespaced_resource': api_instance.patch_namespaced_stateful_set
        })
    else:
        # TODO: implement all resource kinds
        raise TypeError('kind {} not implemented'.format(kind))
    return api_instance
def get_resources(self, resource, namespace):
    names = []
    config.load_kube_config()
    v1 = client.CoreV1Api()
    v1Beta1 = client.AppsV1beta1Api()
    extensionsV1Beta1 = client.ExtensionsV1beta1Api()
    autoscalingV1Api = client.AutoscalingV1Api()
    rbacApi = client.RbacAuthorizationV1beta1Api()
    batchV1Api = client.BatchV1Api()
    batchV2Api = client.BatchV2alpha1Api()

    if resource == "pod":
        ret = v1.list_pod_for_all_namespaces(watch=False)
    elif resource == "service":
        ret = v1.list_service_for_all_namespaces(watch=False)
    elif resource == "deployment":
        ret = v1Beta1.list_deployment_for_all_namespaces(watch=False)
    elif resource == "statefulset":
        ret = v1Beta1.list_stateful_set_for_all_namespaces(watch=False)
    elif resource == "node":
        ret = v1.list_node(watch=False)
    elif resource == "namespace":
        ret = v1.list_namespace(watch=False)
    elif resource == "daemonset":
        ret = extensionsV1Beta1.list_daemon_set_for_all_namespaces(watch=False)
    elif resource == "networkpolicy":
        ret = extensionsV1Beta1.list_network_policy_for_all_namespaces(watch=False)
    elif resource == "thirdpartyresource":
        ret = extensionsV1Beta1.list_third_party_resource(watch=False)
    elif resource == "replicationcontroller":
        ret = v1.list_replication_controller_for_all_namespaces(watch=False)
    elif resource == "replicaset":
        ret = extensionsV1Beta1.list_replica_set_for_all_namespaces(watch=False)
    elif resource == "ingress":
        ret = extensionsV1Beta1.list_ingress_for_all_namespaces(watch=False)
    elif resource == "endpoints":
        ret = v1.list_endpoints_for_all_namespaces(watch=False)
    elif resource == "configmap":
        ret = v1.list_config_map_for_all_namespaces(watch=False)
    elif resource == "event":
        ret = v1.list_event_for_all_namespaces(watch=False)
    elif resource == "limitrange":
        ret = v1.list_limit_range_for_all_namespaces(watch=False)
    elif resource == "persistentvolume":
        ret = v1.list_persistent_volume(watch=False)
    elif resource == "secret":
        ret = v1.list_secret_for_all_namespaces(watch=False)
    elif resource == "resourcequota":
        ret = v1.list_resource_quota_for_all_namespaces(watch=False)
    elif resource == "componentstatus":
        ret = v1.list_component_status(watch=False)
    elif resource == "podtemplate":
        ret = v1.list_pod_template_for_all_namespaces(watch=False)
    elif resource == "serviceaccount":
        ret = v1.list_service_account_for_all_namespaces(watch=False)
    elif resource == "horizontalpodautoscaler":
        ret = autoscalingV1Api.list_horizontal_pod_autoscaler_for_all_namespaces(watch=False)
    elif resource == "clusterrole":
        ret = rbacApi.list_cluster_role(watch=False)
    elif resource == "clusterrolebinding":
        ret = rbacApi.list_cluster_role_binding(watch=False)
    elif resource == "job":
        ret = batchV1Api.list_job_for_all_namespaces(watch=False)
    elif resource == "cronjob":
        ret = batchV2Api.list_cron_job_for_all_namespaces(watch=False)
    elif resource == "scheduledjob":
        ret = batchV2Api.list_scheduled_job_for_all_namespaces(watch=False)

    for i in ret.items:
        names.append((i.metadata.name, i.metadata.namespace))
    return names
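# Hedged usage sketch: note that get_resources() lists across all namespaces,
# so the namespace argument is effectively unused by the listing calls above.
# `helper` is an illustrative instance of the class defining the method.
for name, ns in helper.get_resources("pod", "default"):
    print("%s/%s" % (ns, name))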
def rbac_v1_api(self):
    # Note: despite the name, this returns the v1beta1 RBAC API client.
    return client.RbacAuthorizationV1beta1Api(self.configuration)