Exemple #1
0
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        try:
            config_file = os.path.expanduser(kubeconfig_filepath)
            config.load_kube_config(config_file=config_file)
        except:
            self.logger.warning("unable to load kube-config")

        self.v1 = client.CoreV1Api()
        self.v1Beta1 = client.AppsV1beta1Api()
        self.extensionsV1Beta1 = client.ExtensionsV1beta1Api()
        self.autoscalingV1Api = client.AutoscalingV1Api()
        self.rbacApi = client.RbacAuthorizationV1beta1Api()
        self.batchV1Api = client.BatchV1Api()
        self.batchV2Api = client.BatchV2alpha1Api()
Exemple #2
0
def kube_apis(cli_arguments) -> KubeApis:
    """
    Set up kubernets-client to operate in cluster.

    :param cli_arguments: a set of command-line arguments
    :return: KubeApis
    """
    context_name = cli_arguments['context']
    kubeconfig = cli_arguments['kubeconfig']
    config.load_kube_config(config_file=kubeconfig,
                            context=context_name,
                            persist_config=False)
    v1 = client.CoreV1Api()
    extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    rbac_v1_beta1 = client.RbacAuthorizationV1beta1Api()
    return KubeApis(v1, extensions_v1_beta1, rbac_v1_beta1)
Exemple #3
0
    def __init__(self):
        """
        Load the kube config and create an instance of the Kubernetes API client.
        """
        if environ.get('KUBERNETES_PORT'):
            config.load_incluster_config()
        else:
            config.load_kube_config()

        self.client = client
        self._apiClient = client.ApiClient()
        self.coreV1 = client.CoreV1Api(self._apiClient)
        self.rbacV1Beta1 = client.RbacAuthorizationV1beta1Api(self._apiClient)
        self.extV1Beta1 = client.ExtensionsV1beta1Api(self._apiClient)
        self.appsV1 = client.AppsV1beta1Api()
        self.StorageV1beta1Api = client.StorageV1beta1Api()
Exemple #4
0
def api_client_from_version(api_version):
    return {
        "v1":
        kube_client.CoreV1Api(),
        "apps/v1":
        kube_client.AppsV1Api(),
        "batch/v1":
        kube_client.BatchV1Api(),
        "batch/v1beta1":
        kube_client.BatchV1beta1Api(),
        "extensions/v1beta1":
        kube_client.ExtensionsV1beta1Api(),
        "rbac.authorization.k8s.io/v1beta1":
        kube_client.RbacAuthorizationV1beta1Api(),
        "rbac.authorization.k8s.io/v1":
        kube_client.RbacAuthorizationV1Api(),
    }[api_version]
Exemple #5
0
    def create_kubeconfig(self, api_server_ip: str):
        with open("%s/manifests/kubeconfig/clusterrolebinding.yaml" % self.euid_path) as f:
            clusterrolebinding_manifest = yaml.load(f)

        c = kubeclient.ApiClient(host="http://%s:%d" % (api_server_ip, self.ec.kubernetes_apiserver_insecure_port))
        rbac = kubeclient.RbacAuthorizationV1beta1Api(c)
        rbac.create_cluster_role_binding(clusterrolebinding_manifest)

        kube_config = {
            'preferences': {'colors': True},
            'users': [
                {
                    'user': {
                        'client-key': '%s/kubernetes_kubelet.private_key' % self.test_certs_path,
                        'client-certificate': '%s/kubernetes_kubelet.certificate' % self.test_certs_path
                    },
                    'name': 'enjoliver.local'
                }
            ],
            'kind': 'Config',
            'apiVersion': 'v1',
            'clusters': [
                {
                    'cluster': {
                        'server': "https://%s:6443" % api_server_ip,
                        'certificate-authority': '%s/kubernetes_kubelet.issuing_ca' % self.test_certs_path
                    },
                    'name': 'enjoliver'
                }
            ],
            'contexts': [
                {
                    'name': 'e',
                    'context': {
                        'cluster': 'enjoliver',
                        'namespace': 'kube-system',
                        'user': '******'
                    }
                }
            ],
            'current-context': 'e'
        }
        with open(os.path.join(self.tests_path, "testing_kubeconfig.yaml"), "w") as kc:
            yaml.dump(kube_config, kc)
Exemple #6
0
    def deploy_unique_rbac_resources(self):
        """
        The cluster-wide RBAC resources (clusterrole/clusterroldbinding) are
        not namespaced, so they have to be handled specially to ensure they are
        unique amongst potentially multiple deployments of the agent in the
        same cluster.  Basically just sticks the test namespace as a suffix to
        the resource names.
        """
        corev1 = kube_client.CoreV1Api()
        rbacv1beta1 = kube_client.RbacAuthorizationV1beta1Api()

        serviceaccount = corev1.create_namespaced_service_account(
            body=load_resource_yaml(AGENT_SERVICEACCOUNT_PATH),
            namespace=self.namespace)

        clusterrole_base = load_resource_yaml(AGENT_CLUSTERROLE_PATH)
        clusterrole_base["metadata"][
            "name"] = f"signalfx-agent-{self.namespace}"
        clusterrole = rbacv1beta1.create_cluster_role(body=clusterrole_base)

        crb_base = load_resource_yaml(AGENT_CLUSTERROLEBINDING_PATH)
        # Make the binding refer to our testing namespace's role and service account
        crb_base["metadata"]["name"] = f"signalfx-agent-{self.namespace}"
        crb_base["roleRef"]["name"] = clusterrole.metadata.name
        crb_base["subjects"][0]["namespace"] = self.namespace
        crb = rbacv1beta1.create_cluster_role_binding(body=crb_base)

        try:
            yield
        finally:
            delete_opts = kube_client.V1DeleteOptions(
                grace_period_seconds=0, propagation_policy="Background")

            rbacv1beta1.delete_cluster_role_binding(crb.metadata.name,
                                                    body=delete_opts)
            rbacv1beta1.delete_cluster_role(clusterrole.metadata.name,
                                            body=delete_opts)
            corev1.delete_namespaced_service_account(
                serviceaccount.metadata.name,
                namespace=self.namespace,
                body=delete_opts)
            print("Deleted RBAC resources")
Exemple #7
0
def kube_apis(cli_arguments) -> KubeApis:
    """
    Set up kubernets-client to operate in cluster.

    :param cli_arguments: a set of command-line arguments
    :return: KubeApis
    """
    context_name = cli_arguments["context"]
    kubeconfig = cli_arguments["kubeconfig"]
    config.load_kube_config(config_file=kubeconfig,
                            context=context_name,
                            persist_config=False)
    v1 = client.CoreV1Api()
    extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    apps_v1_api = client.AppsV1Api()
    rbac_v1_beta1 = client.RbacAuthorizationV1beta1Api()
    api_extensions_v1_beta1 = client.ApiextensionsV1beta1Api()
    custom_objects = client.CustomObjectsApi()
    return KubeApis(v1, extensions_v1_beta1, apps_v1_api, rbac_v1_beta1,
                    api_extensions_v1_beta1, custom_objects)
Exemple #8
0
    def create_tiller(self, api_server_ip: str):
        c = kubeclient.ApiClient(host="http://%s:%d" % (api_server_ip, self.ec.kubernetes_apiserver_insecure_port))

        with open("%s/manifests/tiller/tiller-service.yaml" % self.euid_path) as f:
            service_manifest = yaml.load(f)

        with open("%s/manifests/tiller/tiller-deploy.yaml" % self.euid_path) as f:
            deploy_manifest = yaml.load(f)

        with open("%s/manifests/tiller/tiller-service-account.yaml" % self.euid_path) as f:
            serviceaccount_manifest = yaml.load(f)

        with open("%s/manifests/tiller/clusterrolebinding.yaml" % self.euid_path) as f:
            clusterrolebinding_manifest = yaml.load(f)

        core, beta = kubeclient.CoreV1Api(c), kubeclient.ExtensionsV1beta1Api(c)
        rbac = kubeclient.RbacAuthorizationV1beta1Api(c)

        core.create_namespaced_service("kube-system", service_manifest)
        rbac.create_cluster_role_binding(clusterrolebinding_manifest)
        core.create_namespaced_service_account("kube-system", serviceaccount_manifest)
        beta.create_namespaced_deployment("kube-system", deploy_manifest)
Exemple #9
0
def create_tiller_service_accounts(api_client):
    logging.info("Creating service account for tiller.")
    api = k8s_client.CoreV1Api(api_client)
    body = yaml.load("""apiVersion: v1
kind: ServiceAccount
metadata:
  name: tiller
  namespace: kube-system""")
    try:
        api.create_namespaced_service_account("kube-system", body)
    except rest.ApiException as e:
        if e.status == 409:
            logging.info("Service account tiller already exists.")
        else:
            raise
    body = yaml.load("""apiVersion: rbac.authorization.k8s.io/v1beta1
kind: ClusterRoleBinding
metadata:
  name: tiller
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
  - kind: ServiceAccount
    name: tiller
    namespace: kube-system
""")
    rbac_api = k8s_client.RbacAuthorizationV1beta1Api(api_client)
    try:
        rbac_api.create_cluster_role_binding(body)
    except rest.ApiException as e:
        if e.status == 409:
            logging.info(
                "Role binding for service account tiller already exists.")
        else:
            raise
Exemple #10
0
    def __init__(self, **kwargs):
        """
        Set cluster and prepare clients for all used resource types.

        Args:
            **kwargs: Keyword arguments (cluster is required)
        """
        # load configuration
        try:
            self.cluster = kwargs['cluster']
        except KeyError:
            raise ValueError('Missing parameter cluster')

        logger.debug('Initialized KubernetesAPI for {}'.format(self.cluster))

        # set apis
        api_client = self.get_api_client()

        self.api_corev1 = client.CoreV1Api(api_client=api_client)
        self.api_extensionsv1beta1 = client.ExtensionsV1beta1Api(
            api_client=api_client)
        self.api_rbacauthorizationv1beta1 = client.RbacAuthorizationV1beta1Api(
            api_client=api_client)
        self.api_version = client.VersionApi(api_client=api_client)
Exemple #11
0
 def create_rbac_api(self):
     cfg = self.get_kubecfg()
     return client.RbacAuthorizationV1beta1Api(cfg)
Exemple #12
0
def perform_cloud_ops():
    # set GOOGLE_APPLICATION_CREDENTIALS env to credentials file
    # set GOOGLE_CLOUD_PROJECT env to project id

    domain = os.getenv('DOMAIN')
    assert domain
    logger.info(f'using domain: {domain}')

    static_ip = os.getenv('STATIC_IP')
    assert static_ip
    logger.info(f'using static IP: {static_ip}')

    admin_email = os.getenv('ADMIN_EMAIL')
    assert admin_email
    logger.info(f'using ACME admin email: {admin_email}')

    oauth_client_id = os.getenv('OAUTH_CLIENT_ID')
    assert oauth_client_id
    logger.info(f'using oauth client id: {oauth_client_id}')

    oauth_client_secret = os.getenv('OAUTH_CLIENT_SECRET')
    assert oauth_client_secret
    logger.info(f'using oauth client secret: {oauth_client_secret}')

    oauth_secret = os.getenv('OAUTH_SECRET')
    assert oauth_secret
    logger.info(f'using oauth secret: {oauth_secret}')

    oauth_domain = os.getenv('OAUTH_DOMAIN')
    assert oauth_domain
    logger.info(f'using domain: {oauth_domain}')

    django_secret_key = os.getenv('DJANGO_SECRET_KEY')
    assert django_secret_key
    logger.info(f'using DJANGO_SECRET_KEY: {django_secret_key}')

    credentials, project = google.auth.default()
    gcloud_client = container_v1.ClusterManagerClient(credentials=credentials)

    scan_clusters(gcloud_client, project)

    # FIXME add the k8s cert to a trust store
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    auth_gcloud_k8s(credentials)

    api_core_v1 = client.CoreV1Api()
    api_apps_v1 = client.AppsV1Api()
    api_storage_v1 = client.StorageV1Api()
    api_custom = client.CustomObjectsApi()
    api_extensions_v1_beta1 = client.ExtensionsV1beta1Api()
    api_ext_v1_beta1 = client.ApiextensionsV1beta1Api()
    api_rbac_auth_v1_b1 = client.RbacAuthorizationV1beta1Api()

    ensure_traefik(api_core_v1, api_ext_v1_beta1, api_apps_v1, api_custom,
                   api_rbac_auth_v1_b1, admin_email, domain, static_ip,
                   oauth_client_id, oauth_client_secret, oauth_domain,
                   oauth_secret)

    with open(os.getenv('GOOGLE_APPLICATION_CREDENTIALS'), 'rb') as f:
        gcloud_credentials_b64 = b64encode(f.read()).decode('UTF-8')

    ensure_secret(api=api_core_v1,
                  name='webui-credentials',
                  namespace='default',
                  secret=V1Secret(
                      metadata=client.V1ObjectMeta(name='webui-credentials'),
                      data={'gcloud-credentials': gcloud_credentials_b64}))
    webui_volume_paths = [
        ('data', '/opt/nipyapi/data', '20Gi', 'standard'),
    ]
    webui_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in webui_volume_paths
    ]
    webui_volume_mounts.append(
        V1VolumeMount(name='webui-credentials',
                      mount_path='/root/webui',
                      read_only=True))

    dind_volume_paths = [
        ('docker', '/var/lib/docker', '200Gi', 'standard'),
    ]
    dind_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in dind_volume_paths
    ]
    shared_volume_mounts = [
        V1VolumeMount(name='dind-socket', mount_path='/var/run-shared')
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='admin',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(
                name='webui',
                image='aichrist/nipyapi-ds:latest',
                env=[
                    # FIXME use k8s secrets for these values
                    V1EnvVar(name='DOMAIN', value=domain),
                    V1EnvVar(name='STATIC_IP', value=static_ip),
                    V1EnvVar(name='ADMIN_EMAIL', value=admin_email),
                    V1EnvVar(name='OAUTH_CLIENT_ID', value=oauth_client_id),
                    V1EnvVar(name='OAUTH_CLIENT_SECRET',
                             value=oauth_client_secret),
                    V1EnvVar(name='OAUTH_SECRET', value=oauth_secret),
                    V1EnvVar(name='OAUTH_DOMAIN', value=oauth_domain),
                    V1EnvVar(name='DJANGO_SECRET_KEY',
                             value=django_secret_key),
                    V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='GOOGLE_CLOUD_PROJECT',
                             value=os.getenv('GOOGLE_CLOUD_PROJECT')),
                    V1EnvVar(name='DOCKER_HOST',
                             value='unix:///var/run-shared/docker.sock'),
                ],
                ports=[V1ContainerPort(container_port=8000)],
                volume_mounts=webui_volume_mounts + shared_volume_mounts),
            V1Container(
                name='dind',
                image='docker:19-dind',
                security_context=V1SecurityContext(privileged=True),
                command=[
                    'dockerd', '-H', 'unix:///var/run-shared/docker.sock'
                ],
                volume_mounts=dind_volume_mounts + shared_volume_mounts)
        ],
        volumes=[
            V1Volume(name='dind-socket', empty_dir={}),
            V1Volume(name='webui-credentials',
                     projected=V1ProjectedVolumeSource(sources=[
                         V1VolumeProjection(secret=V1SecretProjection(
                             name='webui-credentials',
                             items=[
                                 V1KeyToPath(key='gcloud-credentials',
                                             path='gcloud_credentials.json')
                             ]))
                     ]))
        ],
        volume_paths=webui_volume_paths + dind_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='admin',
                              name='admin',
                              target_name='admin',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=8000)
    reg_volume_paths = [
        ('database', '/opt/nifi-registry/nifi-registry-current/database',
         '10Gi', 'standard'),
        ('flow-storage',
         '/opt/nifi-registry/nifi-registry-current/flow_storage', '20Gi',
         'standard'),
    ]
    reg_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in reg_volume_paths
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='registry',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(name='registry',
                        image='apache/nifi-registry:latest',
                        env=[
                            V1EnvVar(name='NIFI_REGISTRY_WEB_HTTP_PORT',
                                     value='19090'),
                        ],
                        ports=[V1ContainerPort(container_port=19090)],
                        volume_mounts=reg_volume_mounts),
        ],
        init_containers=[
            V1Container(
                name='init-permissions',
                image='busybox',
                command=[
                    'sh', '-c',
                    'chown -R 1000:1000 /opt/nifi-registry/nifi-registry-current'
                ],
                volume_mounts=[
                    V1VolumeMount(name=path[0], mount_path=path[1])
                    for path in reg_volume_paths
                ])
        ],
        volumes=[],
        volume_paths=reg_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='registry',
                              name='registry',
                              target_name='registry',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=19090)

    perform_nifi_ops(api_apps_v1, api_core_v1, api_custom, domain)

    perform_build_ops_bg()
    perform_mirror_ops_bg()
Exemple #13
0
def create_gcloud(
    num_workers,
    release,
    kubernetes_version,
    machine_type,
    disk_size,
    num_cpus,
    num_gpus,
    gpu_type,
    zone,
    project,
    preemptible,
    custom_value,
):
    from google.cloud import container_v1
    import google.auth
    from google.auth.exceptions import DefaultCredentialsError
    from googleapiclient import discovery, http

    try:
        credentials, default_project = google.auth.default()
    except DefaultCredentialsError:
        raise click.UsageError(
            "Couldn't find gcloud credentials. Install the gcloud"
            " sdk ( https://cloud.google.com/sdk/docs/quickstart-linux ) and "
            "run 'gcloud auth application-default login' to login and create "
            "your credentials.")

    assert num_workers >= 2, "Number of workers should be at least 2"

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = "{}-{}".format(release, num_workers)
    name_path = "projects/{}/locations/{}/".format(project, zone)

    extraargs = {}

    if num_gpus > 0:
        extraargs["accelerators"] = [
            container_v1.types.AcceleratorConfig(accelerator_count=num_gpus,
                                                 accelerator_type=gpu_type)
        ]

    # delete existing firewall, if any
    firewalls = discovery.build("compute", "v1",
                                cache_discovery=False).firewalls()

    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = "{}-firewall".format(name)

    if any(f["name"] == fw_name for f in existing_firewalls["items"]):
        response = {}
        while not hasattr(response, "status"):
            try:
                response = firewalls.delete(project=project,
                                            firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}
        while hasattr(response, "status") and response.status < response.DONE:
            response = gclient.get_operation(None,
                                             None,
                                             None,
                                             name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                "https://www.googleapis.com/auth/devstorage.full_control",
            ],
            **extraargs,
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(
                disabled=True, ),
            horizontal_pod_autoscaling=container_v1.types.
            HorizontalPodAutoscaling(disabled=True, ),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                disabled=True, ),
            network_policy_config=container_v1.types.NetworkPolicyConfig(
                disabled=False, ),
        ),
        logging_service=None,
        monitoring_service=None,
    )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(None,
                                         None,
                                         None,
                                         name=name_path + "/" + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError("Cluster creation failed!")

    cluster = gclient.get_cluster(None,
                                  None,
                                  None,
                                  name=name_path + "/" + name)

    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)
    configuration = client.Configuration()
    configuration.host = f"https://{cluster.endpoint}:443"
    configuration.verify_ssl = False
    configuration.api_key = {"authorization": "Bearer " + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            dep["spec"]["selector"] = {
                "matchLabels": dep["spec"]["template"]["metadata"]["labels"]
            }
            dep = client.ApiClient()._ApiClient__deserialize(
                dep, "V1DaemonSet")
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set("kube-system", body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        "kube-system",
        {
            "apiVersion": "v1",
            "kind": "ServiceAccount",
            "metadata": {
                "name": "tiller",
                "generateName": "tiller",
                "namespace": "kube-system",
            },
        },
    )

    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding({
        "apiVersion":
        "rbac.authorization.k8s.io/v1beta1",
        "kind":
        "ClusterRoleBinding",
        "metadata": {
            "name": "tiller"
        },
        "roleRef": {
            "apiGroup": "rbac.authorization.k8s.io",
            "kind": "ClusterRole",
            "name": "cluster-admin",
        },
        "subjects": [{
            "kind": "ServiceAccount",
            "name": "tiller",
            "namespace": "kube-system"
        }],
    })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service("kube-system", tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment(
        "kube-system", tiller_dep)

    sleep(1)

    pods = client.CoreV1Api().list_namespaced_pod(namespace="kube-system",
                                                  label_selector="app=helm")

    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace="kube-system", name=tiller_pod.metadata.name)
        if resp.status.phase != "Pending":
            break
        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    #     )

    with subprocess.Popen([
            "kubectl",
            "port-forward",
            "--namespace={}".format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name,
            "{0}:{0}".format(ports),
            "--server={}".format(configuration.host),
            "--token={}".format(credentials.token),
            "--insecure-skip-tls-verify=true",
    ]) as portforward:

        sleep(5)
        # install chart
        tiller = Tiller("localhost")
        chart = ChartBuilder({
            "name": "mlbench-helm",
            "source": {
                "type": "git",
                "location": "https://github.com/mlbench/mlbench-helm",
            },
        })

        values = {
            "limits": {
                "workers": num_workers - 1,
                "gpu": num_gpus,
                "cpu": num_cpus
            }
        }

        if custom_value:
            # merge custom values with values
            for cv in custom_value:
                key, v = cv.split("=", 1)

                current = values
                key_path = key.split(".")

                for k in key_path[:-1]:
                    if k not in current:
                        current[k] = {}

                    current = current[k]

                current[key_path[-1]] = v

        tiller.install_release(
            chart.get_helm_chart(),
            name=name,
            wait=True,
            dry_run=False,
            namespace="default",
            values=values,
        )

        portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        "sourceRanges": "0.0.0.0/0",
        "allowed": [{
            "IPProtocol": "tcp",
            "ports": [mlbench_client.port]
        }],
    }

    firewalls.insert(project=project, body=firewall_body).execute()

    config = get_config()

    config.set("general", "provider", "gke")

    config.set("gke", "cluster", cluster.endpoint)

    write_config(config)

    click.echo("MLBench successfully deployed")
Exemple #14
0
def get_role_client():
    config_kube()
    role_client = client.RbacAuthorizationV1beta1Api()
    return role_client
Exemple #15
0
def create_gcloud(num_workers, release, kubernetes_version, machine_type,
                  disk_size, num_cpus, num_gpus, gpu_type, zone, project,
                  preemptible, custom_value):
    from google.cloud import container_v1
    import google.auth
    from googleapiclient import discovery, http

    credentials, default_project = google.auth.default()

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = '{}-{}'.format(release, num_workers)
    name_path = 'projects/{}/locations/{}/'.format(project, zone)

    extraargs = {}

    if num_gpus > 0:
        extraargs['accelerators'] = [container_v1.types.AcceleratorConfig(
            accelerator_count=num_gpus, accelerator_type=gpu_type)]

    # delete existing firewall, if any
    firewalls = discovery.build(
        'compute', 'v1', cache_discovery=False).firewalls()

    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = '{}-firewall'.format(name)

    if any(f['name'] == fw_name for f in existing_firewalls['items']):
        response = {}
        while not hasattr(response, 'status'):
            try:
                response = firewalls.delete(
                    project=project, firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}
        while hasattr(response, 'status') and response.status < response.DONE:
            response = gclient.get_operation(
                None, None, None, name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
            name=name,
            initial_node_count=num_workers,
            node_config=container_v1.types.NodeConfig(
                machine_type=machine_type,
                disk_size_gb=disk_size,
                preemptible=preemptible,
                oauth_scopes=[
                    'https://www.googleapis.com/auth/devstorage.full_control',
                ],
                **extraargs
            ),
            addons_config=container_v1.types.AddonsConfig(
                http_load_balancing=container_v1.types.HttpLoadBalancing(
                    disabled=True,
                ),
                horizontal_pod_autoscaling=
                    container_v1.types.HorizontalPodAutoscaling(
                        disabled=True,
                    ),
                kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                    disabled=True,
                ),
                network_policy_config=container_v1.types.NetworkPolicyConfig(
                    disabled=False,
                ),
            ),
            logging_service=None,
            monitoring_service=None
        )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(
            None, None, None, name=name_path + '/' + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError('Cluster creation failed!')

    cluster = gclient.get_cluster(
        None, None, None, name=name_path + '/' + name)

    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)
    configuration = client.Configuration()
    configuration.host = f'https://{cluster.endpoint}:443'
    configuration.verify_ssl = False
    configuration.api_key = {'authorization': 'Bearer ' + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            dep['spec']['selector'] = {
                'matchLabels': dep['spec']['template']['metadata']['labels']
                }
            dep = client.ApiClient()._ApiClient__deserialize(dep, 'V1DaemonSet')
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set('kube-system', body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        'kube-system',
        {
            'apiVersion': 'v1',
            'kind': 'ServiceAccount',
            'metadata': {
                'name': 'tiller',
                'generateName': 'tiller',
                'namespace': 'kube-system',
            },
        })

    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding(
        {
            'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
            'kind': 'ClusterRoleBinding',
            'metadata': {
                'name': 'tiller'
            },
            'roleRef': {
                'apiGroup': 'rbac.authorization.k8s.io',
                'kind': 'ClusterRole',
                'name': 'cluster-admin'
            },
            'subjects': [
                {
                    'kind': 'ServiceAccount',
                    'name': 'tiller',
                    'namespace': 'kube-system'
                }
            ]
        })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service(
        'kube-system',
        tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment(
        'kube-system',
        tiller_dep)

    sleep(1)

    pods = client.CoreV1Api().list_namespaced_pod(
        namespace='kube-system',
        label_selector='app=helm'
    )

    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace='kube-system',
            name=tiller_pod.metadata.name
        )
        if resp.status.phase != 'Pending':
            break
        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    #     )

    with subprocess.Popen([
            'kubectl',
            'port-forward',
            '--namespace={}'.format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name, '{0}:{0}'.format(ports),
            '--server={}'.format(configuration.host),
            '--token={}'.format(credentials.token),
            '--insecure-skip-tls-verify=true']) as portforward:

        sleep(5)
        # install chart
        tiller = Tiller('localhost')
        chart = ChartBuilder(
            {
                'name': 'mlbench-helm',
                'source': {
                    'type': 'git',
                    'location': 'https://github.com/mlbench/mlbench-helm'
                }})

        values = {
            'limits': {
                'workers': num_workers - 1,
                'gpu': num_gpus,
                'cpu': num_cpus
            }
        }

        if custom_value:
            # merge custom values with values
            for cv in custom_value:
                key, v = cv.split("=", 1)

                current = values
                key_path = key.split(".")

                for k in key_path[:-1]:
                    if k not in current:
                        current[k] = {}

                    current = current[k]

                current[key_path[-1]] = v

        tiller.install_release(
            chart.get_helm_chart(),
            name=name,
            wait=True,
            dry_run=False,
            namespace='default',
            values=values)

        portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        "sourceRanges": "0.0.0.0/0",
        "allowed": [
            {"IPProtocol": "tcp", "ports": [mlbench_client.port]}
        ]
    }

    firewalls.insert(project=project, body=firewall_body).execute()

    config = get_config()

    config.set('general', 'provider', 'gke')

    config.set('gke', 'cluster', cluster.endpoint)

    write_config(config)

    click.echo("MLBench successfully deployed")
Exemple #16
0
 def _get_client(self, context):
     conf = config.new_client_from_config(context=context)
     return (k8s_client.CoreV1Api(conf),
             k8s_client.RbacAuthorizationV1beta1Api(conf))
    def __init__(self,
                 cluster_name="default-cluster",
                 kubernetes_proxy_addr=None,
                 redis_ip=None,
                 redis_port=6379,
                 useInternalIP=False,
                 namespace='default',
                 service_types=None,
                 create_namespace_if_not_exists=False):
        """

        Parameters
        ----------
        cluster_name : str
            A unique name for this Clipper cluster. This can be used to run multiple Clipper
            clusters on the same Kubernetes cluster without interfering with each other.
            Kubernetes cluster name must follow Kubernetes label value naming rule, namely:
            Valid label values must be 63 characters or less and must be empty or begin and end with
            an alphanumeric character ([a-z0-9A-Z]) with dashes (-), underscores (_), dots (.),
            and alphanumerics between. See more at:
            https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
        kubernetes_proxy_addr : str, optional
            The proxy address if you are proxying connections locally using ``kubectl proxy``.
            If this argument is provided, Clipper will construct the appropriate proxy
            URLs for accessing Clipper's Kubernetes services, rather than using the API server
            addres provided in your kube config.
        redis_ip : str, optional
            The address of a running Redis cluster. If set to None, Clipper will start
            a Redis deployment for you.
        redis_port : int, optional
            The Redis port. If ``redis_ip`` is set to None, Clipper will start Redis on this port.
            If ``redis_ip`` is provided, Clipper will connect to Redis on this port.
        useInternalIP : bool, optional
            Use Internal IP of the K8S nodes . If ``useInternalIP`` is set to False, Clipper will
            throw an exception if none of the nodes have ExternalDNS.
            If ``useInternalIP`` is set to true, Clipper will use the Internal IP of the K8S node
            if no ExternalDNS exists for any of the nodes.
        namespace: str, optional
            The Kubernetes namespace to use .
            If this argument is provided, all Clipper artifacts and resources will be created in this
            k8s namespace. If not "default" namespace is used.
        service_types: dict, optional
            Specify what kind of Kubernetes service you want.
            You must use predefined 'ServiceTypes' in Kubernetes as value. See more at:
            https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types
            For example, service_types = {
                'redis': 'NodePort',
                'management': 'LoadBalancer',
                'query': 'LoadBalancer',
                'query-rpc': 'ClusterIP',
                'metric': 'LoadBalancer'
            }
        create_namespace_if_not_exists: bool, False
            Create a k8s namespace if the namespace doesnt already exist.
            If this argument is provided and the k8s namespace does not exist a new k8s namespace will
            be created.

        Note
        ----
        Clipper stores all persistent configuration state (such as registered application and model
        information) in Redis. If you want Clipper to be durable and able to recover from failures,
        we recommend configuring your own persistent and replicated Redis cluster rather than
        letting Clipper launch one for you.
        """

        self.cluster_name = cluster_name

        if kubernetes_proxy_addr is not None:
            self.kubernetes_proxy_addr = kubernetes_proxy_addr
            self.use_k8s_proxy = True
        else:
            self.use_k8s_proxy = False

        self.redis_ip = redis_ip
        self.redis_port = redis_port
        self.useInternalIP = useInternalIP
        config.load_kube_config()
        configuration.assert_hostname = False
        self._k8s_v1 = client.CoreV1Api()
        self._k8s_beta = client.ExtensionsV1beta1Api()
        self._k8s_rbac = client.RbacAuthorizationV1beta1Api()

        # Create the template engine
        # Config: Any variable missing -> Error
        self.template_engine = jinja2.Environment(
            loader=jinja2.FileSystemLoader(cur_dir, followlinks=True),
            undefined=jinja2.StrictUndefined)

        # Check if namespace exists and if create flag set ...create the namespace or throw error
        namespaces = []
        for ns in self._k8s_v1.list_namespace().items:
            namespaces.append(ns.metadata.name)

        if namespace in namespaces:
            self.k8s_namespace = namespace
        elif create_namespace_if_not_exists:
            body = client.V1Namespace()
            body.metadata = client.V1ObjectMeta(name=namespace)
            try:
                self._k8s_v1.create_namespace(body)
            except ApiException as e:
                logging.error(
                    "Exception creating Kubernetes namespace: {}".format(e))
                raise ClipperException(
                    "Could not create Kubernetes namespace. "
                    "Reason: {}".format(e.reason))
            self.k8s_namespace = namespace
        else:
            msg = "Error connecting to Kubernetes cluster. Namespace does not exist. You can pass in KubernetesContainerManager(create_namespace_if_not_exists=True) to crate this namespcae"
            logger.error(msg)
            raise ClipperException(msg)

        # Initialize logger with cluster identifier
        if self.k8s_namespace != "default":
            self.cluster_identifier = "{cluster}".format(
                cluster=self.cluster_name)
        else:
            self.cluster_identifier = "{ns}-{cluster}".format(
                ns=self.k8s_namespace, cluster=self.cluster_name)

        self.logger = ClusterAdapter(logger,
                                     {'cluster_name': self.cluster_identifier})

        self.service_types = self._determine_service_types(service_types)
Exemple #18
0
 def factory(self, kind):
     if kind == 'Service':
         api_instance = client.CoreV1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_service,
             'list_namespaced_resource': api_instance.list_namespaced_service,
             'create_namespaced_resource': api_instance.create_namespaced_service,
             'patch_namespaced_resource': api_instance.patch_namespaced_service
         })
     elif kind == 'ServiceAccount':
         api_instance = client.CoreV1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_service_account,
             'list_namespaced_resource': api_instance.list_namespaced_service_account,
             'create_namespaced_resource': api_instance.create_namespaced_service_account,
             'patch_namespaced_resource': api_instance.patch_namespaced_service_account
         })
     elif kind == 'Deployment':
         api_instance = client.AppsV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_deployment,
             'list_namespaced_resource': api_instance.list_namespaced_deployment,
             'create_namespaced_resource': api_instance.create_namespaced_deployment,
             'patch_namespaced_resource': api_instance.patch_namespaced_deployment
         })
     elif kind == 'ClusterRole':
         api_instance = client.RbacAuthorizationV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_resource': api_instance.read_cluster_role,
             'create_resource': api_instance.create_cluster_role,
             'patch_resource': api_instance.patch_cluster_role
         })
     elif kind == 'Role':
         api_instance = client.RbacAuthorizationV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_role,
             'create_namespaced_resource': api_instance.create_namespaced_role,
             'patch_namespaced_resource': api_instance.patch_namespaced_role
         })
     elif kind == 'RoleBinding':
         api_instance = client.RbacAuthorizationV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_role_binding,
             'create_namespaced_resource': api_instance.create_namespaced_role_binding,
             'patch_namespaced_resource': api_instance.patch_namespaced_role_binding
         })
     elif kind == 'ClusterRoleBinding':
         api_instance = client.RbacAuthorizationV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_resource': api_instance.read_cluster_role_binding,
             'create_resource': api_instance.create_cluster_role_binding,
             'patch_resource': api_instance.patch_cluster_role_binding
         })
     elif kind == 'StatefulSet':
         api_instance = client.AppsV1beta1Api()
         api_instance = Adapter(api_instance, **{
             'read_namespaced_resource': api_instance.read_namespaced_stateful_set,
             'list_namespaced_resource': api_instance.list_namespaced_stateful_set,
             'create_namespaced_resource': api_instance.create_namespaced_stateful_set,
             'patch_namespaced_resource': api_instance.patch_namespaced_stateful_set
         })
     else:
         # TODO: implement all resource kinds
         raise TypeError('kind {} not implemented'.format(kind))
     return api_instance
Exemple #19
0
    def get_resources(self, resource, namespace):
        names = []
        config.load_kube_config()

        v1 = client.CoreV1Api()
        v1Beta1 = client.AppsV1beta1Api()
        extensionsV1Beta1 = client.ExtensionsV1beta1Api()
        autoscalingV1Api = client.AutoscalingV1Api()
        rbacAPi = client.RbacAuthorizationV1beta1Api()
        batchV1Api = client.BatchV1Api()
        batchV2Api = client.BatchV2alpha1Api()

        if resource == "pod":
            ret = v1.list_pod_for_all_namespaces(watch=False)
        elif resource == "service":
            ret = v1.list_service_for_all_namespaces(watch=False)
        elif resource == "deployment":
            ret = v1Beta1.list_deployment_for_all_namespaces(watch=False)
        elif resource == "statefulset":
            ret = v1Beta1.list_stateful_set_for_all_namespaces(watch=False)
        elif resource == "node":
            ret = v1.list_node(watch=False)
        elif resource == "namespace":
            ret = v1.list_namespace(watch=False)
        elif resource == "daemonset":
            ret = extensionsV1Beta1.list_daemon_set_for_all_namespaces(watch=False)
        elif resource == "networkpolicy":
            ret = extensionsV1Beta1.list_network_policy_for_all_namespaces(watch=False)
        elif resource == "thirdpartyresource":
            ret = extensionsV1Beta1.list_third_party_resource(watch=False)
        elif resource == "replicationcontroller":
            ret = v1.list_replication_controller_for_all_namespaces(watch=False)
        elif resource == "replicaset":
            ret = extensionsV1Beta1.list_replica_set_for_all_namespaces(watch=False)
        elif resource == "ingress":
            ret = extensionsV1Beta1.list_ingress_for_all_namespaces(watch=False)
        elif resource == "endpoints":
            ret = v1.list_endpoints_for_all_namespaces(watch=False)
        elif resource == "configmap":
            ret = v1.list_config_map_for_all_namespaces(watch=False)
        elif resource == "event":
            ret = v1.list_event_for_all_namespaces(watch=False)
        elif resource == "limitrange":
            ret = v1.list_limit_range_for_all_namespaces(watch=False)
        elif resource == "configmap":
            ret = v1.list_config_map_for_all_namespaces(watch=False)
        elif resource == "persistentvolume":
            ret = v1.list_persistent_volume(watch=False)
        elif resource == "secret":
            ret = v1.list_secret_for_all_namespaces(watch=False)
        elif resource == "resourcequota":
            ret = v1.list_resource_quota_for_all_namespaces(watch=False)
        elif resource == "componentstatus":
            ret = v1.list_component_status(watch=False)
        elif resource == "podtemplate":
            ret = v1.list_pod_template_for_all_namespaces(watch=False)
        elif resource == "serviceaccount":
            ret = v1.list_service_account_for_all_namespaces(watch=False)
        elif resource == "horizontalpodautoscaler":
            ret = autoscalingV1Api.list_horizontal_pod_autoscaler_for_all_namespaces(watch=False)
        elif resource == "clusterrole":
            ret = rbacAPi.list_cluster_role(watch=False)
        elif resource == "clusterrolebinding":
            ret = rbacAPi.list_cluster_role_binding(watch=False)
        elif resource == "job":
            ret = batchV1Api.list_job_for_all_namespaces(watch=False)
        elif resource == "cronjob":
            ret = batchV2Api.list_cron_job_for_all_namespaces(watch=False)
        elif resource == "scheduledjob":
            ret = batchV2Api.list_scheduled_job_for_all_namespaces(watch=False)
        for i in ret.items:
            names.append((i.metadata.name, i.metadata.namespace))
        return names
 def rbac_v1_api(self):
     return client.RbacAuthorizationV1beta1Api(self.configuration)