def take_action(self, parsed_args):
    """Delete the GKE clusters derived from a dataset mapping file.

    One cluster name is built per entry of the JSON mapping, as
    ``<prefix><index>``. The names are printed and the user must answer
    ``y`` on stdin before ``gcloud container clusters delete`` is invoked.

    Args:
        parsed_args: parsed CLI arguments; ``dataset_mapping``,
            ``gcs_project_id``, ``gcs_region`` and ``prefix`` are read.

    Raises:
        RuntimeError: if no mapping file was given, the mapping contains no
            entries, or the user does not confirm the deletion.
    """
    dataset_mapping = parsed_args.dataset_mapping
    if not dataset_mapping:
        raise RuntimeError('dataset mapping file is required')

    # Show what currently exists so the user can sanity-check the deletion.
    client = container_v1.ClusterManagerClient()
    clusters = client.list_clusters(parsed_args.gcs_project_id, '-',
                                    parent=parsed_args.gcs_region).clusters
    print('Existing clusters: {0}'.format(
        [cluster.name for cluster in clusters]))

    with open(dataset_mapping, "r") as f:
        datasets = json.load(f)

    dataset_clusters = [
        '{0}{1}'.format(parsed_args.prefix, i)
        for i, _ in enumerate(datasets)
    ]
    if not dataset_clusters:
        # Without names gcloud would be called with an empty argument.
        raise RuntimeError('dataset mapping is empty, nothing to delete')

    print('this will try to delete clusters {0}. confirm (y/n)?'.format(
        ' '.join(dataset_clusters)))
    if input().lower() != 'y':
        raise RuntimeError('canceling deletion')

    # Build argv as a list: splitting a formatted string on spaces breaks as
    # soon as the prefix or the region contains whitespace.
    command = [
        'gcloud', 'container', 'clusters', '--quiet', 'delete',
        '--region', str(parsed_args.gcs_region),
    ] + dataset_clusters
    subprocess.run(command)
def db():
    """
    Yields a real cloud Firestore client; once the consumer is done, removes
    the test users and every Agones game server that is not in state 'Ready'.
    """
    database = firestore.Client()
    yield database

    # Test users are the ones whose username sorts within ['__', '_`').
    user_collection = database.collection('users')
    test_users = user_collection.where('username', '>=', '__').where(
        'username', '<', '_`')
    pending_deletes = database.batch()
    for doc in test_users.stream():
        pending_deletes.delete(user_collection.document(doc.id))
    pending_deletes.commit()

    # Clean up game servers left behind on the development cluster.
    cluster = container_v1.ClusterManagerClient().get_cluster(
        'st-dev-252104', 'asia-northeast1-b', 'st-dev')
    k8s = kubernetes_api(cluster)
    listing = k8s.get_namespaced_custom_object(
        'agones.dev', 'v1', 'default', 'gameservers', '')
    for server in listing['items']:
        if server['status']['state'] != 'Ready':
            k8s.delete_namespaced_custom_object(
                'agones.dev', 'v1', 'default', 'gameservers',
                server['metadata']['name'], V1DeleteOptions())
def __init__(self, credentials=None, throttle_secs=1.0):
    """Create the cluster manager client and initialise throttling state.

    Args:
        credentials: credentials handed to ``ClusterManagerClient``;
            ``None`` is passed through unchanged.
        throttle_secs: stored on the instance as ``throttle_secs``.
    """
    self.credentials = credentials
    client = container.ClusterManagerClient(credentials=self.credentials)
    self._client = client
    self.throttle_secs = throttle_secs
    # Construction time is recorded as the time of the last request.
    self._last_request_time = time.time()
def take_action(self, parsed_args):
    """Create one GKE cluster per entry of the dataset mapping file.

    Cluster ``i`` is named ``<prefix><i>`` and gets its own subnetwork
    ``10.<110 + i>.0.0/21``. Each entry must provide ``flavor`` (machine
    type) and ``nodes`` (node count); entries that contain ``dpath`` get no
    local SSD, all others get one.

    Args:
        parsed_args: parsed CLI arguments; ``dataset_mapping``,
            ``gcs_project_id``, ``gcs_region`` and ``prefix`` are read.

    Raises:
        RuntimeError: if no mapping file was given.
    """
    dataset_mapping = parsed_args.dataset_mapping
    if not dataset_mapping:
        raise RuntimeError('dataset mapping file is required')

    with open(dataset_mapping, "r") as f:
        datasets = json.load(f)

    # The "cluster already exists" check is disabled in this code, but the
    # listing call is kept so bad credentials / project / region still fail
    # before any cluster creation is started.
    client = container_v1.ClusterManagerClient()
    client.list_clusters(parsed_args.gcs_project_id, '-',
                         parent=parsed_args.gcs_region)

    for i, ds in enumerate(datasets):
        cname = '{0}{1}'.format(parsed_args.prefix, i)
        local_ssd_count = 0 if 'dpath' in ds else 1
        # Build argv as a list: splitting a formatted string on spaces breaks
        # as soon as any substituted value contains whitespace.
        command = [
            'gcloud', 'container', 'clusters', 'create',
            '--quiet', '--async',
            '--no-enable-basic-auth',
            '--no-issue-client-certificate',
            '--disk-size', '90',
            '--disk-type', 'pd-ssd',
            '--image-type', 'cos',
            '--machine-type', str(ds['flavor']),
            '--num-nodes', str(ds['nodes']),
            '--region', str(parsed_args.gcs_region),
            '--cluster-version', '1.12.8-gke.10',
            '--metadata', 'disable-legacy-endpoints=true',
            '--no-enable-cloud-logging',
            '--no-enable-cloud-monitoring',
            '--no-enable-autorepair',
            '--enable-ip-alias',
            '--create-subnetwork',
            'name={0},range=10.{1}.0.0/21'.format(cname, 110 + i),
            '--local-ssd-count', str(local_ssd_count),
            cname,
        ]
        subprocess.run(command)
def configure_for_gke(zone, name):
    '''
    Configures the Kubernetes client API to communicate with a cluster running on Google Kubernetes Engine (GKE).

    - `zone` specifies the GCP zone of the Kubernetes cluster
    - `name` specifies the name of the Kubernetes cluster

    Raises `RuntimeError` if the GCP project ID cannot be auto-detected, or if
    the credentials carry no bearer token after the GKE API has been called.
    '''

    # Retrieve our credentials from the environment
    credentials, project = google.auth.default()

    # Verify that we were able to automatically detect the project ID
    if project is None:
        raise RuntimeError('could not auto-detect GCP project ID')

    # Create a GKE cluster management client using our credentials
    gke_client = gke.ClusterManagerClient(credentials=credentials)

    # Retrieve the details for our GKE cluster so we can extract the Kubernetes master endpoint
    # (Note that this call will also populate our credentials with the bearer token needed to authenticate with the master)
    cluster = gke_client.get_cluster(project, zone, name)

    # Fail with a clear message instead of the TypeError that
    # `'Bearer ' + None` would raise.
    # TODO: if credentials.token is None: get K8S_SERVICE_ACCT_TOKEN from Firestore with botleague_helpers simple_key_value_store package
    if credentials.token is None:
        raise RuntimeError('GCP credentials did not provide a bearer token for the Kubernetes API')

    # Populate our Kubernetes API client configuration
    # (Based on the example from here: <https://stackoverflow.com/a/52082629>)
    config = k8s.Configuration()
    config.api_key = {'authorization': 'Bearer ' + credentials.token}
    config.host = 'https://{}:443'.format(cluster.endpoint)
    # NOTE(review): TLS verification of the master is disabled here.
    config.verify_ssl = False
    k8s.Configuration.set_default(config)
def test_list_clusters(self):
    """Smoke test: list the clusters of $PROJECT_ID in zone us-central1-a."""
    project = os.environ['PROJECT_ID']
    cluster_manager = container_v1.ClusterManagerClient()
    # Passing only means the call did not raise; the result is not inspected.
    cluster_manager.list_clusters(project, 'us-central1-a')
def delete_gcloud(name, zone, project):
    """Delete a GKE cluster and block until the delete operation finishes.

    Args:
        name: cluster name.
        zone: location (zone) of the cluster.
        project: GCP project id; when falsy, the default project of the
            application-default credentials is used.

    Raises:
        ValueError: if the operation ends in a state other than DONE.
    """
    from google.cloud import container_v1
    import google.auth

    _, default_project = google.auth.default()

    if not project:
        project = default_project

    # delete cluster
    gclient = container_v1.ClusterManagerClient()

    location_path = 'projects/{}/locations/{}'.format(project, zone)
    cluster_path = '{}/clusters/{}'.format(location_path, name)

    response = gclient.delete_cluster(None, None, None, name=cluster_path)

    # Wait for the delete operation to complete. Operations are addressed as
    # projects/*/locations/*/operations/*; the previous code produced
    # "<location>//<operation>" (double slash, no "operations" segment).
    while response.status < response.DONE:
        sleep(1)
        response = gclient.get_operation(
            None, None, None,
            name='{}/operations/{}'.format(location_path, response.name))

    if response.status != response.DONE:
        raise ValueError('Cluster deletion failed!')

    click.echo("Cluster deleted.")
def get_client(self):
    """Return the cluster manager client, creating and caching it on first use."""
    if self._client is not None:
        return self._client

    creds = self._get_credentials()
    # Add client library info for better error tracking
    library_info = ClientInfo(
        client_library_version='airflow_v' + version.version)
    self._client = container_v1.ClusterManagerClient(
        credentials=creds, client_info=library_info)
    return self._client
def createNodePool():
    """Create the 'default-pool' node pool on the cluster named by the environment.

    Reads GCP_PROJECT_ID, GCP_ZONE, GCP_CLUSTER_ID and GCP_REGION; a missing
    variable raises KeyError.
    """
    env = os.environ
    project_id = env['GCP_PROJECT_ID']
    zone = env['GCP_ZONE']
    cluster_id = env['GCP_CLUSTER_ID']
    env['GCP_REGION']  # not used below, but still required to be set

    node_config = {
        'machine_type': 'n1-standard-4',
        'disk_size_gb': 30,
        'preemptible': True,
        'oauth_scopes': ('https://www.googleapis.com/auth/cloud-platform', ),
    }
    node_pool = {
        'name': 'default-pool',
        'config': node_config,
        'initial_node_count': 1,
        'autoscaling': {
            'enabled': True,
            'min_node_count': 1,
            'max_node_count': 3,
        },
        'management': {
            'auto_repair': True,
        },
    }

    gke = container_v1.ClusterManagerClient()
    gke.create_node_pool(project_id, zone, cluster_id, node_pool)
def test_list_clusters(self):
    """Smoke test: list clusters through the request-dict calling convention."""
    project = os.environ["PROJECT_ID"]
    cluster_manager = container_v1.ClusterManagerClient()
    list_request = {"project_id": project, "zone": "us-central1-a"}
    # Passing only means the call did not raise; the result is not inspected.
    cluster_manager.list_clusters(request=list_request)
def get_gke_client(credentials=None):
    """Build a GKE cluster manager client.

    Args:
        credentials: explicit credentials; only needed if the Auth
            environment variable is not set.

    Returns:
        A new ``container_v1.ClusterManagerClient``.
    """
    return container_v1.ClusterManagerClient(credentials=credentials)
def _init_k8s_client(self):
    """Initialise ``self.k8s_client`` (a ``BatchV1Api``) used to read Job statuses.

    Two strategies are tried in order:

    1. Look the cluster up through the GKE API (zonal first, then regional,
       where the region is the zone without its last two characters) and
       build a client from its endpoint, CA certificate and an access token.
    2. Fall back to the local kubeconfig file.

    Raises:
        Exception: the kubeconfig error is re-raised when both strategies
            fail.
    """
    # Attempt to initialize a Kubernetes client to retrieve Job statuses.
    # Different methods are used depending on where this code runs.
    try:
        # This method is used when there is no local kubeconfig file, e.g.
        # running this code within a Cloud Function. For local runs, you can
        # use this path by running `gcloud auth application-default login`.
        self.logger.info('Attempting to init k8s client from cluster response.')
        container_client = container_v1.ClusterManagerClient()
        # Try zonal cluster first, then try regional.
        try:
            cluster_path = "projects/{}/locations/{}/clusters/{}".format(
                self.project_id, self.zone, self.cluster_name)
            response = container_client.get_cluster(
                None, None, None, name=cluster_path)
        except google.api_core.exceptions.NotFound:
            self.logger.warning('No zonal cluster found for {}. Trying regional.'.format(cluster_path))
            # TODO: include this in message instead
            region = self.zone[:-2]
            cluster_path = "projects/{}/locations/{}/clusters/{}".format(
                self.project_id, region, self.cluster_name)
            response = container_client.get_cluster(
                None, None, None, name=cluster_path)

        # Fetch credentials once, with the cloud-platform scope, and use those
        # for the token. Previously the scoped credentials were discarded and
        # a second, unscoped set was refreshed instead.
        credentials, _ = google.auth.default(
            scopes=['https://www.googleapis.com/auth/cloud-platform'])
        credentials.refresh(google.auth.transport.requests.Request())

        configuration = kubernetes.client.Configuration()
        configuration.host = f'https://{response.endpoint}'
        # The CA file has to outlive this block because the client reads it
        # by path, hence delete=False.
        with NamedTemporaryFile(delete=False) as ca_cert:
            ca_cert.write(
                base64.b64decode(response.master_auth.cluster_ca_certificate))
        configuration.ssl_ca_cert = ca_cert.name
        configuration.api_key_prefix['authorization'] = 'Bearer'
        configuration.api_key['authorization'] = credentials.token

        self.k8s_client = kubernetes.client.BatchV1Api(
            kubernetes.client.ApiClient(configuration))
        self.logger.info('Successful init of k8s client from cluster response.')
    except Exception as e1:
        # This method is generally used for local runs where the user has already
        # ran `gcloud container clusters get-credentials` to get a kubeconfig.
        self.logger.warning(
            'Failed to load k8s client from cluster response: {}. '
            'Falling back to local kubeconfig file.'.format(e1))
        try:
            kubernetes.config.load_kube_config()
            self.k8s_client = kubernetes.client.BatchV1Api()
            self.logger.info(
                'Successful init of k8s client from local kubeconfig file.')
        except Exception as e2:
            self.logger.fatal(
                'Failed both methods of loading k8s client. Error for '
                'cluster response method: {}. Error for local '
                'kubeconfig file: {}. No job status will be '
                'collected.'.format(e1, e2))
            raise
def gcp_connect_service(service, credentials=None, region_name=None):
    """Return a client object for the named GCP service.

    Args:
        service: service key, e.g. 'cloudstorage', 'iam', 'kubernetesengine'.
        credentials: accepted but not used by this function.
        region_name: accepted but not used by this function.

    Returns:
        The client for ``service``, or ``None`` when the service is unknown
        or building the client raised (the problem is reported through
        ``printException``).
    """
    logging.getLogger('googleapiclient.discovery_cache').setLevel(
        logging.ERROR)
    # Set logging level to error for GCP services as otherwise generates a lot of warnings
    logging.getLogger().setLevel(logging.ERROR)

    def build_api(api_name, api_version):
        # Discovery-based clients share the same cache settings.
        return discovery.build(api_name, api_version,
                               cache_discovery=False, cache=MemoryCache())

    # Checked in order, first match wins.
    factories = (
        ('cloudresourcemanager', lambda: build_api('cloudresourcemanager', 'v1')),
        ('cloudresourcemanager-v2', lambda: build_api('cloudresourcemanager', 'v2')),
        ('cloudstorage', lambda: storage.Client()),
        ('cloudsql', lambda: build_api('sqladmin', 'v1beta4')),
        ('iam', lambda: build_api('iam', 'v1')),
        ('stackdriverlogging', lambda: stackdriver_logging.Client()),
        ('stackdrivermonitoring', lambda: monitoring_v3.MetricServiceClient()),
        ('computeengine', lambda: build_api('compute', 'v1')),
        ('kubernetesengine', lambda: container_v1.ClusterManagerClient()),
    )

    try:
        for known_service, make_client in factories:
            if service == known_service:
                return make_client()
        printException('Service %s not supported' % service)
        return None
    except Exception as e:
        printException(e)
        return None
def __init__(self, project_id, location):
    """Store the project and location and create the cluster manager client.

    Args:
        project_id: stored as ``self.project_id``.
        location: stored as ``self.location``.
    """
    self.project_id, self.location = project_id, location

    # Add client library info for better error tracking
    library_version = 'airflow_v' + version.version
    self.client = container_v1.ClusterManagerClient(
        client_info=ClientInfo(client_library_version=library_version))
def get_conn(self) -> container_v1.ClusterManagerClient:
    """
    Returns the ClusterManagerClient object, creating and caching it on
    first use.

    :rtype: google.cloud.container_v1.ClusterManagerClient
    """
    if self._client is not None:
        return self._client

    creds = self._get_credentials()
    self._client = container_v1.ClusterManagerClient(
        credentials=creds, client_info=CLIENT_INFO)
    return self._client
def __init__(self, credentials: _credentials_type = None, throttle_secs: float = 1.0):
    """Create the cluster manager client and initialise throttling state.

    Args:
        credentials: credentials for the client; a ``str`` is first turned
            into a credentials object with ``make_credentials``.
        throttle_secs: stored on the instance as ``throttle_secs``.
    """
    self.credentials = (
        make_credentials(credentials)
        if isinstance(credentials, str)
        else credentials
    )
    client = container.ClusterManagerClient(credentials=self.credentials)
    self._client = client
    self.throttle_secs = throttle_secs
    # Construction time is recorded as the time of the last request.
    self._last_request_time = time.time()
def check_cluster_label(self, label):
    """Return True when the cluster's resource label `label` equals "true".

    The cluster is identified by ``project_id``, ``cluster_zone`` and
    ``cluster_name`` of ``self.model``.
    """
    cfg = self.model
    gke = container_v1.ClusterManagerClient()
    resource_name = "projects/{0}/locations/{1}/clusters/{2}".format(
        cfg.project_id, cfg.cluster_zone, cfg.cluster_name
    )
    cluster = gke.get_cluster(name=resource_name)
    label_value = cluster.resource_labels[label]
    return label_value == "true"
def get_client(self):
    """
    Returns the ClusterManagerClient object, creating and caching it on
    first use.

    :rtype: google.cloud.container_v1.ClusterManagerClient
    """
    if self._client is not None:
        return self._client

    creds = self._get_credentials()
    # Add client library info for better error tracking
    library_info = ClientInfo(
        client_library_version='airflow_v' + version.version)
    self._client = container_v1.ClusterManagerClient(
        credentials=creds, client_info=library_info)
    return self._client
def gcloud_delete_cluster(name, zone, project):
    """Deletes the cluster and waits for the delete operation to finish

    Exits the process with status 1 (after printing hints) when Google
    reports that the cluster does not exist.

    Args:
        name (str): Cluster name
        zone (str): Cluster zone
        project (str): Cluster project; when falsy, the default project of
            the application-default credentials is used

    Raises:
        click.UsageError: if no gcloud credentials can be found
        ValueError: if the operation ends in a state other than DONE
    """
    try:
        _, default_project = google.auth.default()
    except DefaultCredentialsError:
        raise click.UsageError(
            "Couldn't find gcloud credentials. Install the gcloud"
            " sdk ( https://cloud.google.com/sdk/docs/quickstart-linux ) and "
            "run 'gcloud auth application-default login' to login and create "
            "your credentials.")

    if not project:
        project = default_project

    # delete cluster
    gclient = container_v1.ClusterManagerClient()

    # API resource names always use "/"; os.path.join is platform dependent
    # and must not be used to build them.
    location_path = "projects/{}/locations/{}".format(project, zone)
    cluster_path = "{}/clusters/{}".format(location_path, name)

    try:
        response = gclient.delete_cluster(None, None, None, name=cluster_path)
    except NotFound as e:
        click.echo("Exception from Google: " + str(e))
        click.echo("Double-check your project, zone and cluster name")
        click.echo(
            "Try running 'gcloud container clusters list' to list all active clusters"
        )
        sys.exit(1)

    # wait for operation to complete; operations are addressed as
    # projects/*/locations/*/operations/*, not under ".../clusters/".
    while response.status < response.DONE:
        sleep(1)
        response = gclient.get_operation(
            None, None, None,
            name="{}/operations/{}".format(location_path, response.name))

    if response.status != response.DONE:
        raise ValueError("Cluster deletion failed!")
def reset_demo(event, context):
    """Restart the demo deployment, like `kubectl rollout restart` would.

    Looks up the cluster named ``CLUSTER_NAME`` in ``GCP_PROJECT``, builds a
    Kubernetes client from its endpoint and CA certificate, and patches the
    ``restartedAt`` annotation of ``DEPLOYMENT_NAME`` in namespace "default".

    Args:
        event: not used by this function.
        context: not used by this function.

    Raises:
        RuntimeError: if no cluster named ``CLUSTER_NAME`` exists.
    """
    # Authenticate to Google using Application Default credentials
    credentials, _ = google.auth.default()

    # Fetch all clusters in the project
    cluster_client = container_v1.ClusterManagerClient(credentials=credentials)
    response = cluster_client.list_clusters(GCP_PROJECT, "-")

    # Get the cluster with the requested name from the list of clusters.
    # A default is given so a missing cluster yields a readable error rather
    # than a bare StopIteration.
    cluster = next(
        (candidate for candidate in response.clusters
         if candidate.name == CLUSTER_NAME),
        None,
    )
    if cluster is None:
        raise RuntimeError(
            f"GKE cluster {CLUSTER_NAME!r} not found in project {GCP_PROJECT!r}")

    cluster_ip = cluster.endpoint
    cluster_cert = cluster.master_auth.cluster_ca_certificate

    # Configure authentication for Kubernetes API connection
    k8s_config = kubernetes.client.Configuration()
    k8s_config.api_key["authorization"] = credentials.token  # GCP Auth Token
    k8s_config.api_key_prefix["authorization"] = "Bearer"
    k8s_config.host = f"https://{cluster_ip}"

    # Create a temporary file containing the CA cert to validate Kubernetes
    # cluster TLS certificate. It is kept (delete=False) because the client
    # reads it by path.
    with NamedTemporaryFile(delete=False) as cert:
        cert.write(base64.decodebytes(cluster_cert.encode()))
        k8s_config.ssl_ca_cert = cert.name

    # Create the Kubernetes API client
    k8s_client = kubernetes.client.AppsV1Api(kubernetes.client.ApiClient(k8s_config))

    # Payload to reset a pod (see https://stackoverflow.com/a/59051313)
    reset_request = {
        "spec": {
            "template": {
                "metadata": {
                    "annotations": {
                        "kubectl.kubernetes.io/restartedAt": datetime.datetime.now().isoformat()
                    }
                }
            }
        }
    }

    # Perform a `kubectl rollout restart` to restart the pod
    k8s_client.patch_namespaced_deployment(
        DEPLOYMENT_NAME, "default", reset_request
    )
def removeNodePool():
    """Delete all node pools of the cluster and the load balancer pointing at them.

    Reads GCP_PROJECT_ID, GCP_ZONE, GCP_CLUSTER_ID and GCP_REGION from the
    environment. After deleting the node pools it removes the forwarding rule
    whose target pool references the cluster's instances, waits until that
    rule is gone, then deletes the target pool.
    """
    project_id = os.environ['GCP_PROJECT_ID']
    zone = os.environ['GCP_ZONE']
    cluster_id = os.environ['GCP_CLUSTER_ID']
    region = os.environ['GCP_REGION']

    # get loadbalancer info
    compute = discovery.build('compute', 'v1')
    fr_ctrl = compute.forwardingRules()
    tp_ctrl = compute.targetPools()
    forword_rules = fr_ctrl.list(project=project_id,
                                 region=region).execute()['items']
    target_pools = tp_ctrl.list(project=project_id,
                                region=region).execute()['items']

    # get gke cluster info
    client = container_v1.ClusterManagerClient()
    cluster = client.get_cluster(project_id, zone, cluster_id)

    # Drop the literal "-grp" suffix of the instance group name.
    # str.rstrip('-grp') would strip any trailing run of the characters
    # '-', 'g', 'r', 'p' and could eat into the name itself.
    group_name = cluster.node_pools[0].instance_group_urls[0].split('/')[-1]
    suffix = '-grp'
    if group_name.endswith(suffix):
        instance_group_id = group_name[:-len(suffix)]
    else:
        instance_group_id = group_name

    # delete node_pool
    for node_pool in cluster.node_pools:
        client.delete_node_pool(project_id, zone, cluster_id, node_pool.name)

    # remove loadbalancer
    target_pool = pydash.find(
        target_pools, lambda tp: pydash.find(
            tp['instances'],
            lambda instance_url: instance_group_id in instance_url))
    forword_rule = pydash.find(
        forword_rules, lambda fr: fr['target'] == target_pool['selfLink'])
    rule_name = forword_rule['name']

    fr_ctrl.delete(project=project_id,
                   region=region,
                   forwardingRule=rule_name).execute()

    # The target pool can only be deleted once the forwarding rule is gone,
    # so poll until the rule no longer shows up in the listing.
    while True:
        remaining = pydash.find(
            fr_ctrl.list(project=project_id,
                         region=region).execute().get('items'),
            {'name': rule_name})
        if not remaining:
            break
        time.sleep(3)

    tp_ctrl.delete(project=project_id,
                   region=region,
                   targetPool=target_pool['name']).execute()
def __init__(self, project_id, zone, cluster_id, service_account_file):
    """Connect to a GKE cluster with a service account and expose ``self.kube_api``.

    Args:
        project_id: GCP project of the cluster.
        zone: location of the cluster.
        cluster_id: name of the cluster.
        service_account_file: path to the service account key file.
    """
    credentials = service_account.Credentials.from_service_account_file(
        service_account_file)

    # Look up the cluster to learn its endpoint.
    gke = container_v1.ClusterManagerClient(credentials=credentials)
    cluster = gke.get_cluster(
        name=f'projects/{project_id}/locations/{zone}/clusters/{cluster_id}'
    )

    # A scoped copy of the credentials supplies the bearer token.
    cloud_platform_scope = 'https://www.googleapis.com/auth/cloud-platform'
    kubeconfig_creds = credentials.with_scopes([cloud_platform_scope])
    kubeconfig_creds.refresh(Request())

    k8s_config = kubernetes_client.Configuration()
    k8s_config.host = f"https://{cluster.endpoint}:443"
    k8s_config.verify_ssl = False
    k8s_config.api_key = {
        "authorization": "Bearer " + kubeconfig_creds.token,
    }
    kubernetes_client.Configuration.set_default(k8s_config)

    self.kube_api = kubernetes_client.CoreV1Api()
def __init__(self):
    """Load default credentials, create the GKE client and the kubeconfig user entry."""
    creds, project = google.auth.default()
    # Refresh first so that creds.token / creds.expiry are populated below.
    creds.refresh(google.auth.transport.requests.Request())

    self.project = project
    self.client = container_v1.ClusterManagerClient(credentials=creds)

    auth_provider_config = {
        'access-token': creds.token,
        'cmd-args': "config config-helper --format=json",
        'cmd-path': '/usr/lib/google-cloud-sdk/bin/gcloud',
        'expiry': creds.expiry,
        'expiry-key': '{.credential.token_expiry}',
        'token-key': '{.credential.access_token}',
    }
    current_user = {
        'name': getpass.getuser(),
        'user': {
            'auth-provider': {
                'config': auth_provider_config,
                'name': 'gcp',
            },
        },
    }
    self.users = [current_user]
def delete_gcloud(name, zone, project):
    """Delete a GKE cluster and block until the delete operation finishes.

    Args:
        name: cluster name.
        zone: location (zone) of the cluster.
        project: GCP project id; when falsy, the default project of the
            application-default credentials is used.

    Raises:
        click.UsageError: if no gcloud credentials can be found.
        ValueError: if the operation ends in a state other than DONE.
    """
    from google.cloud import container_v1
    import google.auth
    from google.auth.exceptions import DefaultCredentialsError

    try:
        _, default_project = google.auth.default()
    except DefaultCredentialsError:
        raise click.UsageError(
            "Couldn't find gcloud credentials. Install the gcloud"
            " sdk ( https://cloud.google.com/sdk/docs/quickstart-linux ) and "
            "run 'gcloud auth application-default login' to login and create "
            "your credentials.")

    if not project:
        project = default_project

    # delete cluster
    gclient = container_v1.ClusterManagerClient()

    location_path = "projects/{}/locations/{}".format(project, zone)
    cluster_path = "{}/clusters/{}".format(location_path, name)

    response = gclient.delete_cluster(None, None, None, name=cluster_path)

    # Wait for the delete operation to complete. Operations are addressed as
    # projects/*/locations/*/operations/*; the previous code produced
    # "<location>//<operation>" (double slash, no "operations" segment).
    while response.status < response.DONE:
        sleep(1)
        response = gclient.get_operation(
            None, None, None,
            name="{}/operations/{}".format(location_path, response.name))

    if response.status != response.DONE:
        raise ValueError("Cluster deletion failed!")

    click.echo("Cluster deleted.")
def _require_env(name, label, secret=False):
    """Return the non-empty value of environment variable `name` and log it.

    Values flagged as `secret` are logged as '<redacted>' so they never end
    up in the log output. Raises RuntimeError when the variable is unset or
    empty.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(f'required environment variable {name} is not set')
    logger.info('using %s: %s', label, '<redacted>' if secret else value)
    return value


def perform_cloud_ops():
    """Provision the admin web UI, the NiFi registry and related services on the cluster.

    Configuration is taken from the environment:
    GOOGLE_APPLICATION_CREDENTIALS (credentials file), GOOGLE_CLOUD_PROJECT
    (project id), DOMAIN, STATIC_IP, ADMIN_EMAIL, OAUTH_CLIENT_ID,
    OAUTH_CLIENT_SECRET, OAUTH_SECRET, OAUTH_DOMAIN and DJANGO_SECRET_KEY.

    Raises:
        RuntimeError: if one of the required variables is unset or empty
            (GOOGLE_CLOUD_PROJECT is not checked).
    """
    # set GOOGLE_APPLICATION_CREDENTIALS env to credentials file
    # set GOOGLE_CLOUD_PROJECT env to project id
    credentials_file = _require_env('GOOGLE_APPLICATION_CREDENTIALS',
                                    'credentials file')
    domain = _require_env('DOMAIN', 'domain')
    static_ip = _require_env('STATIC_IP', 'static IP')
    admin_email = _require_env('ADMIN_EMAIL', 'ACME admin email')
    oauth_client_id = _require_env('OAUTH_CLIENT_ID', 'oauth client id')
    oauth_client_secret = _require_env('OAUTH_CLIENT_SECRET',
                                       'oauth client secret', secret=True)
    oauth_secret = _require_env('OAUTH_SECRET', 'oauth secret', secret=True)
    oauth_domain = _require_env('OAUTH_DOMAIN', 'oauth domain')
    django_secret_key = _require_env('DJANGO_SECRET_KEY', 'DJANGO_SECRET_KEY',
                                     secret=True)

    credentials, project = google.auth.default()
    gcloud_client = container_v1.ClusterManagerClient(credentials=credentials)

    scan_clusters(gcloud_client, project)

    # FIXME add the k8s cert to a trust store
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

    auth_gcloud_k8s(credentials)

    api_core_v1 = client.CoreV1Api()
    api_apps_v1 = client.AppsV1Api()
    api_custom = client.CustomObjectsApi()
    api_ext_v1_beta1 = client.ApiextensionsV1beta1Api()
    api_rbac_auth_v1_b1 = client.RbacAuthorizationV1beta1Api()

    ensure_traefik(api_core_v1, api_ext_v1_beta1, api_apps_v1, api_custom,
                   api_rbac_auth_v1_b1, admin_email, domain, static_ip,
                   oauth_client_id, oauth_client_secret, oauth_domain,
                   oauth_secret)

    # The credentials file is shipped to the web UI as a Kubernetes secret.
    with open(credentials_file, 'rb') as f:
        gcloud_credentials_b64 = b64encode(f.read()).decode('UTF-8')
    ensure_secret(api=api_core_v1,
                  name='webui-credentials',
                  namespace='default',
                  secret=V1Secret(
                      metadata=client.V1ObjectMeta(name='webui-credentials'),
                      data={'gcloud-credentials': gcloud_credentials_b64}))

    webui_volume_paths = [
        ('data', '/opt/nipyapi/data', '20Gi', 'standard'),
    ]
    webui_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in webui_volume_paths
    ]
    webui_volume_mounts.append(
        V1VolumeMount(name='webui-credentials',
                      mount_path='/root/webui',
                      read_only=True))

    dind_volume_paths = [
        ('docker', '/var/lib/docker', '200Gi', 'standard'),
    ]
    dind_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in dind_volume_paths
    ]
    # The docker socket is shared between the webui and dind containers.
    shared_volume_mounts = [
        V1VolumeMount(name='dind-socket', mount_path='/var/run-shared')
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='admin',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(
                name='webui',
                image='aichrist/nipyapi-ds:latest',
                env=[
                    # FIXME use k8s secrets for these values
                    V1EnvVar(name='DOMAIN', value=domain),
                    V1EnvVar(name='STATIC_IP', value=static_ip),
                    V1EnvVar(name='ADMIN_EMAIL', value=admin_email),
                    V1EnvVar(name='OAUTH_CLIENT_ID', value=oauth_client_id),
                    V1EnvVar(name='OAUTH_CLIENT_SECRET',
                             value=oauth_client_secret),
                    V1EnvVar(name='OAUTH_SECRET', value=oauth_secret),
                    V1EnvVar(name='OAUTH_DOMAIN', value=oauth_domain),
                    V1EnvVar(name='DJANGO_SECRET_KEY',
                             value=django_secret_key),
                    V1EnvVar(name='GOOGLE_APPLICATION_CREDENTIALS',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='CLOUDSDK_AUTH_CREDENTIAL_FILE_OVERRIDE',
                             value='/root/webui/gcloud_credentials.json'),
                    V1EnvVar(name='GOOGLE_CLOUD_PROJECT',
                             value=os.getenv('GOOGLE_CLOUD_PROJECT')),
                    V1EnvVar(name='DOCKER_HOST',
                             value='unix:///var/run-shared/docker.sock'),
                ],
                ports=[V1ContainerPort(container_port=8000)],
                volume_mounts=webui_volume_mounts + shared_volume_mounts),
            V1Container(
                name='dind',
                image='docker:19-dind',
                security_context=V1SecurityContext(privileged=True),
                command=[
                    'dockerd', '-H', 'unix:///var/run-shared/docker.sock'
                ],
                volume_mounts=dind_volume_mounts + shared_volume_mounts)
        ],
        volumes=[
            V1Volume(name='dind-socket', empty_dir={}),
            V1Volume(name='webui-credentials',
                     projected=V1ProjectedVolumeSource(sources=[
                         V1VolumeProjection(secret=V1SecretProjection(
                             name='webui-credentials',
                             items=[
                                 V1KeyToPath(key='gcloud-credentials',
                                             path='gcloud_credentials.json')
                             ]))
                     ]))
        ],
        volume_paths=webui_volume_paths + dind_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='admin',
                              name='admin',
                              target_name='admin',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=8000)

    reg_volume_paths = [
        ('database', '/opt/nifi-registry/nifi-registry-current/database',
         '10Gi', 'standard'),
        ('flow-storage',
         '/opt/nifi-registry/nifi-registry-current/flow_storage', '20Gi',
         'standard'),
    ]
    reg_volume_mounts = [
        V1VolumeMount(name=path[0], mount_path=path[1])
        for path in reg_volume_paths
    ]
    ensure_statefulset_with_containers(
        api_apps_v1=api_apps_v1,
        name='registry',
        namespace='default',
        replicas=1,
        containers=[
            V1Container(name='registry',
                        image='apache/nifi-registry:latest',
                        env=[
                            V1EnvVar(name='NIFI_REGISTRY_WEB_HTTP_PORT',
                                     value='19090'),
                        ],
                        ports=[V1ContainerPort(container_port=19090)],
                        volume_mounts=reg_volume_mounts),
        ],
        init_containers=[
            # Hands the registry directories to uid/gid 1000 before start.
            V1Container(
                name='init-permissions',
                image='busybox',
                command=[
                    'sh', '-c',
                    'chown -R 1000:1000 /opt/nifi-registry/nifi-registry-current'
                ],
                volume_mounts=[
                    V1VolumeMount(name=path[0], mount_path=path[1])
                    for path in reg_volume_paths
                ])
        ],
        volumes=[],
        volume_paths=reg_volume_paths)
    ensure_ingress_routed_svc(api_core_v1=api_core_v1,
                              api_custom=api_custom,
                              domain=domain,
                              hostname='registry',
                              name='registry',
                              target_name='registry',
                              namespace='default',
                              port_name='web',
                              svc_port=80,
                              target_port=19090)

    perform_nifi_ops(api_apps_v1, api_core_v1, api_custom, domain)

    perform_build_ops_bg()
    perform_mirror_ops_bg()
#pip install google-cloud-container google-api-python-client
# Prints the external (NAT) IP addresses of the node VMs of one GKE cluster.
from google.cloud import container_v1
from googleapiclient import discovery

client = container_v1.ClusterManagerClient()
service = discovery.build('compute', 'v1')

# Placeholder: presumably has to be filled in, like the <...> parts of the
# cluster name below, before the script can work.
project_id = ''
ip = []

#response = client.get_cluster(project_id,zone,cluster_name)
response = client.get_cluster(
    name='projects/<project_name>/locations/<zone>/clusters/<cluster_name>')

for instance_group in list(response.instance_group_urls):
    # The URL ends in ".../zones/<zone>/instanceGroupManagers/<name>".
    # Read zone and name from the end so that an earlier path segment
    # containing "zones" (e.g. in the project id) cannot confuse the parsing.
    url_parts = instance_group.rstrip('/').split('/')
    compute_zone = url_parts[-3]
    instance_group_manager = url_parts[-1]

    listing = service.instanceGroups().listInstances(
        project=project_id,
        zone=compute_zone,
        instanceGroup=instance_group_manager).execute()

    # An empty instance group has no 'items' key in the listing.
    for instance_data in listing.get('items', []):
        instance_name = instance_data['instance'].split('/')[-1]
        instance = service.instances().get(
            project=project_id, zone=compute_zone,
            instance=instance_name).execute()

        # Instances without an external address have no accessConfigs/natIP;
        # skip them instead of failing with KeyError/IndexError.
        for interface in instance.get('networkInterfaces', [])[:1]:
            for access_config in interface.get('accessConfigs', [])[:1]:
                if 'natIP' in access_config:
                    ip.append(access_config['natIP'])

print(ip)
def create_gcloud(
        num_workers,
        release,
        kubernetes_version,
        machine_type,
        disk_size,
        num_cpus,
        num_gpus,
        gpu_type,
        zone,
        project,
        preemptible,
        custom_value,
):
    """Create a GKE cluster and deploy the mlbench helm chart on it.

    Steps, in order: delete a left-over firewall rule named
    ``<release>-<num_workers>-firewall``, create the cluster, configure the
    Kubernetes client, optionally install the NVIDIA daemonset, install
    tiller, install the chart through a ``kubectl port-forward`` to tiller,
    open the mlbench port in the firewall and store the cluster endpoint in
    the CLI config.

    Args:
        num_workers: node count of the cluster; must be >= 2. The chart is
            given ``num_workers - 1`` as ``limits.workers``.
        release: combined with ``num_workers`` into the cluster/release name.
        kubernetes_version: not referenced in this function body.
        machine_type: machine type of the nodes.
        disk_size: node disk size, passed as ``disk_size_gb``.
        num_cpus: passed to the chart as ``limits.cpu``.
        num_gpus: accelerators per node (none are configured when 0); also
            passed to the chart as ``limits.gpu``.
        gpu_type: accelerator type, used when ``num_gpus > 0``.
        zone: location of the cluster.
        project: GCP project; when falsy, the default project of the
            application-default credentials is used.
        preemptible: passed to the node config.
        custom_value: iterable of ``"dotted.key=value"`` strings merged into
            the chart values, or a falsy value for none.

    Raises:
        click.UsageError: if no gcloud credentials can be found.
        AssertionError: if ``num_workers`` is below 2.
        ValueError: if the cluster creation operation does not end in DONE.
    """
    from google.cloud import container_v1
    import google.auth
    from google.auth.exceptions import DefaultCredentialsError
    from googleapiclient import discovery, http

    try:
        credentials, default_project = google.auth.default()
    except DefaultCredentialsError:
        raise click.UsageError(
            "Couldn't find gcloud credentials. Install the gcloud"
            " sdk ( https://cloud.google.com/sdk/docs/quickstart-linux ) and "
            "run 'gcloud auth application-default login' to login and create "
            "your credentials.")

    # NOTE(review): assert is stripped under `python -O`.
    assert num_workers >= 2, "Number of workers should be at least 2"

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = "{}-{}".format(release, num_workers)
    # Note the trailing "/": the concatenations with "/" further down
    # therefore produce paths containing "//".
    name_path = "projects/{}/locations/{}/".format(project, zone)

    extraargs = {}

    if num_gpus > 0:
        extraargs["accelerators"] = [
            container_v1.types.AcceleratorConfig(accelerator_count=num_gpus,
                                                 accelerator_type=gpu_type)
        ]

    # delete existing firewall, if any
    firewalls = discovery.build("compute", "v1",
                                cache_discovery=False).firewalls()

    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = "{}-firewall".format(name)

    if any(f["name"] == fw_name for f in existing_firewalls["items"]):
        response = {}
        # NOTE(review): execute() presumably returns a plain dict; a dict has
        # no `status` attribute, so this loop would only end through the 404
        # branch (i.e. once the rule is really gone) — confirm.
        while not hasattr(response, "status"):
            try:
                response = firewalls.delete(project=project,
                                            firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    # rule no longer exists: nothing left to wait for
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}
        while hasattr(response, "status") and response.status < response.DONE:
            response = gclient.get_operation(None,
                                             None,
                                             None,
                                             name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                "https://www.googleapis.com/auth/devstorage.full_control",
            ],
            **extraargs,
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(
                disabled=True, ),
            horizontal_pod_autoscaling=container_v1.types.
            HorizontalPodAutoscaling(disabled=True, ),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                disabled=True, ),
            network_policy_config=container_v1.types.NetworkPolicyConfig(
                disabled=False, ),
        ),
        logging_service=None,
        monitoring_service=None,
    )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    # NOTE(review): the operation name built here is
    # "projects/<p>/locations/<z>//<operation>" — double slash and no
    # "operations/" segment; confirm the API accepts this form.
    while response.status < response.DONE:
        response = gclient.get_operation(None,
                                         None,
                                         None,
                                         name=name_path + "/" + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError("Cluster creation failed!")

    # NOTE(review): same path shape as above, without a "clusters/" segment
    # — confirm.
    cluster = gclient.get_cluster(None,
                                  None,
                                  None,
                                  name=name_path + "/" + name)

    # Refresh so that credentials.token is populated for the bearer header.
    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)
    configuration = client.Configuration()
    configuration.host = f"https://{cluster.endpoint}:443"
    # TLS verification of the cluster endpoint is disabled.
    configuration.verify_ssl = False
    configuration.api_key = {"authorization": "Bearer " + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        # Install the NVIDIA daemonset downloaded from GCLOUD_NVIDIA_DAEMONSET.
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            # The selector is filled in from the pod template labels.
            dep["spec"]["selector"] = {
                "matchLabels": dep["spec"]["template"]["metadata"]["labels"]
            }
            # Uses the name-mangled private deserializer of the client
            # library to turn the dict into a V1DaemonSet.
            dep = client.ApiClient()._ApiClient__deserialize(
                dep, "V1DaemonSet")
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set("kube-system", body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        "kube-system",
        {
            "apiVersion": "v1",
            "kind": "ServiceAccount",
            "metadata": {
                "name": "tiller",
                "generateName": "tiller",
                "namespace": "kube-system",
            },
        },
    )

    # Bind the tiller service account to the cluster-admin role.
    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding({
        "apiVersion": "rbac.authorization.k8s.io/v1beta1",
        "kind": "ClusterRoleBinding",
        "metadata": {
            "name": "tiller"
        },
        "roleRef": {
            "apiGroup": "rbac.authorization.k8s.io",
            "kind": "ClusterRole",
            "name": "cluster-admin",
        },
        "subjects": [{
            "kind": "ServiceAccount",
            "name": "tiller",
            "namespace": "kube-system"
        }],
    })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service("kube-system",
                                                 tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment(
        "kube-system", tiller_dep)

    sleep(1)

    # NOTE(review): assumes the tiller pod is already listed one second
    # after the deployment was created; items[0] raises IndexError otherwise.
    pods = client.CoreV1Api().list_namespaced_pod(namespace="kube-system",
                                                  label_selector="app=helm")

    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace="kube-system", name=tiller_pod.metadata.name)
        # Any phase other than "Pending" ends the wait.
        if resp.status.phase != "Pending":
            break

        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    # )

    # The access token is passed to kubectl on its command line.
    with subprocess.Popen([
            "kubectl",
            "port-forward",
            "--namespace={}".format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name,
            "{0}:{0}".format(ports),
            "--server={}".format(configuration.host),
            "--token={}".format(credentials.token),
            "--insecure-skip-tls-verify=true",
    ]) as portforward:

        # Fixed wait for the port-forward to come up.
        sleep(5)
        # install chart
        tiller = Tiller("localhost")
        chart = ChartBuilder({
            "name": "mlbench-helm",
            "source": {
                "type": "git",
                "location": "https://github.com/mlbench/mlbench-helm",
            },
        })

        values = {
            "limits": {
                "workers": num_workers - 1,
                "gpu": num_gpus,
                "cpu": num_cpus
            }
        }

        if custom_value:
            # merge custom values with values
            for cv in custom_value:
                key, v = cv.split("=", 1)

                # Walk/create the nested dicts named by the dotted key and
                # set the last component; the value stays a string.
                current = values
                key_path = key.split(".")

                for k in key_path[:-1]:
                    if k not in current:
                        current[k] = {}

                    current = current[k]

                current[key_path[-1]] = v

        tiller.install_release(
            chart.get_helm_chart(),
            name=name,
            wait=True,
            dry_run=False,
            namespace="default",
            values=values,
        )

        portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        # NOTE(review): a single string is passed here; the Compute API
        # presumably expects a list of ranges — confirm.
        "sourceRanges": "0.0.0.0/0",
        "allowed": [{
            "IPProtocol": "tcp",
            "ports": [mlbench_client.port]
        }],
    }

    firewalls.insert(project=project, body=firewall_body).execute()

    # Record provider and cluster endpoint in the CLI config.
    config = get_config()

    config.set("general", "provider", "gke")
    config.set("gke", "cluster", cluster.endpoint)

    write_config(config)

    click.echo("MLBench successfully deployed")
def create_gcloud(num_workers, release, kubernetes_version, machine_type,
                  disk_size, num_cpus, num_gpus, gpu_type, zone, project,
                  preemptible, custom_value):
    """Create a GKE cluster and deploy MLBench onto it via Tiller/Helm.

    The function runs the following steps in order: remove a stale firewall
    rule of the same name, create the cluster and wait for the operation to
    finish, point the kubernetes client at the new cluster, optionally install
    the NVIDIA driver daemonset, deploy Tiller, install the ``mlbench-helm``
    chart through a ``kubectl port-forward`` tunnel, open the MLBench port in
    the firewall and finally persist the cluster endpoint in the CLI config.

    Args:
        num_workers (int): Initial node count of the cluster. The chart is
            installed with ``limits.workers = num_workers - 1``.
        release (str): Release name; the cluster (and Helm release) is named
            ``"<release>-<num_workers>"``.
        kubernetes_version (str): Passed as the cluster's
            ``initial_cluster_version``.
        machine_type (str): Machine type of the nodes.
        disk_size (int): Passed as ``disk_size_gb`` of the node config.
        num_cpus (int): Value for the chart's ``limits.cpu``.
        num_gpus (int): Accelerator count per node and value for the chart's
            ``limits.gpu``; no accelerators are requested when it is 0.
        gpu_type (str): Accelerator type, only used when ``num_gpus > 0``.
        zone (str): Location used in the cluster's parent path.
        project (str): Project id; falls back to the default project returned
            by ``google.auth.default()`` when falsy.
        preemptible (bool): Passed as ``preemptible`` of the node config.
        custom_value (iterable of str): Optional ``"dotted.key=value"``
            entries merged into the chart values (values stay strings).

    Raises:
        ValueError: If the cluster-creation operation ends in a state other
            than ``DONE``.
    """
    from google.cloud import container_v1
    import google.auth
    # Imported explicitly: ``import google.auth`` by itself does not load the
    # ``transport.requests`` submodule that is used to refresh the token.
    import google.auth.transport.requests
    from googleapiclient import discovery, http

    credentials, default_project = google.auth.default()

    if not project:
        project = default_project

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    name = '{}-{}'.format(release, num_workers)
    name_path = 'projects/{}/locations/{}/'.format(project, zone)

    extraargs = {}

    if num_gpus > 0:
        extraargs['accelerators'] = [container_v1.types.AcceleratorConfig(
            accelerator_count=num_gpus, accelerator_type=gpu_type)]

    # delete existing firewall, if any
    firewalls = discovery.build(
        'compute', 'v1', cache_discovery=False).firewalls()

    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = '{}-firewall'.format(name)

    # The list response has no 'items' key when the project has no firewall
    # rules, hence the default.
    if any(f['name'] == fw_name
           for f in existing_firewalls.get('items', [])):
        # NOTE(review): `.execute()` appears to return a plain dict, in which
        # case `hasattr(response, 'status')` never becomes true and this loop
        # only ends through the 404 branch (rule is gone) - confirm.
        response = {}
        while not hasattr(response, 'status'):
            try:
                response = firewalls.delete(
                    project=project, firewall=fw_name).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}
        while hasattr(response, 'status') and response.status < response.DONE:
            response = gclient.get_operation(
                None, None, None, name=response.selfLink)
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                'https://www.googleapis.com/auth/devstorage.full_control',
            ],
            **extraargs
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(
                disabled=True,
            ),
            horizontal_pod_autoscaling=
            container_v1.types.HorizontalPodAutoscaling(
                disabled=True,
            ),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                disabled=True,
            ),
            network_policy_config=container_v1.types.NetworkPolicyConfig(
                disabled=False,
            ),
        ),
        logging_service=None,
        monitoring_service=None,
        # Previously the requested version was accepted but silently ignored.
        initial_cluster_version=kubernetes_version,
    )
    response = gclient.create_cluster(None, None, cluster, parent=name_path)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(
            None, None, None, name=name_path + '/' + response.name)
        sleep(1)

    if response.status != response.DONE:
        raise ValueError('Cluster creation failed!')

    cluster = gclient.get_cluster(
        None, None, None, name=name_path + '/' + name)

    # Refresh the credentials so that `credentials.token` is populated.
    auth_req = google.auth.transport.requests.Request()
    credentials.refresh(auth_req)

    # NOTE: TLS verification is disabled for the connection to the cluster.
    configuration = client.Configuration()
    configuration.host = f'https://{cluster.endpoint}:443'
    configuration.verify_ssl = False
    configuration.api_key = {'authorization': 'Bearer ' + credentials.token}
    client.Configuration.set_default(configuration)

    if num_gpus > 0:
        with request.urlopen(GCLOUD_NVIDIA_DAEMONSET) as r:
            dep = yaml.safe_load(r)
            dep['spec']['selector'] = {
                'matchLabels': dep['spec']['template']['metadata']['labels']
            }
            # Uses the client's name-mangled private deserializer to turn the
            # parsed manifest into a V1DaemonSet model.
            dep = client.ApiClient()._ApiClient__deserialize(
                dep, 'V1DaemonSet')
            k8s_client = client.AppsV1Api()
            k8s_client.create_namespaced_daemon_set('kube-system', body=dep)

    # create tiller service account
    client.CoreV1Api().create_namespaced_service_account(
        'kube-system',
        {
            'apiVersion': 'v1',
            'kind': 'ServiceAccount',
            'metadata': {
                'name': 'tiller',
                'generateName': 'tiller',
                'namespace': 'kube-system',
            },
        })

    client.RbacAuthorizationV1beta1Api().create_cluster_role_binding(
        {
            'apiVersion': 'rbac.authorization.k8s.io/v1beta1',
            'kind': 'ClusterRoleBinding',
            'metadata': {
                'name': 'tiller'
            },
            'roleRef': {
                'apiGroup': 'rbac.authorization.k8s.io',
                'kind': 'ClusterRole',
                'name': 'cluster-admin'
            },
            'subjects': [
                {
                    'kind': 'ServiceAccount',
                    'name': 'tiller',
                    'namespace': 'kube-system'
                }
            ]
        })

    # deploy tiller
    tiller_service = yaml.safe_load(TILLER_MANIFEST_SERVICE)
    tiller_dep = yaml.safe_load(TILLER_MANIFEST_DEPLOYMENT)
    client.CoreV1Api().create_namespaced_service(
        'kube-system',
        tiller_service)
    client.ExtensionsV1beta1Api().create_namespaced_deployment(
        'kube-system',
        tiller_dep)

    sleep(1)

    pods = client.CoreV1Api().list_namespaced_pod(
        namespace='kube-system',
        label_selector='app=helm'
    )

    tiller_pod = pods.items[0]

    while True:
        # Wait for tiller
        resp = client.CoreV1Api().read_namespaced_pod(
            namespace='kube-system',
            name=tiller_pod.metadata.name
        )
        if resp.status.phase != 'Pending':
            break
        sleep(5)

    # kubernetes python doesn't currently support port forward
    # https://github.com/kubernetes-client/python/issues/166
    ports = 44134

    # resp = stream(
    #     client.CoreV1Api().connect_get_namespaced_pod_portforward,
    #     name=tiller_pod.metadata.name,
    #     namespace=tiller_pod.metadata.namespace,
    #     ports=ports
    #     )

    with subprocess.Popen([
            'kubectl',
            'port-forward',
            '--namespace={}'.format(tiller_pod.metadata.namespace),
            tiller_pod.metadata.name, '{0}:{0}'.format(ports),
            '--server={}'.format(configuration.host),
            '--token={}'.format(credentials.token),
            '--insecure-skip-tls-verify=true']) as portforward:
        try:
            # give the tunnel time to come up
            sleep(5)

            # install chart
            tiller = Tiller('localhost')
            chart = ChartBuilder(
                {
                    'name': 'mlbench-helm',
                    'source': {
                        'type': 'git',
                        'location': 'https://github.com/mlbench/mlbench-helm'
                    }})

            values = {
                'limits': {
                    'workers': num_workers - 1,
                    'gpu': num_gpus,
                    'cpu': num_cpus
                }
            }

            if custom_value:
                # merge custom values with values
                for cv in custom_value:
                    key, v = cv.split("=", 1)

                    current = values
                    key_path = key.split(".")

                    # walk/create the nested dicts for all but the last key
                    for k in key_path[:-1]:
                        current = current.setdefault(k, {})

                    current[key_path[-1]] = v

            tiller.install_release(
                chart.get_helm_chart(),
                name=name,
                wait=True,
                dry_run=False,
                namespace='default',
                values=values)
        finally:
            # Leaving the `with` block waits for the child process, and
            # `kubectl port-forward` never exits on its own. Terminate it even
            # when the install fails so that the error surfaces instead of
            # the CLI hanging.
            portforward.terminate()

    # open port in firewall
    mlbench_client = ApiClient(in_cluster=False, load_config=False)
    firewall_body = {
        "name": fw_name,
        "direction": "INGRESS",
        "sourceRanges": "0.0.0.0/0",
        "allowed": [
            {"IPProtocol": "tcp", "ports": [mlbench_client.port]}
        ]
    }

    firewalls.insert(project=project, body=firewall_body).execute()

    config = get_config()
    config.set('general', 'provider', 'gke')
    config.set('gke', 'cluster', cluster.endpoint)
    write_config(config)

    click.echo("MLBench successfully deployed")
def gcloud_create_cluster(
    name,
    name_path,
    num_workers,
    num_gpus,
    gpu_type,
    machine_type,
    disk_size,
    preemptible,
    kubernetes_version,
    project,
):
    """Creates a GCloud cluster based on the given arguments

    Any existing firewall rule named ``"<name>-firewall"`` is deleted first.
    The function then requests the cluster and blocks until the creation
    operation has finished.

    Args:
        name (str): Cluster name
        name_path (str): Cluster name path
            (` "projects/{}/locations/{}/clusters/"`)
        num_workers (int): Total number of nodes (including master node)
        num_gpus (int): Total number of gpus per node
        gpu_type (str): GPU type
        machine_type (str): Machine type
        disk_size (int): Disk size in GB
        preemptible (bool): Use preemptible machines
        kubernetes_version (str): Kubernetes version
        project (str): GCLoud project

    Returns:
        (:obj:`google.cloud.container_v1.ClusterManagerClient`, str, :obj:):
        The client for cluster communication, firewall name, and the
        firewalls resource of the compute API

    Raises:
        AssertionError: If ``num_workers`` is smaller than 2.
        ValueError: If the creation operation ends in a state other than
            ``DONE``.
        SystemExit: Via ``sys.exit(1)`` when a cluster with this name already
            exists.
    """
    # API resource names always use "/" as separator, independent of the OS.
    import posixpath

    assert num_workers >= 2, "Number of workers should be at least 2"

    # create cluster
    gclient = container_v1.ClusterManagerClient()

    extraargs = {}

    if num_gpus > 0:
        extraargs["accelerators"] = [
            container_v1.types.AcceleratorConfig(
                accelerator_count=num_gpus, accelerator_type=gpu_type
            )
        ]

    # delete existing firewall, if any
    firewalls = discovery.build(
        "compute", "v1", cache_discovery=False
    ).firewalls()

    existing_firewalls = firewalls.list(project=project).execute()
    fw_name = "{}-firewall".format(name)

    # The list response has no "items" key when the project has no firewall
    # rules, hence the default.
    if any(f["name"] == fw_name for f in existing_firewalls.get("items", [])):
        # NOTE(review): `.execute()` appears to return a plain dict, in which
        # case `hasattr(response, "status")` never becomes true and this loop
        # only ends through the 404 branch (rule is gone) - confirm.
        response = {}
        while not hasattr(response, "status"):
            try:
                response = firewalls.delete(
                    project=project, firewall=fw_name
                ).execute()
            except http.HttpError as e:
                if e.resp.status == 404:
                    response = {}
                    break
                click.echo("Wait for firewall to be available for deletion")
                sleep(5)
                response = {}
        while hasattr(response, "status") and response.status < response.DONE:
            response = gclient.get_operation(
                None, None, None, name=response.selfLink
            )
            sleep(1)

    # create cluster
    cluster = container_v1.types.Cluster(
        name=name,
        initial_node_count=num_workers,
        node_config=container_v1.types.NodeConfig(
            machine_type=machine_type,
            disk_size_gb=disk_size,
            preemptible=preemptible,
            oauth_scopes=[
                "https://www.googleapis.com/auth/devstorage.full_control",
            ],
            **extraargs,
        ),
        addons_config=container_v1.types.AddonsConfig(
            http_load_balancing=container_v1.types.HttpLoadBalancing(
                disabled=True,
            ),
            horizontal_pod_autoscaling=container_v1.types.
            HorizontalPodAutoscaling(disabled=True, ),
            kubernetes_dashboard=container_v1.types.KubernetesDashboard(
                disabled=True,
            ),
            network_policy_config=container_v1.types.NetworkPolicyConfig(
                disabled=False,
            ),
        ),
        logging_service=None,
        monitoring_service=None,
        initial_cluster_version=kubernetes_version,
    )

    try:
        # Same positional convention (project_id, zone, ...) as the
        # `get_operation` calls in this function; the location is carried by
        # `parent` instead.
        response = gclient.create_cluster(
            None, None, cluster, parent=name_path
        )
    except AlreadyExists as e:
        click.echo("Exception from Google: " + str(e))
        click.echo(
            "A cluster with this name already exists in the specified project and zone"
        )
        click.echo(
            "Try running 'gcloud container clusters list' to list all active clusters"
        )
        sys.exit(1)

    # wait for cluster to load
    while response.status < response.DONE:
        response = gclient.get_operation(
            None, None, None, name=posixpath.join(name_path, response.name)
        )
        sleep(1)

    if response.status != response.DONE:
        raise ValueError("Cluster creation failed!")

    return gclient, fw_name, firewalls