def create_cluster(
    self,
    cluster: Union[Dict, Cluster],
    project_id: Optional[str] = None,
    retry: Retry = DEFAULT,
    timeout: float = DEFAULT
) -> str:
    """
    Create a cluster made of the specified number and type of Google
    Compute Engine instances.

    :param cluster: A Cluster protobuf or dict. A dict must have the same
        shape as the protobuf message
        :class:`google.cloud.container_v1.types.Cluster`
    :type cluster: dict or google.cloud.container_v1.types.Cluster
    :param project_id: Google Cloud Platform project ID
    :type project_id: str
    :param retry: A retry object (``google.api_core.retry.Retry``) used to
        retry requests. If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request
        to complete. If retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :return: The full url to the new, or existing, cluster
    :raises:
        ParseError: On JSON parsing problems when trying to convert dict
        AirflowException: cluster is not dict type nor Cluster proto type
    """
    # Reject unsupported inputs up front, then normalise dicts to a proto.
    if not isinstance(cluster, (dict, Cluster)):
        raise AirflowException(
            "cluster is not instance of Cluster proto or python dict")
    if isinstance(cluster, dict):
        cluster = ParseDict(cluster, Cluster())

    # Tag the cluster with the Airflow version that requested it.
    airflow_version_label = 'v' + version.version
    self._append_label(cluster, 'airflow-version', airflow_version_label)

    self.log.info(
        "Creating (project_id=%s, zone=%s, cluster_name=%s)",
        project_id,
        self.location,
        cluster.name,
    )
    try:
        client = self.get_conn()
        operation = client.create_cluster(
            project_id=project_id,
            zone=self.location,
            cluster=cluster,
            retry=retry,
            timeout=timeout,
        )
        finished = self.wait_for_operation(operation)
        return finished.target_link
    except AlreadyExists as error:
        # An existing cluster of the same name is treated as success.
        self.log.info('Assuming Success: %s', error.message)
        # NOTE(review): project_id is not forwarded to get_cluster here —
        # confirm get_cluster resolves the same project on its own.
        existing = self.get_cluster(name=cluster.name)
        return existing.self_link
def test_create_cluster_proto(self, wait_mock, convert_mock, mock_project_id):
    """A Cluster proto is passed to the client unchanged, with no dict conversion."""
    cluster_proto = Cluster()
    cluster_proto.name = CLUSTER_NAME

    retry_mock = mock.Mock()
    timeout_mock = mock.Mock()

    # Replace the client's create_cluster so the call can be inspected.
    client_create = mock.Mock()
    self.gke_hook._client.create_cluster = client_create

    self.gke_hook.create_cluster(
        cluster=cluster_proto,
        project_id=TEST_GCP_PROJECT_ID,
        retry=retry_mock,
        timeout=timeout_mock,
    )

    client_create.assert_called_once_with(
        project_id=TEST_GCP_PROJECT_ID,
        zone=GKE_ZONE,
        cluster=cluster_proto,
        retry=retry_mock,
        timeout=timeout_mock,
    )
    # The hook must wait on whatever the client returned.
    wait_mock.assert_called_once_with(client_create.return_value)
    # A proto input must not go through the converter mock.
    convert_mock.assert_not_called()
def create_cluster(
    self,
    cluster: Union[Dict, Cluster, None],
    project_id: str = PROVIDE_PROJECT_ID,
    retry: Union[Retry, _MethodDefault] = DEFAULT,
    timeout: Optional[float] = None,
) -> str:
    """
    Create a cluster made of the specified number and type of Google
    Compute Engine instances.

    :param cluster: A Cluster protobuf or dict. A dict must have the same
        shape as the protobuf message
        :class:`google.cloud.container_v1.types.Cluster`
    :param project_id: Google Cloud project ID
    :param retry: A retry object (``google.api_core.retry.Retry``) used to
        retry requests. If None is specified, requests will not be retried.
    :param timeout: The amount of time, in seconds, to wait for the request
        to complete. If retry is specified, the timeout applies to each
        individual attempt.
    :return: The full url to the new, or existing, cluster
    :raises:
        ParseError: On JSON parsing problems when trying to convert dict
        AirflowException: cluster is not dict type nor Cluster proto type
    """
    if isinstance(cluster, dict):
        # Round-trip through JSON to build the proto from a plain dict.
        cluster_json = json.dumps(cluster)
        cluster = Cluster.from_json(cluster_json)
    elif not isinstance(cluster, Cluster):
        raise AirflowException(
            "cluster is not instance of Cluster proto or python dict")

    # Tag the cluster with the Airflow version that requested it.
    airflow_version_label = 'v' + version.version
    self._append_label(cluster, 'airflow-version', airflow_version_label)  # type: ignore

    self.log.info(
        "Creating (project_id=%s, location=%s, cluster_name=%s)",
        project_id,
        self.location,
        cluster.name,  # type: ignore
    )
    try:
        client = self.get_cluster_manager_client()
        operation = client.create_cluster(
            parent=f'projects/{project_id}/locations/{self.location}',
            cluster=cluster,  # type: ignore
            retry=retry,
            timeout=timeout,
        )
        finished = self.wait_for_operation(operation)
        return finished.target_link
    except AlreadyExists as error:
        # An existing cluster of the same name is treated as success.
        self.log.info('Assuming Success: %s', error.message)
        existing = self.get_cluster(name=cluster.name, project_id=project_id)  # type: ignore
        return existing
def persist(context: "Context", task_instance, cluster: Union[Dict, Cluster, None]):
    """Push the location, cluster name and project ID to XCom under the cluster link key.

    :param context: passed through to ``task_instance.xcom_push``
    :param task_instance: object providing ``location``, ``project_id`` and
        ``xcom_push``
    :param cluster: a Cluster proto, or a dict that is converted to one via JSON
    """
    if isinstance(cluster, dict):
        serialized = json.dumps(cluster)
        cluster = Cluster.from_json(serialized)

    # NOTE(review): the signature allows cluster=None, but ``.name`` is read
    # unconditionally below — confirm callers never pass None.
    link_payload = {
        "location": task_instance.location,
        "cluster_name": cluster.name,  # type: ignore
        "project_id": task_instance.project_id,
    }
    task_instance.xcom_push(
        context=context,
        key=KubernetesEngineClusterLink.key,
        value=link_payload,
    )
def create_cluster(self, cluster, retry=DEFAULT, timeout=DEFAULT):
    """
    Creates a cluster, consisting of the specified number and type of Google Compute
    Engine instances.

    :param cluster: A Cluster protobuf or dict. If dict is provided, it must be of
        the same form as the protobuf message google.cloud.container_v1.types.Cluster
    :type cluster: dict or google.cloud.container_v1.types.Cluster
    :param retry: A retry object (google.api_core.retry.Retry) used to retry requests.
        If None is specified, requests will not be retried.
    :type retry: google.api_core.retry.Retry
    :param timeout: The amount of time, in seconds, to wait for the request to
        complete. Note that if retry is specified, the timeout applies to each
        individual attempt.
    :type timeout: float
    :return: The full url to the new, or existing, cluster
    :raises ParseError: On JSON parsing problems when trying to convert dict
    :raises AirflowException: cluster is not dict type nor Cluster proto type
    """
    if isinstance(cluster, dict):
        cluster_proto = Cluster()
        cluster = self._dict_to_proto(py_dict=cluster, proto=cluster_proto)
    elif not isinstance(cluster, Cluster):
        raise AirflowException(
            "cluster is not instance of Cluster proto or python dict")

    # Tag the cluster with the Airflow version that requested it.
    self._append_label(cluster, 'airflow-version', 'v' + version.version)

    # Pass values as logging arguments so formatting is deferred to the
    # logging framework instead of being done eagerly on every call.
    self.log.info(
        "Creating (project_id=%s, zone=%s, cluster_name=%s)",
        self.project_id, self.location, cluster.name,
    )
    try:
        op = self.client.create_cluster(project_id=self.project_id,
                                        zone=self.location,
                                        cluster=cluster,
                                        retry=retry,
                                        timeout=timeout)
        op = self.wait_for_operation(op)

        return op.target_link
    except AlreadyExists as error:
        # An existing cluster of the same name is treated as success.
        # %s formatting (rather than string concatenation) cannot raise
        # TypeError inside this handler if the message is not a str.
        self.log.info('Assuming Success: %s', error.message)
        return self.get_cluster(name=cluster.name).self_link
def provisioning(self):
    """Ensure every cluster listed in ``self.configs['clusters']`` exists on GKE.

    Clusters reported as existing and running are appended to
    ``self.clusters`` as returned by ``_get_existed_clusters``. Clusters
    reported as existing but not running are only logged. Any other cluster
    is created and then polled until its status is RUNNING, at which point
    the fetched cluster object is appended to ``self.clusters``. If it does
    not reach RUNNING within the polling budget, a warning is logged and the
    cluster is not added.
    """
    logger.info("Starting provisioning Kubernetes clusters")
    cluster_manager_client = self._get_gke_client()
    project_id = self.configs['project_id']

    list_zones = [cluster['zone'] for cluster in self.configs['clusters']]

    logger.info("Checking the Kubernetes clusters exist or not")
    # Both results are looked up below with 'zone:cluster_name' keys.
    clusters_ok, clusters_ko = self._get_existed_clusters(project_id, list_zones, cluster_manager_client)

    max_polls = 10
    poll_interval_seconds = 20

    for cluster in self.configs['clusters']:
        cluster_name = cluster['cluster_name']
        zone = cluster['zone']
        key = '%s:%s' % (zone, cluster_name)

        if key in clusters_ok:
            logger.info('Cluster %s in zone %s already existed and is running', cluster_name, zone)
            self.clusters.append(clusters_ok[key])
            continue

        if key in clusters_ko:
            logger.info('Cluster "%s" in zone %s already existed but not running', cluster_name, zone)
            continue

        n_nodes = cluster['n_nodes']
        logger.info('Deploying K8s cluster "%s" with %s nodes in zone %s', cluster_name, n_nodes, zone)
        cluster_specs = Cluster(mapping={
            'name': cluster_name,
            'locations': [zone],
            'initial_node_count': n_nodes,
            'ip_allocation_policy': {'use_ip_aliases': True}
        })
        cluster_manager_client.create_cluster(cluster=cluster_specs,
                                              parent='projects/%s/locations/%s' % (project_id, zone))
        # Initial wait scales with the node count before polling starts.
        sleep(40 * n_nodes)

        for _ in range(max_polls):
            c = cluster_manager_client.get_cluster(project_id=project_id,
                                                   zone=zone,
                                                   cluster_id=cluster_name)
            if c.status == Cluster.Status.RUNNING:
                self.clusters.append(c)
                break
            # nodes take a while to boot up
            sleep(poll_interval_seconds)
        else:
            # Previously this case was dropped silently; make it visible.
            logger.warning('Cluster "%s" in zone %s did not reach RUNNING status after %s checks',
                           cluster_name, zone, max_polls)

    logger.info("Finish provisioning Kubernetes clusters on GKE\n")
def test_create_cluster_dict(self, wait_mock, convert_mock, mock_project_id):
    """A dict cluster is converted once and the converted object is sent to the client."""
    mock_cluster_dict = {'name': CLUSTER_NAME}
    retry_mock, timeout_mock = mock.Mock(), mock.Mock()

    # Replace the client's create_cluster so the call can be inspected.
    client_create = self.gke_hook._client.create_cluster = mock.Mock()
    # Whatever the converter mock returns is what the client should receive.
    proto_mock = convert_mock.return_value = mock.Mock()

    self.gke_hook.create_cluster(
        cluster=mock_cluster_dict, project_id=TEST_GCP_PROJECT_ID, retry=retry_mock, timeout=timeout_mock
    )

    client_create.assert_called_once_with(
        project_id=TEST_GCP_PROJECT_ID,
        zone=GKE_ZONE,
        cluster=proto_mock,
        retry=retry_mock,
        timeout=timeout_mock,
    )
    wait_mock.assert_called_once_with(client_create.return_value)
    # Build the expected dict from CLUSTER_NAME, like the input, instead of
    # repeating the literal name, so the test follows the constant.
    convert_mock.assert_called_once_with({'name': CLUSTER_NAME}, Cluster())
def node_pools(options: Dict[str, Any]) -> Iterator[List[str]]:
    """Create a GKE cluster with the node pools selected by ``options`` and yield their names.

    Yields an empty list without touching GKE when no pool is selected.
    Otherwise the cluster is created (an ``AlreadyExists`` error is ignored),
    the default kubernetes client configuration is pointed at it, the pool
    names are yielded, and the cluster is deleted once the consumer is done.

    ``options["project"]`` is filled in from the default credentials when it
    is None.
    """
    credentials, default_project = google.auth.default()
    if options["project"] is None:
        options["project"] = default_project
    gke = ClusterManagerClient(credentials=credentials)

    # build node pool configurations
    pools = {}
    wants_generator = options["generator"] and (
        options["load_balancer"] is None or options["server_uri"] is not None
    )
    if wants_generator:
        generator_pool = NodePool(initial_node_count=1)
        generator_pool.config.machine_type = "n1-highcpu-2"
        pools["generator"] = generator_pool
    if options["load_balancer"] is None:
        server_pool = NodePool(initial_node_count=4)
        server_pool.config.machine_type = "n1-highcpu-2"
        pools["server"] = server_pool
        if options["emulator"]:
            pools["emulator"] = NodePool(initial_node_count=1)
        else:
            # need pubsub permissions
            server_pool.config.oauth_scopes.append(
                "https://www.googleapis.com/auth/pubsub")

    # add labels
    for pool_name, pool in pools.items():
        pool.name = pool_name
        pool.config.preemptible = options["preemptible"]
        pool.config.labels["name"] = pool_name
        if options["location"][-2] == "-":
            # triple node count for single zone cluster
            pool.initial_node_count *= 3

    if not pools:
        yield []  # nothing to create
        return

    # create cluster
    cluster_spec = Cluster(
        name=options["cluster"],
        logging_service=None,
        monitoring_service=None,
        node_pools=list(pools.values()),
    )
    parent = f"projects/{options['project']}/locations/{options['location']}"
    cluster_path = f"{parent}/clusters/{options['cluster']}"
    try:
        operation = gke.create_cluster(cluster=cluster_spec, parent=parent)
    except AlreadyExists:
        pass
    else:
        # wait for operation to complete
        operation_name = operation.self_link.split("projects")[-1]
        request = GetOperationRequest(name=operation_name)
        while gke.get_operation(request).status <= Operation.Status.RUNNING:
            time.sleep(15)

    # set kube credentials
    created = gke.get_cluster(name=cluster_path)
    kube_config = kube.Configuration()
    kube_config.host = f"https://{created.endpoint}:443"
    kube_config.verify_ssl = False
    kube_config.api_key = {"authorization": f"Bearer {credentials.token}"}
    kube.Configuration.set_default(kube_config)

    # delete cluster after test completes
    try:
        yield list(pools)
    finally:
        gke.delete_cluster(name=cluster_path)
def cluster(options: Dict[str, Any]) -> Iterator[Optional[str]]:
    """Yield the name of a GKE cluster to test against, or None when none is requested.

    With ``options["cluster"]`` set to None, the local kube config is loaded,
    ``options["project"]`` defaults to ``"test"``, and None is yielded.

    Otherwise a single-node-pool cluster is created (an ``AlreadyExists``
    error is ignored), the default kubernetes client configuration is pointed
    at it, and the cluster name is yielded. Afterwards persistent volumes are
    deleted on a best-effort basis and the cluster itself is deleted.
    """
    if options["cluster"] is None:
        load_kube_config()
        if options["project"] is None:
            options["project"] = "test"
        yield None
        return

    credentials, default_project = google.auth.default()
    if options["project"] is None:
        options["project"] = default_project
    gke = ClusterManagerClient(credentials=credentials)

    # create cluster
    parent = f"projects/{options['project']}/locations/{options['location']}"
    cluster_path = f"{parent}/clusters/{options['cluster']}"
    test_pool = NodePool(
        initial_node_count=1,
        name="test",
        config={
            "preemptible": options["preemptible"],
            "machine_type": "n1-highcpu-2",
        },
    )
    cluster_spec = Cluster(
        name=options["cluster"],
        logging_service=None,
        monitoring_service=None,
        node_pools=[test_pool],
    )
    try:
        operation = gke.create_cluster(cluster=cluster_spec, parent=parent)
    except AlreadyExists:
        pass
    else:
        # wait for operation to complete
        operation_name = operation.self_link.split("projects")[-1]
        request = GetOperationRequest(name=operation_name)
        while gke.get_operation(request).status <= Operation.Status.RUNNING:
            time.sleep(15)

    # set kube credentials
    created = gke.get_cluster(name=cluster_path)
    kube_config = kube.Configuration()
    kube_config.host = f"https://{created.endpoint}:443"
    kube_config.verify_ssl = False
    kube_config.api_key = {"authorization": f"Bearer {credentials.token}"}
    kube.Configuration.set_default(kube_config)

    # delete cluster after test completes
    try:
        yield options["cluster"]
    finally:
        try:
            # delete persistent volumes because gke cluster delete won't do it
            # https://cloud.google.com/kubernetes-engine/docs/how-to/deleting-a-cluster#overview
            api = kube.CoreV1Api()
            for volume in api.list_persistent_volume().items:
                try:
                    volume.spec.persistent_volume_reclaim_policy = "Delete"
                    api.patch_persistent_volume(
                        name=volume.metadata.name,
                        body=volume,
                    )
                    api.delete_persistent_volume(
                        name=volume.metadata.name,
                        grace_period_seconds=0,
                        propagation_policy="Foreground",
                    )
                except ApiException:
                    # Keep going: one failed volume must not block the rest.
                    print_exc()
            # wait for pv deletes to complete
            for _attempt in range(60):
                remaining = api.list_persistent_volume().items
                if not remaining:
                    break
                time.sleep(1)
            else:
                print("FAILED TO CLEANUP PERSISTENT VOLUMES")
        finally:
            # The cluster is removed even if volume cleanup raised.
            gke.delete_cluster(name=cluster_path)