def test_kube_config_path(self, mock_kube_config_loader, mock_kube_config_merger):
    """A connection pointing at a kube config file path should load that exact file."""
    hook = KubernetesHook(conn_id='kubernetes_kube_config_path')
    conn = hook.get_conn()
    mock_kube_config_loader.assert_called_once_with("path/to/file")
    mock_kube_config_merger.assert_called_once()
    self.assertIsInstance(conn, kubernetes.client.api_client.ApiClient)
def _patch_deprecated_k8s_settings(self, hook: KubernetesHook): """ Here we read config from core Airflow config [kubernetes] section. In a future release we will stop looking at this section and require users to use Airflow connections to configure KPO. When we find values there that we need to apply on the hook, we patch special hook attributes here. """ # default for enable_tcp_keepalive is True; patch if False if conf.getboolean('kubernetes', 'enable_tcp_keepalive') is False: hook._deprecated_core_disable_tcp_keepalive = True # default verify_ssl is True; patch if False. if conf.getboolean('kubernetes', 'verify_ssl') is False: hook._deprecated_core_disable_verify_ssl = True # default for in_cluster is True; patch if False and no KPO param. conf_in_cluster = conf.getboolean('kubernetes', 'in_cluster') if self.in_cluster is None and conf_in_cluster is False: hook._deprecated_core_in_cluster = conf_in_cluster # there's no default for cluster context; if we get something (and no KPO param) patch it. conf_cluster_context = conf.get('kubernetes', 'cluster_context', fallback=None) if not self.cluster_context and conf_cluster_context: hook._deprecated_core_cluster_context = conf_cluster_context # there's no default for config_file; if we get something (and no KPO param) patch it. conf_config_file = conf.get('kubernetes', 'config_file', fallback=None) if not self.config_file and conf_config_file: hook._deprecated_core_config_file = conf_config_file
def test_kube_config_connection(self, mock_kube_config_loader, mock_kube_config_merger, mock_tempfile):
    """A connection carrying inline kube config should be written to a temp file and loaded.

    Bug fix: the original called ``mock_tempfile.is_called_once()``, which is a
    silent no-op — any attribute access on a Mock returns a new child Mock, so
    the "assertion" could never fail. The real method is ``assert_called_once()``.
    """
    kubernetes_hook = KubernetesHook(conn_id='kubernetes_kube_config')
    api_conn = kubernetes_hook.get_conn()
    mock_tempfile.assert_called_once()
    mock_kube_config_loader.assert_called_once()
    mock_kube_config_merger.assert_called_once()
    self.assertIsInstance(api_conn, kubernetes.client.api_client.ApiClient)
def test_should_raise_exception_on_invalid_configuration(self, conn_uri):
    """Invalid connection extras must surface as AirflowException from get_conn()."""
    env_override = mock.patch.dict("os.environ", AIRFLOW_CONN_KUBERNETES_DEFAULT=conn_uri)
    expected_error = self.assertRaisesRegex(AirflowException, "Invalid connection configuration")
    with env_override, expected_error:
        hook = KubernetesHook()
        hook.get_conn()
def execute(self, context):
    """Create the SparkApplication custom resource definition and return the API response."""
    self.log.info("Creating sparkApplication")
    hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
    # No intermediate variable: the hook response is returned directly to the caller.
    return hook.create_custom_resource_definition(
        group="sparkoperator.k8s.io",
        version="v1beta2",
        plural="sparkapplications",
        body=self.application_file,
        namespace=self.namespace,
    )
def execute(self, context):
    """Submit the templated application file as a SparkApplication custom object."""
    self.log.info("Creating sparkApplication")
    hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
    # api_group/api_version are operator parameters so users can target
    # other spark-operator releases; plural is fixed by the CRD.
    return hook.create_custom_object(
        group=self.api_group,
        version=self.api_version,
        plural="sparkapplications",
        body=self.application_file,
        namespace=self.namespace,
    )
def test_default_kube_config_connection(self, mock_kube_config_loader, mock_kube_config_merger):
    """Without an explicit path, the hook should load kube config from the default location."""
    hook = KubernetesHook(conn_id='kubernetes_default_kube_config')
    conn = hook.get_conn()
    mock_kube_config_loader.assert_called_once_with("/mock/config")
    mock_kube_config_merger.assert_called_once()
    self.assertIsInstance(conn, kubernetes.client.api_client.ApiClient)
def __init__(
    self,
    *,
    application_name: str,
    attach_log: bool = False,
    namespace: Optional[str] = None,
    kubernetes_conn_id: str = "kubernetes_default",
    **kwargs,
) -> None:
    """Store the sensor parameters and build the Kubernetes hook once up front."""
    super().__init__(**kwargs)
    self.namespace = namespace
    self.application_name = application_name
    self.attach_log = attach_log
    self.kubernetes_conn_id = kubernetes_conn_id
    # Hook is created eagerly so every poke reuses the same connection config.
    self.hook = KubernetesHook(conn_id=kubernetes_conn_id)
def get_hook(self):
    """Build a KubernetesHook from the operator's settings.

    Deprecated core-config ``[kubernetes]`` values are patched onto the hook
    afterwards so they act only as fallbacks to explicit operator params.
    """
    kube_hook = KubernetesHook(
        conn_id=self.kubernetes_conn_id,
        in_cluster=self.in_cluster,
        config_file=self.config_file,
        cluster_context=self.cluster_context,
    )
    self._patch_deprecated_k8s_settings(kube_hook)
    return kube_hook
def __init__(
    self,
    *,
    application_name: str,
    attach_log: bool = False,
    namespace: Optional[str] = None,
    kubernetes_conn_id: str = "kubernetes_default",
    api_group: str = 'sparkoperator.k8s.io',
    api_version: str = 'v1beta2',
    **kwargs,
) -> None:
    """Store sensor parameters, the target CRD group/version, and build the hook."""
    super().__init__(**kwargs)
    self.api_group = api_group
    self.api_version = api_version
    self.namespace = namespace
    self.application_name = application_name
    self.attach_log = attach_log
    self.kubernetes_conn_id = kubernetes_conn_id
    # One hook per sensor instance; reused across pokes.
    self.hook = KubernetesHook(conn_id=kubernetes_conn_id)
def poke(self, context: Dict):
    """Return True once the SparkApplication succeeds; raise AirflowException on failure.

    Returns False (keep poking) while the application has no reported state or
    is still in an intermediate state.
    """
    self.log.info("Poking: %s", self.application_name)
    hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
    response = hook.get_custom_resource_definition(
        group="sparkoperator.k8s.io",
        version="v1beta2",
        plural="sparkapplications",
        name=self.application_name,
        namespace=self.namespace,
    )
    try:
        state = response['status']['applicationState']['state']
    except KeyError:
        # Status block not populated yet — the operator hasn't reconciled.
        return False
    if state in self.FAILURE_STATES:
        raise AirflowException("Spark application failed with state: %s" % state)
    if state in self.SUCCESS_STATES:
        self.log.info("Spark application ended successfully")
        return True
    self.log.info("Spark application is still in state: %s", state)
    return False
def test_get_namespace(self):
    """Namespace comes from the connection when set, otherwise falls back to 'default'."""
    hook_with_ns = KubernetesHook(conn_id='kubernetes_with_namespace')
    hook_without_ns = KubernetesHook(conn_id='kubernetes_default_kube_config')
    assert hook_with_ns.get_namespace() == 'mock_namespace'
    assert hook_without_ns.get_namespace() == 'default'
def test_get_namespace(self):
    """Namespace comes from the connection when set, otherwise falls back to 'default'."""
    hook_with_ns = KubernetesHook(conn_id='kubernetes_with_namespace')
    hook_default = KubernetesHook(conn_id='kubernetes_default_kube_config')
    self.assertEqual(hook_with_ns.get_namespace(), 'mock_namespace')
    self.assertEqual(hook_default.get_namespace(), 'default')
class SparkKubernetesSensor(BaseSensorOperator):
    """
    Checks sparkApplication object in kubernetes cluster:

    .. seealso::
        For more detail about Spark Application Object have a look at the reference:
        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.1.0-2.4.5/docs/api-docs.md#sparkapplication

    :param application_name: spark Application resource name
    :type application_name: str
    :param namespace: the kubernetes namespace where the sparkApplication reside in
    :type namespace: str
    :param kubernetes_conn_id: the connection to Kubernetes cluster
    :type kubernetes_conn_id: str
    :param attach_log: determines whether logs for driver pod should be appended to the sensor log
    :type attach_log: bool
    """

    template_fields = ("application_name", "namespace")
    # Terminal states as reported by the spark-on-k8s operator.
    FAILURE_STATES = ("FAILED", "UNKNOWN")
    SUCCESS_STATES = ("COMPLETED",)

    @apply_defaults
    def __init__(
        self,
        *,
        application_name: str,
        attach_log: bool = False,
        namespace: Optional[str] = None,
        kubernetes_conn_id: str = "kubernetes_default",
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.application_name = application_name
        self.attach_log = attach_log
        self.namespace = namespace
        self.kubernetes_conn_id = kubernetes_conn_id
        # Hook is built once at construction and reused by every poke.
        self.hook = KubernetesHook(conn_id=self.kubernetes_conn_id)

    def _log_driver(self, application_state: str, response: dict) -> None:
        """Best-effort: append the driver pod's log to the sensor log.

        Silently returns when attach_log is off or the response carries no
        driver pod information yet.
        """
        if not self.attach_log:
            return
        status_info = response["status"]
        if "driverInfo" not in status_info:
            return
        driver_info = status_info["driverInfo"]
        if "podName" not in driver_info:
            return
        driver_pod_name = driver_info["podName"]
        namespace = response["metadata"]["namespace"]
        # Failed applications log at ERROR so the driver output stands out.
        log_method = self.log.error if application_state in self.FAILURE_STATES else self.log.info
        try:
            log = ""
            for line in self.hook.get_pod_logs(driver_pod_name, namespace=namespace):
                log += line.decode()
            log_method(log)
        except client.rest.ApiException as e:
            # The driver pod may already be garbage-collected; warn instead of failing.
            self.log.warning(
                "Could not read logs for pod %s. It may have been disposed.\n"
                "Make sure timeToLiveSeconds is set on your SparkApplication spec.\n"
                "underlying exception: %s",
                driver_pod_name,
                e,
            )

    def poke(self, context: Dict) -> bool:
        """Return True on success state, False while pending; raise on failure."""
        self.log.info("Poking: %s", self.application_name)
        response = self.hook.get_custom_object(
            group="sparkoperator.k8s.io",
            version="v1beta2",
            plural="sparkapplications",
            name=self.application_name,
            namespace=self.namespace,
        )
        try:
            application_state = response["status"]["applicationState"]["state"]
        except KeyError:
            # Status not populated yet — keep poking.
            return False
        # Attach driver logs only once the application reached a terminal state.
        if self.attach_log and application_state in self.FAILURE_STATES + self.SUCCESS_STATES:
            self._log_driver(application_state, response)
        if application_state in self.FAILURE_STATES:
            raise AirflowException("Spark application failed with state: %s" % application_state)
        elif application_state in self.SUCCESS_STATES:
            self.log.info("Spark application ended successfully")
            return True
        else:
            self.log.info("Spark application is still in state: %s", application_state)
            return False
def test_in_cluster_connection(self, mock_kube_config_loader):
    """An in-cluster connection should use the in-cluster config loader."""
    hook = KubernetesHook(conn_id='kubernetes_in_cluster')
    conn = hook.get_conn()
    mock_kube_config_loader.assert_called_once()
    self.assertIsInstance(conn, kubernetes.client.api_client.ApiClient)