Example #1
 def test_kube_config_path(self, mock_kube_config_loader,
                           mock_kube_config_merger):
     kubernetes_hook = KubernetesHook(conn_id='kubernetes_kube_config_path')
     api_conn = kubernetes_hook.get_conn()
     mock_kube_config_loader.assert_called_once_with("path/to/file")
     mock_kube_config_merger.assert_called_once()
     self.assertIsInstance(api_conn, kubernetes.client.api_client.ApiClient)
Example #2
    def _patch_deprecated_k8s_settings(self, hook: KubernetesHook):
        """
        Here we read config from the core Airflow config's [kubernetes] section.
        In a future release we will stop looking at this section and require users
        to use Airflow connections to configure KPO.

        When we find values there that we need to apply to the hook, we patch
        special hook attributes here.
        """
        # default for enable_tcp_keepalive is True; patch if False
        if conf.getboolean('kubernetes', 'enable_tcp_keepalive') is False:
            hook._deprecated_core_disable_tcp_keepalive = True

        # default verify_ssl is True; patch if False.
        if conf.getboolean('kubernetes', 'verify_ssl') is False:
            hook._deprecated_core_disable_verify_ssl = True

        # default for in_cluster is True; patch if False and no KPO param.
        conf_in_cluster = conf.getboolean('kubernetes', 'in_cluster')
        if self.in_cluster is None and conf_in_cluster is False:
            hook._deprecated_core_in_cluster = conf_in_cluster

        # there's no default for cluster context; if we get something (and no KPO param) patch it.
        conf_cluster_context = conf.get('kubernetes', 'cluster_context', fallback=None)
        if not self.cluster_context and conf_cluster_context:
            hook._deprecated_core_cluster_context = conf_cluster_context

        # there's no default for config_file; if we get something (and no KPO param) patch it.
        conf_config_file = conf.get('kubernetes', 'config_file', fallback=None)
        if not self.config_file and conf_config_file:
            hook._deprecated_core_config_file = conf_config_file
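For context, the lookups above read the legacy [kubernetes] section of airflow.cfg. A minimal sketch, assuming an illustrative config (the values below are examples, not Airflow defaults), of a setup in which every branch above would fire:

 from airflow.configuration import conf

 # Illustrative airflow.cfg contents this method reacts to:
 #
 #   [kubernetes]
 #   enable_tcp_keepalive = False
 #   verify_ssl = False
 #   in_cluster = False
 #   cluster_context = my-cluster-context
 #   config_file = /path/to/kubeconfig
 #
 # With that config (and no explicit KPO params), both boolean flags are
 # explicitly False and cluster_context/config_file are set, so all five
 # _deprecated_core_* attributes get patched onto the hook.
 assert conf.getboolean('kubernetes', 'enable_tcp_keepalive') is False
 assert conf.get('kubernetes', 'cluster_context', fallback=None) == 'my-cluster-context'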
Example #3
 def test_kube_config_connection(self, mock_kube_config_loader,
                                 mock_kube_config_merger, mock_tempfile):
     kubernetes_hook = KubernetesHook(conn_id='kubernetes_kube_config')
     api_conn = kubernetes_hook.get_conn()
     mock_tempfile.assert_called_once()
     mock_kube_config_loader.assert_called_once()
     mock_kube_config_merger.assert_called_once()
     self.assertIsInstance(api_conn, kubernetes.client.api_client.ApiClient)
Example #4
 def test_should_raise_exception_on_invalid_configuration(self, conn_uri):
     with mock.patch.dict("os.environ",
                          AIRFLOW_CONN_KUBERNETES_DEFAULT=conn_uri
                          ), self.assertRaisesRegex(
                              AirflowException,
                              "Invalid connection configuration"):
         kubernetes_hook = KubernetesHook()
         kubernetes_hook.get_conn()
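As background for the test above: Airflow materializes any AIRFLOW_CONN_<CONN_ID> environment variable into a connection, which is how the parametrized conn_uri is injected. A minimal sketch of a well-formed counterpart, assuming the hook's extra__kubernetes__kube_config_path extra and a hypothetical kubeconfig path:

 import os

 # Illustrative: expose a kubernetes connection to Airflow purely through
 # the environment; %2F is the URL-encoded '/' in the kubeconfig path.
 os.environ["AIRFLOW_CONN_KUBERNETES_DEFAULT"] = (
     "kubernetes://?extra__kubernetes__kube_config_path=%2Fpath%2Fto%2Fkubeconfig"
 )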
Example #5
 def execute(self, context):
     self.log.info("Creating sparkApplication")
     hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
     response = hook.create_custom_resource_definition(
         group="sparkoperator.k8s.io",
         version="v1beta2",
         plural="sparkapplications",
         body=self.application_file,
         namespace=self.namespace)
     return response
Example #6
 def execute(self, context):
     self.log.info("Creating sparkApplication")
     hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
     response = hook.create_custom_object(
         group=self.api_group,
         version=self.api_version,
         plural="sparkapplications",
         body=self.application_file,
         namespace=self.namespace,
     )
     return response
Example #7
 def test_default_kube_config_connection(
     self,
     mock_kube_config_loader,
     mock_kube_config_merger,
 ):
     kubernetes_hook = KubernetesHook(
         conn_id='kubernetes_default_kube_config')
     api_conn = kubernetes_hook.get_conn()
     mock_kube_config_loader.assert_called_once_with("/mock/config")
     mock_kube_config_merger.assert_called_once()
     self.assertIsInstance(api_conn, kubernetes.client.api_client.ApiClient)
Example #8
 def __init__(
     self,
     *,
     application_name: str,
     attach_log: bool = False,
     namespace: Optional[str] = None,
     kubernetes_conn_id: str = "kubernetes_default",
     **kwargs,
 ) -> None:
     super().__init__(**kwargs)
     self.application_name = application_name
     self.attach_log = attach_log
     self.namespace = namespace
     self.kubernetes_conn_id = kubernetes_conn_id
     self.hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
Example #9
 def get_hook(self):
     hook = KubernetesHook(
         conn_id=self.kubernetes_conn_id,
         in_cluster=self.in_cluster,
         config_file=self.config_file,
         cluster_context=self.cluster_context,
     )
     self._patch_deprecated_k8s_settings(hook)
     return hook
Example #10
 def __init__(
     self,
     *,
     application_name: str,
     attach_log: bool = False,
     namespace: Optional[str] = None,
     kubernetes_conn_id: str = "kubernetes_default",
     api_group: str = 'sparkoperator.k8s.io',
     api_version: str = 'v1beta2',
     **kwargs,
 ) -> None:
     super().__init__(**kwargs)
     self.application_name = application_name
     self.attach_log = attach_log
     self.namespace = namespace
     self.kubernetes_conn_id = kubernetes_conn_id
     self.hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
     self.api_group = api_group
     self.api_version = api_version
Example #11
 def poke(self, context: Dict):
     self.log.info("Poking: %s", self.application_name)
     hook = KubernetesHook(conn_id=self.kubernetes_conn_id)
     response = hook.get_custom_resource_definition(
         group="sparkoperator.k8s.io",
         version="v1beta2",
         plural="sparkapplications",
         name=self.application_name,
         namespace=self.namespace)
     try:
         application_state = response['status']['applicationState']['state']
     except KeyError:
         return False
     if application_state in self.FAILURE_STATES:
         raise AirflowException("Spark application failed with state: %s" % application_state)
     elif application_state in self.SUCCESS_STATES:
         self.log.info("Spark application ended successfully")
         return True
     else:
         self.log.info("Spark application is still in state: %s", application_state)
         return False
Example #12
 def test_get_namespace(self):
     kubernetes_hook_with_namespace = KubernetesHook(
         conn_id='kubernetes_with_namespace')
     kubernetes_hook_without_namespace = KubernetesHook(
         conn_id='kubernetes_default_kube_config')
     assert kubernetes_hook_with_namespace.get_namespace() == 'mock_namespace'
     assert kubernetes_hook_without_namespace.get_namespace() == 'default'
Example #13
 def test_get_namespace(self):
     kubernetes_hook_with_namespace = KubernetesHook(
         conn_id='kubernetes_with_namespace')
     kubernetes_hook_without_namespace = KubernetesHook(
         conn_id='kubernetes_default_kube_config')
     self.assertEqual(kubernetes_hook_with_namespace.get_namespace(),
                      'mock_namespace')
     self.assertEqual(kubernetes_hook_without_namespace.get_namespace(),
                      'default')
Example #14
class SparkKubernetesSensor(BaseSensorOperator):
    """
    Checks a sparkApplication object in a Kubernetes cluster:

    .. seealso::
        For more detail about the SparkApplication object, have a look at the reference:
        https://github.com/GoogleCloudPlatform/spark-on-k8s-operator/blob/v1beta2-1.1.0-2.4.5/docs/api-docs.md#sparkapplication

    :param application_name: spark application resource name
    :type application_name: str
    :param namespace: the Kubernetes namespace where the sparkApplication resides
    :type namespace: str
    :param kubernetes_conn_id: the connection to the Kubernetes cluster
    :type kubernetes_conn_id: str
    :param attach_log: determines whether logs for the driver pod should be appended to the sensor log
    :type attach_log: bool
    """

    template_fields = ("application_name", "namespace")
    FAILURE_STATES = ("FAILED", "UNKNOWN")
    SUCCESS_STATES = ("COMPLETED", )

    @apply_defaults
    def __init__(
        self,
        *,
        application_name: str,
        attach_log: bool = False,
        namespace: Optional[str] = None,
        kubernetes_conn_id: str = "kubernetes_default",
        **kwargs,
    ) -> None:
        super().__init__(**kwargs)
        self.application_name = application_name
        self.attach_log = attach_log
        self.namespace = namespace
        self.kubernetes_conn_id = kubernetes_conn_id
        self.hook = KubernetesHook(conn_id=self.kubernetes_conn_id)

    def _log_driver(self, application_state: str, response: dict) -> None:
        if not self.attach_log:
            return
        status_info = response["status"]
        if "driverInfo" not in status_info:
            return
        driver_info = status_info["driverInfo"]
        if "podName" not in driver_info:
            return
        driver_pod_name = driver_info["podName"]
        namespace = response["metadata"]["namespace"]
        log_method = self.log.error if application_state in self.FAILURE_STATES else self.log.info
        try:
            log = ""
            for line in self.hook.get_pod_logs(driver_pod_name,
                                               namespace=namespace):
                log += line.decode()
            log_method(log)
        except client.rest.ApiException as e:
            self.log.warning(
                "Could not read logs for pod %s. It may have been disposed.\n"
                "Make sure timeToLiveSeconds is set on your SparkApplication spec.\n"
                "underlying exception: %s",
                driver_pod_name,
                e,
            )

    def poke(self, context: Dict) -> bool:
        self.log.info("Poking: %s", self.application_name)
        response = self.hook.get_custom_object(
            group="sparkoperator.k8s.io",
            version="v1beta2",
            plural="sparkapplications",
            name=self.application_name,
            namespace=self.namespace,
        )
        try:
            application_state = response["status"]["applicationState"]["state"]
        except KeyError:
            return False
        if self.attach_log and application_state in self.FAILURE_STATES + self.SUCCESS_STATES:
            self._log_driver(application_state, response)
        if application_state in self.FAILURE_STATES:
            raise AirflowException("Spark application failed with state: %s" %
                                   application_state)
        elif application_state in self.SUCCESS_STATES:
            self.log.info("Spark application ended successfully")
            return True
        else:
            self.log.info("Spark application is still in state: %s",
                          application_state)
            return False
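A minimal usage sketch for the sensor above, assuming a hypothetical DAG and a SparkApplication named spark-pi already submitted to the cluster:

 from datetime import datetime

 from airflow import DAG

 with DAG(dag_id="spark_pi_monitor", start_date=datetime(2021, 1, 1),
          schedule_interval=None) as dag:
     # Poll the sparkapplications custom resource until it reaches a terminal
     # state; attach_log=True appends the driver pod's logs on completion.
     monitor = SparkKubernetesSensor(
         task_id="monitor_spark_pi",
         application_name="spark-pi",
         namespace="default",
         kubernetes_conn_id="kubernetes_default",
         attach_log=True,
     )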
Example #15
 def test_in_cluster_connection(self, mock_kube_config_loader):
     kubernetes_hook = KubernetesHook(conn_id='kubernetes_in_cluster')
     api_conn = kubernetes_hook.get_conn()
     mock_kube_config_loader.assert_called_once()
     self.assertIsInstance(api_conn, kubernetes.client.api_client.ApiClient)