Exemple #1
0
def test_kubernetes_optional():
    """Serialisation / deserialisation keeps working when kubernetes cannot be imported"""

    def import_without_kubernetes(name, globals_=None, locals_=None, fromlist=(), level=0):
        # Absolute imports rooted at the ``kubernetes`` package fail exactly as
        # they would if the package were not installed; everything else is
        # delegated to the real import machinery.
        is_absolute = level == 0
        if is_absolute and name.split('.', 1)[0] == 'kubernetes':
            raise ImportError("No module named 'kubernetes'")
        return importlib.__import__(name, globals=globals_, locals=locals_, fromlist=fromlist, level=level)

    with mock.patch('builtins.__import__', side_effect=import_without_kubernetes) as patched_import:
        # Execute a fresh copy of the module; the copy already registered in
        # sys.modules (if any) is left untouched.
        serialized_objects_spec = importlib.util.find_spec("airflow.serialization.serialized_objects")
        fresh_module = importlib.util.module_from_spec(serialized_objects_spec)
        serialized_objects_spec.loader.exec_module(fresh_module)

        # Reaching this point means nothing imported ``kubernetes`` directly.
        # Now collect the second path component of every ``airflow.*`` import
        # to verify that ``airflow.kubernetes`` was not requested either.
        airflow_subpackages = set()
        for import_call in patched_import.call_args_list:
            imported_name = import_call.args[0]
            if imported_name.startswith("airflow."):
                airflow_subpackages.add(imported_name.split('.', 2)[1])
        assert "kubernetes" not in airflow_subpackages

        # Deserialising a pod is expected to fail when kubernetes is unavailable.
        serialized_pod = {
            '__type': 'k8s.V1Pod',
            '__var': PodGenerator.serialize_pod(executor_config_pod),
        }
        with pytest.raises(RuntimeError):
            fresh_module.BaseSerialization.from_dict(serialized_pod)

        # Plain DAG serialisation must still go through.
        fresh_module.SerializedDAG.to_dict(make_simple_dag()["simple_dag"])
    def _adopt_completed_pods(self, kube_client: kubernetes.client.CoreV1Api):
        """
        Patch completed pod so that the KubernetesJobWatcher can delete it.

        Lists the pods in the configured namespace that are in phase
        ``Succeeded`` and carry the ``kubernetes_executor=True`` label, sets
        their ``airflow-worker`` label to this scheduler's job id and patches
        each one. A pod whose patch fails with ``ApiException`` is logged and
        skipped.

        :param kube_client: kubernetes client for speaking to kube API
        """
        completed_pods = kube_client.list_namespaced_pod(
            namespace=self.kube_config.kube_namespace,
            field_selector="status.phase=Succeeded",
            label_selector='kubernetes_executor=True',
        )
        for completed_pod in completed_pods.items:
            metadata = completed_pod.metadata
            self.log.info("Attempting to adopt pod %s", metadata.name)
            metadata.labels['airflow-worker'] = str(self.scheduler_job_id)
            try:
                kube_client.patch_namespaced_pod(
                    name=metadata.name,
                    namespace=metadata.namespace,
                    body=PodGenerator.serialize_pod(completed_pod),
                )
            except ApiException as err:
                self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, err)
Exemple #3
0
    def adopt_launched_task(
        self,
        kube_client: client.CoreV1Api,
        pod: k8s.V1Pod,
        pod_ids: Dict[TaskInstanceKey, k8s.V1Pod],
    ) -> None:
        """
        Patch existing pod so that the current KubernetesJobWatcher can monitor it via label selectors

        The pod's ``airflow-worker`` label is set to this scheduler's job id
        (made label-safe). If the key derived from the pod annotations is not
        in ``pod_ids`` an error is logged and nothing is patched. On a
        successful patch the key is removed from ``pod_ids`` and added to
        ``self.running``; an ``ApiException`` is logged and otherwise ignored.

        :param kube_client: kubernetes client for speaking to kube API
        :param pod: V1Pod spec that we will patch with new label
        :param pod_ids: pod_ids we expect to patch.
        """
        metadata = pod.metadata
        self.log.info("attempting to adopt pod %s", metadata.name)
        worker_label = pod_generator.make_safe_label_value(str(self.scheduler_job_id))
        metadata.labels['airflow-worker'] = worker_label

        task_key = annotations_to_key(metadata.annotations)
        if task_key not in pod_ids:
            self.log.error(
                "attempting to adopt taskinstance which was not specified by database: %s",
                task_key,
            )
            return

        try:
            kube_client.patch_namespaced_pod(
                name=metadata.name,
                namespace=metadata.namespace,
                body=PodGenerator.serialize_pod(pod),
            )
            pod_ids.pop(task_key)
            self.running.add(task_key)
        except ApiException as err:
            self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, err)
    def adopt_launched_task(self, kube_client, pod, pod_ids: dict):
        """
        Patch existing pod so that the current KubernetesJobWatcher can monitor it via label selectors

        The pod's ``airflow-worker`` label is set to this scheduler's job id.
        The pod id is built from the pod's ``dag_id`` and ``task_id`` labels;
        if it is not in ``pod_ids`` an error is logged and nothing is patched.
        On a successful patch the id is removed from ``pod_ids``; an
        ``ApiException`` is logged and otherwise ignored.

        :param kube_client: kubernetes client for speaking to kube API
        :param pod: V1Pod spec that we will patch with new label
        :param pod_ids: pod_ids we expect to patch.
        """
        metadata = pod.metadata
        self.log.info("attempting to adopt pod %s", metadata.name)

        labels = metadata.labels
        labels['airflow-worker'] = str(self.scheduler_job_id)
        dag_id = labels['dag_id']
        task_id = labels['task_id']
        pod_id = create_pod_id(dag_id=dag_id, task_id=task_id)

        if pod_id not in pod_ids:
            self.log.error(
                "attempting to adopt task %s in dag %s which was not specified by database",
                task_id,
                dag_id,
            )
            return

        try:
            kube_client.patch_namespaced_pod(
                name=metadata.name,
                namespace=metadata.namespace,
                body=PodGenerator.serialize_pod(pod),
            )
            pod_ids.pop(pod_id)
        except ApiException as err:
            self.log.info("Failed to adopt pod %s. Reason: %s", metadata.name, err)
Exemple #5
0
    def _default(obj):
        """Convert dates and numpy objects in a json serializable format."""
        if isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
        elif isinstance(obj, date):
            return obj.strftime('%Y-%m-%d')
        elif isinstance(
            obj,
            (
                np.int_,
                np.intc,
                np.intp,
                np.int8,
                np.int16,
                np.int32,
                np.int64,
                np.uint8,
                np.uint16,
                np.uint32,
                np.uint64,
            ),
        ):
            return int(obj)
        elif isinstance(obj, np.bool_):
            return bool(obj)
        elif isinstance(
            obj, (np.float_, np.float16, np.float32, np.float64, np.complex_, np.complex64, np.complex128)
        ):
            return float(obj)
        elif k8s is not None and isinstance(obj, (k8s.V1Pod, k8s.V1ResourceRequirements)):
            from airflow.kubernetes.pod_generator import PodGenerator

            return PodGenerator.serialize_pod(obj)

        raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
Exemple #6
0
    def _serialize(
        cls, var: Any
    ) -> Any:  # Unfortunately there is no support for recursive types in mypy
        """Helper function of depth first search for serialization.

        The serialization protocol is:

        (1) keeping JSON supported types: primitives, dict, list;
        (2) encoding other types as ``{TYPE: 'foo', VAR: 'bar'}``, the deserialization
            step decode VAR according to TYPE;
        (3) Operator has a special field CLASS to record the original class
            name for displaying in UI.
        """
        if cls._is_primitive(var):
            # enum.IntEnum is an int instance, it causes json dumps error so we use its value.
            if isinstance(var, enum.Enum):
                return var.value
            return var
        elif isinstance(var, dict):
            return cls._encode(
                {str(k): cls._serialize(v)
                 for k, v in var.items()},
                type_=DAT.DICT)
        elif isinstance(var, list):
            return [cls._serialize(v) for v in var]
        elif HAS_KUBERNETES and isinstance(var, k8s.V1Pod):
            json_pod = PodGenerator.serialize_pod(var)
            return cls._encode(json_pod, type_=DAT.POD)
        elif isinstance(var, DAG):
            return SerializedDAG.serialize_dag(var)
        elif isinstance(var, BaseOperator):
            return SerializedBaseOperator.serialize_operator(var)
        elif isinstance(var, cls._datetime_types):
            return cls._encode(var.timestamp(), type_=DAT.DATETIME)
        elif isinstance(var, datetime.timedelta):
            return cls._encode(var.total_seconds(), type_=DAT.TIMEDELTA)
        elif isinstance(var, Timezone):
            return cls._encode(encode_timezone(var), type_=DAT.TIMEZONE)
        elif isinstance(var, relativedelta.relativedelta):
            return cls._encode(encode_relativedelta(var),
                               type_=DAT.RELATIVEDELTA)
        elif callable(var):
            return str(get_python_source(var))
        elif isinstance(var, set):
            # FIXME: casts set to list in customized serialization in future.
            try:
                return cls._encode(sorted(cls._serialize(v) for v in var),
                                   type_=DAT.SET)
            except TypeError:
                return cls._encode([cls._serialize(v) for v in var],
                                   type_=DAT.SET)
        elif isinstance(var, tuple):
            # FIXME: casts tuple to list in customized serialization in future.
            return cls._encode([cls._serialize(v) for v in var],
                               type_=DAT.TUPLE)
        elif isinstance(var, TaskGroup):
            return SerializedTaskGroup.serialize_task_group(var)
        else:
            log.debug('Cast type %s to str in serialization.', type(var))
            return str(var)
 def patch_already_checked(self, pod: k8s.V1Pod):
     """
     Set the "already_checked" label on the pod and patch it through the client.

     The label records that the pod was already tried, to ensure we only retry once.

     :param pod: pod to label and patch
     """
     metadata = pod.metadata
     metadata.labels["already_checked"] = "True"
     serialized_pod = PodGenerator.serialize_pod(pod)
     self.client.patch_namespaced_pod(metadata.name, metadata.namespace, serialized_pod)
Exemple #8
0
    def _default(obj):
        """Convert dates and numpy objects in a json serializable format."""
        if isinstance(obj, datetime):
            return obj.strftime('%Y-%m-%dT%H:%M:%SZ')
        elif isinstance(obj, date):
            return obj.strftime('%Y-%m-%d')
        elif isinstance(obj, Decimal):
            _, _, exponent = obj.as_tuple()
            if exponent >= 0:  # No digits after the decimal point.
                return int(obj)
            # Technically lossy due to floating point errors, but the best we
            # can do without implementing a custom encode function.
            return float(obj)
        elif np is not None and isinstance(
                obj,
            (
                np.int_,
                np.intc,
                np.intp,
                np.int8,
                np.int16,
                np.int32,
                np.int64,
                np.uint8,
                np.uint16,
                np.uint32,
                np.uint64,
            ),
        ):
            return int(obj)
        elif np is not None and isinstance(obj, np.bool_):
            return bool(obj)
        elif np is not None and isinstance(
                obj, (np.float_, np.float16, np.float32, np.float64,
                      np.complex_, np.complex64, np.complex128)):
            return float(obj)
        elif k8s is not None and isinstance(
                obj, (k8s.V1Pod, k8s.V1ResourceRequirements)):
            from airflow.kubernetes.pod_generator import PodGenerator

            return PodGenerator.serialize_pod(obj)

        raise TypeError(
            f"Object of type '{obj.__class__.__name__}' is not JSON serializable"
        )
Exemple #9
0
     "_outlets": [],
     "ui_color": "#f0ede4",
     "ui_fgcolor": "#000",
     "template_fields": ['bash_command', 'env'],
     "template_fields_renderers": {'bash_command': 'bash', 'env': 'json'},
     "bash_command": "echo {{ task.task_id }}",
     'label': 'bash_task',
     "_task_type": "BashOperator",
     "_task_module": "airflow.operators.bash",
     "pool": "default_pool",
     "executor_config": {
         '__type': 'dict',
         '__var': {
             "pod_override": {
                 '__type': 'k8s.V1Pod',
                 '__var': PodGenerator.serialize_pod(executor_config_pod),
             }
         },
     },
 },
 {
     "task_id": "custom_task",
     "retries": 1,
     "retry_delay": 300.0,
     "sla": 100.0,
     "_downstream_task_ids": [],
     "_inlets": [],
     "_is_dummy": False,
     "_outlets": [],
     "_operator_extra_links": [{"tests.test_utils.mock_operators.CustomOpLink": {}}],
     "ui_color": "#fff",
Exemple #10
0
 def patch_already_checked(self, pod: k8s.V1Pod):
     """
     Set the ``POD_CHECKED_KEY`` label on the pod and patch it through the client.

     The label marks the pod as "already checked" to ensure we don't reattach on retries.

     :param pod: pod to label and patch
     """
     metadata = pod.metadata
     metadata.labels[self.POD_CHECKED_KEY] = "True"
     serialized_pod = PodGenerator.serialize_pod(pod)
     self.client.patch_namespaced_pod(metadata.name, metadata.namespace, serialized_pod)
    def _serialize(cls, var: Any) -> Any:  # Unfortunately there is no support for recursive types in mypy
        """Helper function of depth first search for serialization.

        The serialization protocol is:

        (1) keeping JSON supported types: primitives, dict, list;
        (2) encoding other types as ``{TYPE: 'foo', VAR: 'bar'}``, the deserialization
            step decode VAR according to TYPE;
        (3) Operator has a special field CLASS to record the original class
            name for displaying in UI.
        """
        try:
            if cls._is_primitive(var):
                # enum.IntEnum is an int instance, it causes json dumps error so we use its value.
                if isinstance(var, enum.Enum):
                    return var.value
                return var
            elif isinstance(var, dict):
                return cls._encode(
                    {str(k): cls._serialize(v) for k, v in var.items()},
                    type_=DAT.DICT
                )
            elif isinstance(var, list):
                return [cls._serialize(v) for v in var]
            elif isinstance(var, k8s.V1Pod):
                json_pod = PodGenerator.serialize_pod(var)
                return cls._encode(json_pod, type_=DAT.POD)
            elif isinstance(var, DAG):
                return SerializedDAG.serialize_dag(var)
            elif isinstance(var, BaseOperator):
                return SerializedBaseOperator.serialize_operator(var)
            elif isinstance(var, cls._datetime_types):
                return cls._encode(var.timestamp(), type_=DAT.DATETIME)
            elif isinstance(var, datetime.timedelta):
                return cls._encode(var.total_seconds(), type_=DAT.TIMEDELTA)
            elif isinstance(var, (Timezone)):
                return cls._encode(str(var.name), type_=DAT.TIMEZONE)
            elif isinstance(var, relativedelta.relativedelta):
                encoded = {k: v for k, v in var.__dict__.items() if not k.startswith("_") and v}
                if var.weekday and var.weekday.n:
                    # Every n'th Friday for example
                    encoded['weekday'] = [var.weekday.weekday, var.weekday.n]
                elif var.weekday:
                    encoded['weekday'] = [var.weekday.weekday]
                return cls._encode(encoded, type_=DAT.RELATIVEDELTA)
            elif callable(var):
                return str(get_python_source(var))
            elif isinstance(var, set):
                # FIXME: casts set to list in customized serialization in future.
                return cls._encode(
                    [cls._serialize(v) for v in var], type_=DAT.SET)
            elif isinstance(var, tuple):
                # FIXME: casts tuple to list in customized serialization in future.
                return cls._encode(
                    [cls._serialize(v) for v in var], type_=DAT.TUPLE)
            else:
                log.debug('Cast type %s to str in serialization.', type(var))
                return str(var)
        except Exception:  # pylint: disable=broad-except
            log.error('Failed to stringify.', exc_info=True)
            return FAILED
Exemple #12
0
    def _default(obj):
        """Convert dates and numpy objects in a json serializable format."""
        if isinstance(obj, datetime):
            if is_naive(obj):
                obj = convert_to_utc(obj)
            return obj.isoformat()
        elif isinstance(obj, date):
            return obj.strftime('%Y-%m-%d')
        elif isinstance(obj, Decimal):
            _, _, exponent = obj.as_tuple()
            if exponent >= 0:  # No digits after the decimal point.
                return int(obj)
            # Technically lossy due to floating point errors, but the best we
            # can do without implementing a custom encode function.
            return float(obj)
        elif np is not None and isinstance(
                obj,
            (
                np.int_,
                np.intc,
                np.intp,
                np.int8,
                np.int16,
                np.int32,
                np.int64,
                np.uint8,
                np.uint16,
                np.uint32,
                np.uint64,
            ),
        ):
            return int(obj)
        elif np is not None and isinstance(obj, np.bool_):
            return bool(obj)
        elif np is not None and isinstance(
                obj, (np.float_, np.float16, np.float32, np.float64,
                      np.complex_, np.complex64, np.complex128)):
            return float(obj)
        elif k8s is not None and isinstance(
                obj, (k8s.V1Pod, k8s.V1ResourceRequirements)):
            from airflow.kubernetes.pod_generator import PodGenerator

            def safe_get_name(pod):
                """
                We're running this in an except block, so we don't want it to
                fail under any circumstances, e.g. by accessing an attribute that isn't there
                """
                try:
                    return pod.metadata.name
                except Exception:
                    return None

            try:
                return PodGenerator.serialize_pod(obj)
            except Exception:
                log.warning("JSON encoding failed for pod %s",
                            safe_get_name(obj))
                log.debug("traceback for pod JSON encode error", exc_info=True)
                return {}

        raise TypeError(
            f"Object of type '{obj.__class__.__name__}' is not JSON serializable"
        )