Code Example #1
File: sagemaker_task.py Project: turbaszek/dbnd
class SageMakerTrainTask(Task):
    estimator_config = parameter[BaseEstimatorConfig]

    train = parameter(
        description="path to an s3 bucket where training data is stored"
    )[Target]
    test = parameter.none().help(
        "path to an s3 bucket where test data is stored")[Target]
    validate = parameter.none().help(
        "path to an s3 bucket where validation data is stored")[Target]

    region = parameter(default="us-east-1",
                       description="region to use for docker image resolution")
    output_path = (output.description("s3 path to output a model").folder_data.
                   with_flag(None).no_extension[Target])
    sagemaker_role = parameter[str]

    wait_for_completion = parameter.c(default=True)[bool]
    print_log = parameter.c(default=True)[bool]
    check_interval = parameter.c(default=30)[int]
    max_ingestion_time = parameter.c(default=None)[int]

    def _get_ctrl(self):
        # type: () -> SageMakerCtrl
        return SageMakerCtrl(self)

    def _task_submit(self):
        self._get_ctrl().train()
        return "Ok"
Code Example #2
File: apache_beam_task.py Project: turbaszek/dbnd
class ApacheBeamJavaTask(_BeamTask):
    main_class = parameter.none(
        description="The entry point for your application", system=True)[str]

    def _task_submit(self):
        if not self.beam.main_jar:
            raise DatabandConfigError("main_jar is not configured for %s" %
                                      self)

        return self._get_job_ctrl().run_cmd_java(jar=self.beam.jar,
                                                 main_class=self.main_class)
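
_task_submit here fails fast when no jar is configured instead of letting the submission fail downstream. The same guard as a self-contained sketch (ConfigError is a hypothetical stand-in for dbnd's DatabandConfigError):

# Self-contained sketch of the fail-fast guard in _task_submit above.
# ConfigError is a hypothetical stand-in for dbnd's DatabandConfigError.
class ConfigError(Exception):
    pass


def submit_beam_job(main_jar, main_class):
    if not main_jar:
        raise ConfigError("main_jar is not configured for %s" % main_class)
    return "running %s from %s" % (main_class, main_jar)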
Code Example #3
class KubernetesEngineConfig(ContainerEngineConfig):
    _conf__task_family = "kubernetes"

    cluster_context = parameter.none().help("Kubernetes cluster context")[str]
    config_file = parameter.none().help("Custom Kubernetes config file")[str]

    in_cluster = parameter(default=False)[bool]

    image_pull_policy = parameter.value(
        "IfNotPresent", description="Kubernetes image_pull_policy flag")

    image_pull_secrets = parameter.none().help(
        "Secret to use for image pull")[str]
    keep_finished_pods = parameter(
        default=False).help("Don't delete pods on completion")[bool]
    keep_failed_pods = parameter(
        default=False).help("Don't delete failed pods")[bool]

    namespace = parameter(default="default")[str]
    secrets = parameter(empty_default=True).help(
        "User secrets to be added to every created pod")[List]
    system_secrets = parameter(empty_default=True).help(
        "System secrets (used by Databand Framework)")[List]
    env_vars = parameter(empty_default=True)[Dict]

    node_selectors = parameter(empty_default=True)[Dict]
    annotations = parameter(empty_default=True)[Dict]
    pods_creation_batch_size = parameter.value(10)[int]
    service_account_name = parameter.none()[str]
    gcp_service_account_keys = parameter.none()[
        str]  # it's actually dict, but KubeConf expects str
    affinity = parameter(empty_default=True)[Dict]
    tolerations = parameter(empty_default=True)[List]

    hostnetwork = parameter.value(False)
    configmaps = parameter(empty_default=True)[List[str]]

    volumes = parameter.none()[List[str]]
    volume_mounts = parameter.none()[List[str]]
    security_context = parameter.none()[List[str]]
    labels = parameter.none()[Dict]

    request_memory = parameter.none()[str]
    request_cpu = parameter.none()[str]
    limit_memory = parameter.none()[str]
    limit_cpu = parameter.none()[str]

    requests = parameter.none()[Dict]
    limits = parameter.none()[Dict]

    pod_exit_code_to_retry_count = parameter(empty_default=True).help(
        "Mapping between pod exit code to amount of pod retry attempts")[Dict]
    pod_retry_delay = parameter.help(
        "The delay between each pod retry attempt in time delta format. 1m, 5s, 1h, etc."
    )[datetime.timedelta]
    retry_on_image_pull_error_count = parameter.help(
        "Describes the amount of retry attempts when a pod fails with "
        "'ErrImagePull'").default(0)[int]

    startup_timeout_seconds = parameter.value(120)
    show_pod_log = parameter(default=False).help(
        "When using this engine as the task_engine, run tasks sequentially and stream their logs"
    )[bool]
    debug = parameter(default=False).help(
        "Equalent to show_pod_log=True + show all debug information")[bool]

    prefix_remote_log = parameter(default=True).help(
        "Adds [driver] or [<task_name>] prefix to logs streamed from Kubernetes to the local log"
    )
    check_unschedulable_condition = parameter(default=True).help(
        "Try to detect non-transient issues that prevent the pod from being scheduled and fail the run if needed"
    )
    check_cluster_resource_capacity = parameter(default=True).help(
        "When a pod can't be scheduled due to cpu or memory constraints, check if the constraints are possible to satisfy in the cluster"
    )

    startup_timeout = parameter(default="10m").help(
        "Time to wait for pod getting into Running state")[datetime.timedelta]

    dashboard_url = parameter(default=None).help(
        "skeleton url to display as kubernetes dashboard")[str]

    pod_log_url = parameter(
        default=None).help("skeleton url to display logs of pods")[str]

    pod_yaml = parameter(default="${DBND_LIB}/conf/kubernetes-pod.yaml").help(
        "Base yaml to use to run databand task/driver")[str]

    trap_exit_file_flag = parameter(default=None).help("trap exit file")[str]
    auto_remove = parameter(
        default=False,
        description="Auto-removal of the pod when container has finished.",
    )[bool]
    detach_run = parameter(
        default=False,
        description="Submit run only, do not wait for it completion.")[bool]

    submit_termination_grace_period = parameter(
        description="timedelta to let the submitted pod enter a final state")[
            datetime.timedelta]

    def _initialize(self):
        super(KubernetesEngineConfig, self)._initialize()

        if self.debug:
            logger.warning(
                "Running in debug mode, setting all k8s loggers to debug, waiting for every pod completion!"
            )
            import airflow.contrib.kubernetes

            set_module_logging_to_debug(
                [dbnd_docker, airflow.contrib.kubernetes])
            self.detach_run = False
        if self.show_pod_log:
            logger.warning(
                "Showing pod logs at runtime, waiting for every pod completion!"
            )
            self.detach_run = False
        if self.auto_remove and not self.detach_run:
            logger.warning(
                "Can't auto remove pod if not running from detach_run=True mode, "
                "switching to auto_remove=False")
            self.auto_remove = False

    def get_docker_ctrl(self, task_run):
        from dbnd_docker.kubernetes.kubernetes_task_run_ctrl import (
            KubernetesTaskRunCtrl, )

        return KubernetesTaskRunCtrl(task_run=task_run)

    def submit_to_engine_task(self, env, task_name, args, interactive=True):
        docker_engine = self
        if not interactive:
            docker_engine = docker_engine.clone(auto_remove=True,
                                                detach_run=True)
        return DockerRunTask(
            task_name=task_name,
            command=subprocess.list2cmdline(args),
            image=self.full_image,
            docker_engine=docker_engine,
            task_is_system=True,
        )

    def cleanup_after_run(self):
        # this run was submitted by task_run_async - we need to clean up after ourselves
        if not environ_enabled(ENV_DBND_AUTO_REMOVE_POD):
            return
        if ENV_DBND_POD_NAME in environ and ENV_DBND_POD_NAMESPACE in environ:
            try:
                logger.warning(
                    "Auto deleteing pod as accordingly to '%s' env variable" %
                    ENV_DBND_AUTO_REMOVE_POD)
                kube_dbnd = self.build_kube_dbnd()
                kube_dbnd.delete_pod(
                    name=environ[ENV_DBND_POD_NAME],
                    namespace=environ[ENV_DBND_POD_NAMESPACE],
                )
            except Exception as e:
                logger.warning("Tried to delete this pod but failed: %s" % e)
        else:
            logger.warning(
                "Auto deleting pod as set, but pod name and pod namespace is not defined"
            )

    def get_dashboard_link(self, pod):
        if not self.dashboard_url:
            return None
        try:
            return self.dashboard_url.format(namespace=pod.namespace,
                                             pod=pod.name)
        except Exception:
            logger.exception("Failed to generate dashboard url from %s" %
                             self.dashboard_url)
        return None

    def get_pod_log_link(self, pod):
        if not self.pod_log_url:
            return None
        try:
            return self.pod_log_url.format(
                namespace=pod.namespace,
                pod=pod.name,
                timestamp=datetime.datetime.now().isoformat(),
            )
        except Exception:
            logger.exception("Internal error on generating pod log url")
        return None

    def get_kube_client(self, in_cluster=None):
        from kubernetes import config, client

        if in_cluster is None:
            in_cluster = self.in_cluster
        if in_cluster:
            config.load_incluster_config()
        else:
            config.load_kube_config(config_file=self.config_file,
                                    context=self.cluster_context)

        if PY2:
            # For connect_get_namespaced_pod_exec
            from kubernetes.client import Configuration

            configuration = Configuration()
            configuration.assert_hostname = False
            Configuration.set_default(configuration)
        return client.CoreV1Api()

    def build_kube_dbnd(self, in_cluster=None):
        from dbnd_docker.kubernetes.kube_dbnd_client import DbndKubernetesClient
        from kubernetes.config import ConfigException

        try:
            kube_client = self.get_kube_client(in_cluster=in_cluster)
        except ConfigException as e:
            raise friendly_error.executor_k8s.failed_to_connect_to_cluster(
                self.in_cluster, e)

        kube_dbnd = DbndKubernetesClient(kube_client=kube_client,
                                         engine_config=self)
        return kube_dbnd

    def get_pod_name(self, task_run, try_number):
        pod_name = task_run.job_id__dns1123
        if try_number is not None:
            pod_name = "%s-%s" % (pod_name, try_number)
        return pod_name

    def build_pod(
        self,
        task_run,
        cmds,
        args=None,
        labels=None,
        try_number=None,
        include_system_secrets=False,
    ):
        # type: (TaskRun, List[str], Optional[List[str]], Optional[Dict[str, str]], Optional[int], bool) -> Pod
        pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

        image = self.full_image
        labels = combine_mappings(labels, self.labels)
        labels["dbnd_run_uid"] = clean_job_name_dns1123(
            str(task_run.run.run_uid))
        labels["dbnd_task_run_uid"] = clean_job_name_dns1123(
            str(task_run.task_run_uid))
        labels[
            "dbnd"] = "task_run"  # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)

        annotations = self.annotations.copy()
        if self.gcp_service_account_keys:
            annotations[
                "iam.cloud.google.com/service-account"] = self.gcp_service_account_keys
        annotations["dbnd_tracker"] = task_run.task_tracker_url

        from dbnd_docker.kubernetes.dbnd_extended_resources import DbndExtendedResources

        resources = DbndExtendedResources(
            requests=self.requests,
            limits=self.limits,
            request_memory=self.request_memory,
            request_cpu=self.request_cpu,
            limit_memory=self.limit_memory,
            limit_cpu=self.limit_cpu,
        )
        env_vars = {
            ENV_DBND_POD_NAME: pod_name,
            ENV_DBND_POD_NAMESPACE: self.namespace,
            ENV_DBND_USER: task_run.task_run_env.user,
            ENV_DBND__ENV_IMAGE: image,
            ENV_DBND_ENV: task_run.run.env.task_name,
            ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
        }
        if self.auto_remove:
            env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
        env_vars[self._params.get_param_env_key("in_cluster")] = "True"
        env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
        env_vars[
            "DBND__RUN_INFO__SOURCE_VERSION"] = task_run.run.context.task_run_env.user_code_version

        # we want all subsequent runs to be able to use the image from our configuration

        env_vars.update(
            self._params.to_env_map("container_repository", "container_tag"))

        env_vars.update(self.env_vars)
        env_vars.update(task_run.run.get_context_spawn_env())

        secrets = self.get_secrets(
            include_system_secrets=include_system_secrets)

        from airflow.contrib.kubernetes.pod import Pod

        if self.trap_exit_file_flag:
            args = [
                textwrap.dedent("""
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                ))
            ]
            # we update cmd now
            cmds = ["/bin/bash", "-c"]

        if not self.container_tag:
            raise DatabandConfigError(
                "Your container tag is None, please check your configuration",
                help_msg="Container tag should be assigned",
            )

        pod = Pod(
            namespace=self.namespace,
            name=pod_name,
            envs=env_vars,
            image=image,
            cmds=cmds,
            args=args,
            labels=labels,
            image_pull_policy=self.image_pull_policy,
            image_pull_secrets=self.image_pull_secrets,
            secrets=secrets,
            service_account_name=self.service_account_name,
            volumes=self.volumes,
            volume_mounts=self.volume_mounts,
            annotations=annotations,
            node_selectors=self.node_selectors,
            affinity=self.affinity,
            tolerations=self.tolerations,
            security_context=self.security_context,
            configmaps=self.configmaps,
            hostnetwork=self.hostnetwork,
            resources=resources,
        )

        if self.pod_yaml:
            pod.pod_yaml = target(self.pod_yaml).read()

        return pod

    def get_secrets(self, include_system_secrets=True):
        """Defines any necessary secrets for the pod executor"""
        from airflow.contrib.kubernetes.secret import Secret

        result = []
        if include_system_secrets:
            secrets = self.system_secrets + self.secrets
        else:
            secrets = self.secrets
        for secret_data in secrets:
            result.append(
                Secret(
                    deploy_type=secret_data.get("type"),
                    deploy_target=secret_data.get("target"),
                    secret=secret_data.get("secret"),
                    key=secret_data.get("key"),
                ))

        return result

    def build_kube_pod_req(self, pod):
        from dbnd_airflow.airflow_extensions.request_factory import (
            DbndPodRequestFactory, )

        self.apply_env_vars_to_pod(pod)
        kube_req_factory = DbndPodRequestFactory()
        if hasattr(pod, "pod_yaml"):
            kube_req_factory._yaml = pod.pod_yaml

        req = kube_req_factory.create(pod)
        return req

    def apply_env_vars_to_pod(self, pod):
        pod.envs["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
Code Example #4
class ContainerEngineConfig(EngineConfig):
    require_submit = True
    dbnd_executable = [
        "dbnd"
    ]  # we should have 'dbnd' command installed in container
    container_repository = parameter(
        validator=NonEmptyString()).help("Docker container registry")[str]
    container_tag = parameter.none().help("Docker container tag")[VersionStr]
    container_tag_gpu = parameter.none().help(
        "Docker container tag for GPU tasks")[VersionStr]

    docker_build_tag = parameter.help("Auto build docker container tag").value(
        "dbnd_build")
    docker_build = parameter(default=True).help(
        "Automatically build docker image. "
        "If container_repository is unset it will be taken (along with the tag) from the docker build settings"
    )[bool]
    docker_build_push = parameter(default=True).help(
        "If docker_build is enabled, controls whether the image is automatically pushed or not"
    )

    def get_docker_ctrl(self, task_run):
        pass

    @property
    def full_image(self):
        return "{}:{}".format(self.container_repository, self.container_tag)

    def prepare_for_run(self, run):
        # type: (DatabandRun) -> None
        super(ContainerEngineConfig, self).prepare_for_run(run)

        from dbnd_docker.submit_ctrl import prepare_docker_for_executor

        # when we run at the submitter - we need to update driver_engine - it will be used to send the job
        # when we run at the driver - we update the task config, which will be used by the task
        # inside pod submission the fallback is always on task_engine

        prepare_docker_for_executor(run, self)

    def submit_to_engine_task(self, env, task_name, args, interactive=True):
        from dbnd_docker.docker.docker_task import DockerRunTask

        submit_task = DockerRunTask(
            task_name=task_name,
            command=subprocess.list2cmdline(args),
            image=self.full_image,
            docker_engine=self,
            task_is_system=True,
        )
        return submit_task

    def _should_wrap_with_submit_task(self, task_run):
        """
        We don't want to resubmit if it's a dockerized run and we are running with the same engine
        """
        from dbnd_docker.docker.docker_task import DockerRunTask

        if isinstance(task_run.task, DockerRunTask):
            if task_run.task.docker_engine.task_name == self.task_name:
                return False
        return super(ContainerEngineConfig,
                     self)._should_wrap_with_submit_task(task_run)
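
Two small pieces of ContainerEngineConfig do the heavy lifting for submission: full_image joins repository and tag into the canonical image reference, and submit_to_engine_task builds the in-container command from dbnd_executable plus the caller's arguments via subprocess.list2cmdline. A quick stdlib sketch (repository, tag, and arguments are illustrative):

# Stdlib sketch of the image/command assembly above; values are illustrative.
import subprocess

container_repository, container_tag = "my-registry/dbnd-run", "dbnd_build"
full_image = "{}:{}".format(container_repository, container_tag)

dbnd_executable = ["dbnd"]  # the 'dbnd' command must be installed in the container
args = dbnd_executable + ["run", "my_pipeline"]
command = subprocess.list2cmdline(args)

print(full_image)  # my-registry/dbnd-run:dbnd_build
print(command)     # dbnd run my_pipeline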
Code Example #5
class KubernetesEngineConfig(ContainerEngineConfig):
    _conf__task_family = "kubernetes"

    cluster_context = parameter.none().help("Kubernetes cluster context")[str]
    config_file = parameter.none().help("Custom Kubernetes config file")[str]

    in_cluster = parameter(default=None).help(
        "If None, we set it dynamically, according to where we run.")[bool]

    image_pull_policy = parameter.value(
        "IfNotPresent", description="Kubernetes image_pull_policy flag")

    image_pull_secrets = parameter.none().help(
        "Secret to use for image pull")[str]
    keep_finished_pods = parameter(
        default=False).help("Don't delete pods on completion")[bool]
    keep_failed_pods = parameter(
        default=False).help("Don't delete failed pods")[bool]

    namespace = parameter(default="default")[str]
    secrets = parameter(empty_default=True).help(
        "User secrets to be added to every created pod")[List]
    system_secrets = parameter(empty_default=True).help(
        "System secrets (used by Databand Framework)")[List]
    env_vars = parameter(empty_default=True)[Dict]

    node_selectors = parameter(empty_default=True)[Dict]
    annotations = parameter(empty_default=True)[Dict]
    pods_creation_batch_size = parameter.value(10)[int]
    service_account_name = parameter.none()[str]
    gcp_service_account_keys = parameter.none()[
        str]  # it's actually dict, but KubeConf expects str
    affinity = parameter(empty_default=True)[Dict]
    tolerations = parameter(empty_default=True)[List]

    hostnetwork = parameter.value(False)
    configmaps = parameter(empty_default=True)[List[str]]

    volumes = parameter.none()[List[str]]
    volume_mounts = parameter.none()[List[str]]
    security_context = parameter.none()[List[str]]
    labels = parameter.none()[Dict]

    request_memory = parameter.none()[str]
    request_cpu = parameter.none()[str]
    limit_memory = parameter.none()[str]
    limit_cpu = parameter.none()[str]

    requests = parameter.none()[Dict]
    limits = parameter.none()[Dict]

    pod_error_cfg_source_dict = parameter(
        description="Values for pod error handling configuration")[Dict]

    pod_default_retry_delay = parameter(
        description=
        "The default amount of time to wait between retries of pods",
        default="10s",
    )[datetime.timedelta]

    submit_termination_grace_period = parameter(
        description="timedelta to let the submitted pod enter a final state")[
            datetime.timedelta]

    startup_timeout_seconds = parameter.value(120)
    show_pod_log = parameter(default=False).help(
        "When using this engine as the task_engine, run tasks sequentially and stream their logs"
    )[bool]
    debug = parameter(default=False).help(
        "Equalent to show_pod_log=True + show all debug information")[bool]
    debug_with_command = parameter(default="").help(
        "Use this command as a pod command instead of the original, can help debug complicated issues"
    )[str]
    debug_phase = parameter(default="").help(
        "Debug mode for speicific phase of pod events. All these events will be printed with the full response from k8s"
    )[str]

    prefix_remote_log = parameter(default=True).help(
        "Adds [driver] or [<task_name>] prefix to logs streamed from Kubernetes to the local log"
    )
    check_unschedulable_condition = parameter(default=True).help(
        "Try to detect non-transient issues that prevent the pod from being scheduled and fail the run if needed"
    )
    check_image_pull_errors = parameter(default=True).help(
        "Try to detect image pull issues that prevent the pod from being scheduled and fail the run if needed"
    )
    check_running_pod_errors = parameter(default=False).help(
        "Try to detect running pod issues like failed ContainersReady condition (pod is deleted)"
    )
    check_cluster_resource_capacity = parameter(default=True).help(
        "When a pod can't be scheduled due to cpu or memory constraints, check if the constraints are possible to satisfy in the cluster"
    )

    startup_timeout = parameter(default="10m").help(
        "Time to wait for pod getting into Running state")[datetime.timedelta]

    dashboard_url = parameter(default=None).help(
        "skeleton url to display as kubernetes dashboard")[str]

    pod_log_url = parameter(
        default=None).help("skeleton url to display logs of pods")[str]

    pod_yaml = parameter(default="${DBND_LIB}/conf/kubernetes-pod.yaml").help(
        "Base yaml to use to run databand task/driver")[str]

    trap_exit_file_flag = parameter(default=None).help("trap exit file")[str]
    auto_remove = parameter(
        default=False,
        description="Auto-removal of the pod when container has finished.",
    )[bool]
    detach_run = parameter(
        default=False,
        description="Submit run only, do not wait for it completion.")[bool]

    watcher_request_timeout_seconds = parameter(
        default=300,
        description="How many seconds watcher should wait "
        "for events "
        "until timeout",
    )[int]

    watcher_recreation_interval_seconds = parameter(
        default=30,
        description=
        "How many seconds to wait before resurrecting watcher after the timeout",
    )[int]

    watcher_client_timeout_seconds = parameter(
        default=50,
        description=
        "How many seconds to wait before timeout occurs in watcher on client side (read)",
    )[int]

    log_pod_events_on_sigterm = parameter(
        default=False,
        description=
        "When receiving sigterm log current pod state to debug why the pod was terminated",
    )

    pending_zombies_timeout = parameter(
        default="5h",
        description=
        "Amount of time we will wait before a pending pod would consider a"
        " zombie and we will set it to fail",
    ).type(TimeDeltaValueType)

    zombie_query_interval_secs = parameter(
        default=600,
        description=
        "Amount of seconds we wait between zombie checking intervals. "
        "Default: 600 sec => 10 minutes",
    )

    zombie_threshold_secs = parameter(
        default=300,
        description="If the job has not heartbeat in this many seconds, "
        "the scheduler will mark the associated task instance as failed and will re-schedule the task.",
    )

    # airflow live logs feature
    # ------------------------- #
    airflow_log_enabled = parameter(
        default=False,
        description="Enables Airflow Live log at KubernetesExecutor feature",
    )[bool]
    airflow_log_image = parameter(
        default=None,
        description=
        "Overrider the image that will be used to add sidecar to the run which will expose the live "
        "logs of the run. By default the main container image will be used",
    )[str]
    airflow_log_folder = parameter(
        default="/usr/local/airflow/logs",
        description=
        "Specify the location on the airflow image (sidecar), where we mount the logs from the original"
        " container and expose them to airflow ui.",
    )[str]
    airflow_log_port = parameter(
        default="8793",
        description=
        "The port airflow live log sidecar will expose its service. This port should match the port "
        "airflow webserver tries to access the live logs ",
    )[str]

    airflow_log_trap_exit_flag_default = parameter(
        default="/tmp/pod/terminated",
        description=
        "The path that will be used by default if `airflow_log_enabled` is true ",
    )[str]

    container_airflow_log_path = parameter(
        default="/root/airflow/logs/",
        description="The path to the airflow logs, on the databand container.",
    )
    host_as_ip_for_live_logs = parameter(
        default=True,
        description="Set the host of the pod to be the ip address of the pod."
        "In kubernetes normally, only Services get DNS names, not Pods."
        "We use the ip for airflow webserver to lookup the sidecar See more here: https://stackoverflow.com/a/59262628",
    )

    def _initialize(self):
        super(KubernetesEngineConfig, self)._initialize()

        if self.debug:
            logger.warning(
                "Running in debug mode, setting all k8s loggers to debug, waiting for every pod completion!"
            )
            if AIRFLOW_VERSION_2:
                from airflow import kubernetes
            else:
                from airflow.contrib import kubernetes

            set_module_logging_to_debug([dbnd_docker, kubernetes])
            self.detach_run = False
        if self.show_pod_log:
            logger.warning(
                "Showing pod logs at runtime, waiting for every pod completion!"
            )
            self.detach_run = False
        if self.auto_remove and not self.detach_run:
            logger.warning(
                "Can't auto remove pod if not running from detach_run=True mode, "
                "switching to auto_remove=False")
            self.auto_remove = False

        if (self.airflow_log_enabled and not self.trap_exit_file_flag
                and self.airflow_log_trap_exit_flag_default):
            self.trap_exit_file_flag = self.airflow_log_trap_exit_flag_default

        self.pod_retry_config = PodRetryConfiguration.from_kube_config(self)

    def get_docker_ctrl(self, task_run):
        from dbnd_docker.kubernetes.kubernetes_task_run_ctrl import (
            KubernetesTaskRunCtrl, )

        return KubernetesTaskRunCtrl(task_run=task_run)

    def submit_to_engine_task(self, env, task_name, args, interactive=True):
        docker_engine = self
        if not interactive:
            docker_engine = docker_engine.clone(auto_remove=True,
                                                detach_run=True)
        return DockerRunTask(
            task_name=task_name,
            command=subprocess.list2cmdline(args),
            image=self.full_image,
            docker_engine=docker_engine,
            task_is_system=True,
        )

    def cleanup_after_run(self):
        # this run was submitted by task_run_async - we need to clean up after ourselves
        if not environ_enabled(ENV_DBND_AUTO_REMOVE_POD):
            return
        if ENV_DBND_POD_NAME in environ and ENV_DBND_POD_NAMESPACE in environ:
            try:
                logger.warning(
                    "Auto deleteing pod as accordingly to '%s' env variable" %
                    ENV_DBND_AUTO_REMOVE_POD)
                kube_dbnd = self.build_kube_dbnd()
                kube_dbnd.delete_pod(
                    name=environ[ENV_DBND_POD_NAME],
                    namespace=environ[ENV_DBND_POD_NAMESPACE],
                )
            except Exception as e:
                logger.warning("Tried to delete this pod but failed: %s" % e)
        else:
            logger.warning(
                "Auto deleting pod as set, but pod name and pod namespace is not defined"
            )

    def get_dashboard_link(self, pod_namespace: str,
                           pod_name: str) -> Optional[str]:
        if not self.dashboard_url:
            return None
        try:
            return self.dashboard_url.format(namespace=pod_namespace,
                                             pod=pod_name)
        except Exception:
            logger.exception("Failed to generate dashboard url from %s" %
                             self.dashboard_url)
        return None

    def get_pod_log_link(self, pod_namespace: str,
                         pod_name: str) -> Optional[str]:
        if not self.pod_log_url:
            return None
        try:
            return self.pod_log_url.format(
                namespace=pod_namespace,
                pod=pod_name,
                timestamp=datetime.datetime.now().isoformat(),
            )
        except Exception:
            logger.exception("Internal error on generating pod log url")
        return None

    def get_kube_client(self, in_cluster=None):
        from kubernetes import client, config

        # if in_cluster is set to None, we set it dynamically by trying to set
        # the k8s's config as if we are in a cluster, and if it fails, we set it
        # as we are not running in a cluster.
        if in_cluster is None:
            try:
                config.load_incluster_config()
                self.in_cluster = True
            except ConfigException:
                try:
                    config.load_kube_config(config_file=self.config_file,
                                            context=self.cluster_context)
                except ConfigException as e:
                    raise friendly_error.executor_k8s.failed_to_load_config_file(
                        e)
                self.in_cluster = False
        else:
            try:
                if in_cluster:
                    config.load_incluster_config()
                else:
                    config.load_kube_config(config_file=self.config_file,
                                            context=self.cluster_context)
            except ConfigException as e:
                raise friendly_error.executor_k8s.failed_to_connect_to_cluster(
                    self.in_cluster, e)

        if PY2:
            # For connect_get_namespaced_pod_exec
            from kubernetes.client import Configuration

            configuration = Configuration()
            configuration.assert_hostname = False
            Configuration.set_default(configuration)
        return client.CoreV1Api()

    def build_kube_dbnd(self, in_cluster=None):
        from dbnd_docker.kubernetes.kube_dbnd_client import DbndKubernetesClient

        kube_client = self.get_kube_client(in_cluster=in_cluster)
        kube_dbnd = DbndKubernetesClient(kube_client=kube_client,
                                         engine_config=self)
        return kube_dbnd

    def get_pod_name(self, task_run, try_number):
        pod_name = create_pod_id(task_run)
        if try_number is not None:
            pod_name = "%s-%s" % (pod_name, try_number)
        return pod_name

    def build_pod(
        self,
        task_run: TaskRun,
        cmds: List[str],
        args: Optional[List[str]] = None,
        labels: Optional[Dict[str, str]] = None,
        try_number: Optional[int] = None,
        include_system_secrets: bool = False,
    ) -> k8s.V1Pod:
        if not self.container_tag:
            raise DatabandConfigError(
                "Your container tag is None, please check your configuration",
                help_msg="Container tag should be assigned",
            )

        pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

        image = self.full_image
        labels = combine_mappings(labels, self.labels)
        labels["pod_name"] = pod_name

        labels["dbnd_run_uid"] = task_run.run.run_uid
        labels["dbnd_task_run_uid"] = task_run.task_run_uid
        labels["dbnd_task_run_attempt_uid"] = task_run.task_run_attempt_uid
        labels[
            "dbnd_task_family"] = task_run.task.task_definition.full_task_family_short
        labels["dbnd_task_name"] = task_run.task.task_name
        labels["dbnd_task_af_id"] = task_run.task_af_id

        # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)
        if task_run.task.task_is_system:
            labels["dbnd"] = "dbnd_system_task_run"
        else:
            labels["dbnd"] = "task_run"

        # we need to be sure that the values meet the dns label names RFC
        # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
        labels = {
            label_name: clean_label_name_dns1123(str(label_value))
            for label_name, label_value in six.iteritems(labels)
        }
        if is_verbose():
            logger.info("Build pod with kubernetes labels {}".format(labels))

        annotations = self.annotations.copy()
        if self.gcp_service_account_keys:
            annotations[
                "iam.cloud.google.com/service-account"] = self.gcp_service_account_keys
        annotations["dbnd_tracker"] = task_run.task_tracker_url

        from dbnd_docker.kubernetes.vendorized_airflow.dbnd_extended_resources import (
            DbndExtendedResources, )

        resources = DbndExtendedResources(
            requests=self.requests,
            limits=self.limits,
            request_memory=self.request_memory,
            request_cpu=self.request_cpu,
            limit_memory=self.limit_memory,
            limit_cpu=self.limit_cpu,
        )
        env_vars = {
            ENV_DBND_POD_NAME: pod_name,
            ENV_DBND_POD_NAMESPACE: self.namespace,
            ENV_DBND_USER: task_run.task_run_env.user,
            ENV_DBND__ENV_IMAGE: image,
            ENV_DBND_ENV: task_run.run.env.task_name,
            ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
        }

        if AIRFLOW_VERSION_2:
            env_vars[
                "AIRFLOW__CORE__TASK_RUNNER"] = "dbnd_airflow.compat.dbnd_task_runner.DbndStandardTaskRunner"

        if self.auto_remove:
            env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
        env_vars[self._params.get_param_env_key(self, "in_cluster")] = "True"
        env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
        env_vars[
            "DBND__RUN_INFO__SOURCE_VERSION"] = task_run.run.context.task_run_env.user_code_version
        env_vars["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
        if not get_dbnd_project_config().is_tracking_mode():
            env_vars[ENV_DBND__TRACKING] = "False"
        # we want all subsequent runs to be able to use the image from our configuration

        env_vars.update(
            self._params.to_env_map(self, "container_repository",
                                    "container_tag"))

        env_vars.update(self.env_vars)
        env_vars.update(task_run.run.get_context_spawn_env())

        secrets = self.get_secrets(
            include_system_secrets=include_system_secrets)

        if self.trap_exit_file_flag:
            args = [
                textwrap.dedent("""
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                ))
            ]
            # we update cmd now
            cmds = ["/bin/bash", "-c"]

        if self.debug_with_command:
            logger.warning(
                "%s replacing pod %s command with '%s', original command=`%s`",
                task_run,
                pod_name,
                self.debug_with_command,
                subprocess.list2cmdline(cmds),
            )
            cmds = shlex.split(self.debug_with_command)

        base_pod = self._build_base_pod()

        pod = self._to_real_pod(
            cmds=cmds,
            args=args,
            namespace=self.namespace,
            name=pod_name,
            envs=env_vars,
            image=image,
            labels=labels,
            secrets=secrets,
            resources=resources,
            annotations=annotations,
        )

        final_pod = reconcile_pods(base_pod, pod)

        return final_pod

    def _to_real_pod(
        self,
        cmds: List[str],
        args: List[str],
        namespace: str,
        name: str,
        image: str,
        envs: Dict[str, str],
        labels: Dict[str, str],
        annotations: Dict[str, str],
        resources: "DbndExtendedResources",
        secrets: List["Secret"],
    ) -> k8s.V1Pod:

        # TODO add yaml template as basis
        BASE_CONTAINER_NAME = "base"
        kc: KubernetesEngineConfig = self
        meta = k8s.V1ObjectMeta(labels=labels,
                                name=name,
                                namespace=namespace,
                                annotations=annotations)
        if kc.image_pull_secrets:
            image_pull_secrets = [
                k8s.V1LocalObjectReference(i)
                for i in kc.image_pull_secrets.split(",")
            ]
        else:
            image_pull_secrets = []
        spec = k8s.V1PodSpec(
            # init_containers=kc.init_containers,
            containers=[
                k8s.V1Container(
                    image=image,
                    command=cmds,
                    env_from=[],
                    name=BASE_CONTAINER_NAME,
                    env=[
                        k8s.V1EnvVar(name=key, value=val)
                        for key, val in envs.items()
                    ],
                    args=args,
                    image_pull_policy=kc.image_pull_policy,
                )
            ],
            image_pull_secrets=image_pull_secrets,
            service_account_name=kc.service_account_name,
            node_selector=kc.node_selectors,
            # dns_policy=kc.dnspolicy,
            host_network=kc.hostnetwork,
            tolerations=kc.tolerations,
            affinity=kc.affinity,
            security_context=kc.security_context,
        )

        k8_pod = k8s.V1Pod(spec=spec, metadata=meta)
        for configmap_name in kc.configmaps:
            env_var = k8s.V1EnvFromSource(
                config_map_ref=k8s.V1ConfigMapEnvSource(name=configmap_name))
            k8_pod.spec.containers[0].env_from.append(env_var)

        volumes = kc.volumes or []
        for volume in volumes:
            k8_pod = volume_shims.attach_to_pod(k8_pod, volume)

        mounts = kc.volume_mounts or []
        for volume_mount in mounts:
            k8_pod = attach_volume_mount(k8_pod, volume_mount)

        secret: Secret
        for secret in secrets:
            if AIRFLOW_ABOVE_10:
                k8_pod = secret.attach_to_pod(k8_pod)
            else:
                k8_pod = attach_to_pod(secret, k8_pod)

        k8_pod = resources.attach_to_pod(k8_pod)
        return k8_pod

    def _build_base_pod(self) -> k8s.V1Pod:
        from kubernetes.client import ApiClient

        basis_pod_yaml = target(self.pod_yaml).read()
        basis_pod_dict = yaml.safe_load(basis_pod_yaml) or {}
        api_client = ApiClient()
        return api_client._ApiClient__deserialize_model(
            basis_pod_dict, k8s.V1Pod)

    def get_secrets(self, include_system_secrets=True) -> List["Secret"]:
        """Defines any necessary secrets for the pod executor"""

        from dbnd_airflow.compat.airflow_multi_version_shim import Secret

        result = []
        if include_system_secrets:
            secrets = self.system_secrets + self.secrets
        else:
            secrets = self.secrets
        for secret_data in secrets:
            result.append(
                Secret(
                    deploy_type=secret_data.get("type"),
                    deploy_target=secret_data.get("target"),
                    secret=secret_data.get("secret"),
                    key=secret_data.get("key"),
                ))

        return result

    def build_kube_pod_req(self, pod: k8s.V1Pod) -> Dict[str, Any]:
        from kubernetes.client import ApiClient

        return ApiClient().sanitize_for_serialization(pod)

    # TODO: [#2] add them in-place?
    def apply_env_vars_to_pod(self, pod):
        pod.envs["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
        if not get_dbnd_project_config().is_tracking_mode():
            pod.envs[ENV_DBND__TRACKING] = "False"
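
Unlike the older version in Code Example #3, build_kube_pod_req here delegates serialization to the official kubernetes Python client. A standalone sketch of what that call produces (the pod contents are illustrative; ApiClient.sanitize_for_serialization and the k8s models are the client's real API):

# Standalone sketch of build_kube_pod_req above: sanitize_for_serialization
# turns a V1Pod model into the plain dict the Kubernetes API accepts.
from kubernetes.client import ApiClient
from kubernetes.client import models as k8s

pod = k8s.V1Pod(
    metadata=k8s.V1ObjectMeta(name="dbnd-task-run-1", namespace="default"),
    spec=k8s.V1PodSpec(containers=[
        k8s.V1Container(
            name="base",  # same container name _to_real_pod uses
            image="my-registry/dbnd-run:dbnd_build",
            command=["/bin/bash", "-c"],
            args=["echo hello"],
        )
    ]),
)

req = ApiClient().sanitize_for_serialization(pod)
print(req["metadata"]["name"])  # dbnd-task-run-1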