Beispiel #1
0
 def test_clean_job_name_postfix_characters(self):
     """A name ending with "-" gets the postfix appended without that trailing dash."""
     uid_prefix = "6a8330cc"
     raw_name = "dbnd.tttttttt-operator.t-t-t-training-with-t-sssss-session-"
     expected = "dbnd.tttttttt-operator.t-t-t-training-with-t-sssss-session.6a8330cc"

     cleaned = clean_job_name_dns1123(raw_name, postfix=".%s" % uid_prefix)

     assert cleaned == expected
Beispiel #2
0
    def build_pod(
        self,
        task_run,
        cmds,
        args=None,
        labels=None,
        try_number=None,
        include_system_secrets=False,
    ):
        # type: (TaskRun, List[str], Optional[List[str]], Optional[Dict[str,str]], Optional[int], bool) ->Pod
        """
        Build the airflow ``Pod`` object that will execute ``task_run``.

        :param task_run: task run the pod is created for.
        :param cmds: container command.
        :param args: optional container arguments.
        :param labels: extra pod labels, combined with ``self.labels``.
        :param try_number: forwarded to ``get_pod_name``.
        :param include_system_secrets: forwarded to ``get_secrets``.
        :return: the configured ``Pod``.
        :raises DatabandConfigError: when ``self.container_tag`` is not set.
        """
        pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

        image = self.full_image
        labels = combine_mappings(labels, self.labels)
        labels["dbnd_run_uid"] = clean_job_name_dns1123(
            str(task_run.run.run_uid))
        labels["dbnd_task_run_uid"] = clean_job_name_dns1123(
            str(task_run.task_run_uid))
        # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)
        labels["dbnd"] = "task_run"

        # work on a copy so the configured annotations are not modified
        annotations = self.annotations.copy()
        if self.gcp_service_account_keys:
            annotations[
                "iam.cloud.google.com/service-account"] = self.gcp_service_account_keys
        annotations["dbnd_tracker"] = task_run.task_tracker_url

        from dbnd_docker.kubernetes.dbnd_extended_resources import DbndExtendedResources

        resources = DbndExtendedResources(
            requests=self.requests,
            limits=self.limits,
            request_memory=self.request_memory,
            request_cpu=self.request_cpu,
            limit_memory=self.limit_memory,
            limit_cpu=self.limit_cpu,
        )
        env_vars = {
            ENV_DBND_POD_NAME: pod_name,
            ENV_DBND_POD_NAMESPACE: self.namespace,
            ENV_DBND_USER: task_run.task_run_env.user,
            ENV_DBND__ENV_IMAGE: image,
            ENV_DBND_ENV: task_run.run.env.task_name,
            ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
        }
        if self.auto_remove:
            env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
        env_vars[self._params.get_param_env_key("in_cluster")] = "True"
        env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
        env_vars[
            "DBND__RUN_INFO__SOURCE_VERSION"] = task_run.run.context.task_run_env.user_code_version

        # we want all following runs to be able to use the image from our configuration
        env_vars.update(
            self._params.to_env_map("container_repository", "container_tag"))

        # later updates win: configured env vars first, then the run's spawn env
        env_vars.update(self.env_vars)
        env_vars.update(task_run.run.get_context_spawn_env())

        secrets = self.get_secrets(
            include_system_secrets=include_system_secrets)

        from airflow.contrib.kubernetes.pod import Pod

        if self.trap_exit_file_flag:
            # The container runs `cmds + args`, so the wrapped script has to carry both;
            # otherwise the caller's args would be lost once `args` is replaced below.
            wrapped_command = list(cmds) + list(args or [])
            args = [
                textwrap.dedent("""
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(wrapped_command),
                ))
            ]
            # from now on the container entry point is bash running the script above
            cmds = ["/bin/bash", "-c"]

        if not self.container_tag:
            raise DatabandConfigError(
                "Your container tag is None, please check your configuration",
                help_msg="Container tag should be assigned",
            )

        pod = Pod(
            namespace=self.namespace,
            name=pod_name,
            envs=env_vars,
            image=image,
            cmds=cmds,
            args=args,
            labels=labels,
            image_pull_policy=self.image_pull_policy,
            image_pull_secrets=self.image_pull_secrets,
            secrets=secrets,
            service_account_name=self.service_account_name,
            volumes=self.volumes,
            volume_mounts=self.volume_mounts,
            annotations=annotations,
            node_selectors=self.node_selectors,
            affinity=self.affinity,
            tolerations=self.tolerations,
            security_context=self.security_context,
            configmaps=self.configmaps,
            hostnetwork=self.hostnetwork,
            resources=resources,
        )

        if self.pod_yaml:
            # attach the content of the configured yaml file to the pod
            pod.pod_yaml = target(self.pod_yaml).read()

        return pod
Beispiel #3
0
    def __init__(
        self,
        task,
        run,
        task_af_id=None,
        try_number=1,
        is_dynamic=None,
        task_engine=None,
    ):
        # type: (Task, DatabandRun, str, int, bool, EngineConfig)-> None
        """
        Bind a task to a run and set up the state of this task run.

        :param task: task being executed.
        :param run: run this task run belongs to.
        :param task_af_id: identifier used to derive the job name; defaults to ``task.task_id``.
        :param try_number: stored as ``self.try_number``.
        :param is_dynamic: overrides ``task.task_is_dynamic`` when it is not None.
        :param task_engine: optional engine config; when it is None or has no
            ``dbnd_local_root`` set, ``run.env.dbnd_local_root`` is used instead.
        """
        self.task = task  # type: Task
        self.run = run  # type: DatabandRun
        self.task_engine = task_engine
        self.try_number = try_number
        self.is_dynamic = is_dynamic if is_dynamic is not None else task.task_is_dynamic
        self.is_system = task.task_is_system
        self.task_af_id = task_af_id or self.task.task_id

        if task.ctrl.force_task_run_uid:
            # the forced value is either the uid itself or a generator that produces it
            self.task_run_uid = tr_uid = task.ctrl.force_task_run_uid
            if isinstance(tr_uid, TaskRunUidGen):
                self.task_run_uid = tr_uid.generate_task_run_uid(
                    run=run, task=task, task_af_id=self.task_af_id
                )
        else:
            self.task_run_uid = get_uuid()

        # used by all kind of submission controllers
        self.job_name = clean_job_name(self.task_af_id).lower()
        self.job_id = self.job_name + "_" + str(self.task_run_uid)[:8]

        # DNS-1123 subdomain name (k8s)
        self.job_id__dns1123 = clean_job_name_dns1123(
            "dbnd.{task_family}.{task_name}".format(
                task_family=self.task.task_meta.task_family,
                task_name=self.task.task_meta.task_name,
            ),
            postfix=".%s" % str(self.task_run_uid)[:8],
        )

        # custom per task engine, or just use one from global env;
        # task_engine is optional, so it must not be dereferenced when it is None
        engine_local_root = (
            self.task_engine.dbnd_local_root if self.task_engine is not None else None
        )
        dbnd_local_root = engine_local_root or self.run.env.dbnd_local_root
        self.local_task_run_root = (
            dbnd_local_root.folder(run.run_folder_prefix)
            .folder("tasks")
            .folder(self.task.task_id)
        )

        # attempt state: placeholders are set first, then init_attempt() is called
        self._attempt_number = 1
        self.task_run_attempt_uid = get_uuid()
        self.attempt_folder = None
        self.meta_files = None
        self.log = None
        self.init_attempt()

        # TODO: inherit from parent task if disabled
        self.is_tracked = task._conf__tracked

        # the run's tracking store is used only when both the task and the run are tracked
        if self.is_tracked and self.run.is_tracked:
            tracking_store = self.run.context.tracking_store
        else:
            tracking_store = ConsoleStore()

        self.tracking_store = tracking_store
        self.tracker = TaskRunTracker(task_run=self, tracking_store=tracking_store)
        self.runner = TaskRunRunner(task_run=self)
        self.deploy = TaskSyncCtrl(task_run=self)
        self.task_tracker_url = self.tracker.task_run_url()
        self.external_resource_urls = dict()
        self.errors = []

        self.is_root = False
        self.is_reused = False
        self.is_skipped = False
        # Task can be skipped as it's not required by any other task scheduled to run
        self.is_skipped_as_not_required = False

        self._airflow_context = None
        self._task_run_state = None

        self.start_time = None
        self.finished_time = None
Beispiel #4
0
 def test_clean_job_name_postfix_max(self):
     """A 300-character name is cut to 244 characters before the 9-character postfix."""
     uid_prefix = "6a8330cc"
     too_long_name = "a" * 300
     expected = "a" * 244 + ".6a8330cc"

     cleaned = clean_job_name_dns1123(
         too_long_name, placeholder=r"-", postfix=".%s" % uid_prefix
     )

     assert cleaned == expected
Beispiel #5
0
 def test_clean_job_name_postfix_2(self):
     """Underscores and CamelCase in the name come out as lower-case, dash-separated parts."""
     uid_prefix = "6a8330cc"
     expected = "driver-submit-9991469ce9.bash-cmd.6a8330cc"

     cleaned = clean_job_name_dns1123(
         "driver_submit__9991469ce9.BashCmd", postfix=".%s" % uid_prefix
     )

     assert cleaned == expected
Beispiel #6
0
 def test_clean_job_name_postfix_1(self):
     """Brackets and mixed case in the name are cleaned and the postfix is appended."""
     uid_prefix = "6a8330cc"
     expected = "aa-bb-1111.jobname.6a8330cc"

     cleaned = clean_job_name_dns1123(
         "AaBb[]1111.jobname", postfix=".%s" % uid_prefix
     )

     assert cleaned == expected