def test_clean_job_name_postfix_characters(self):
    """A trailing '-' on the base name is dropped before the postfix is appended,
    keeping the result a valid DNS-1123 name."""
    suffix = ".%s" % "6a8330cc"
    cleaned = clean_job_name_dns1123(
        "dbnd.tttttttt-operator.t-t-t-training-with-t-sssss-session-",
        postfix=suffix,
    )
    expected = "dbnd.tttttttt-operator.t-t-t-training-with-t-sssss-session.6a8330cc"
    assert cleaned == expected
def build_pod(
    self,
    task_run,
    cmds,
    args=None,
    labels=None,
    try_number=None,
    include_system_secrets=False,
):
    # type: (TaskRun, List[str], Optional[List[str]], Optional[Dict[str,str]], Optional[int]) ->Pod
    """Build an Airflow ``Pod`` object describing the Kubernetes pod for this task run.

    Collects name, image, labels, annotations, resources, environment
    variables and secrets from this engine config and from ``task_run``,
    then returns the assembled ``Pod`` (not yet submitted to the cluster).

    :param task_run: the TaskRun the pod will execute.
    :param cmds: container command; replaced by ``/bin/bash -c`` when
        ``trap_exit_file_flag`` is set (see below).
    :param args: container args; overwritten with a trap-wrapper script when
        ``trap_exit_file_flag`` is set.
    :param labels: extra pod labels, merged with ``self.labels``.
    :param try_number: attempt number, forwarded to ``get_pod_name``.
    :param include_system_secrets: forwarded to ``get_secrets``.
    :raises DatabandConfigError: if ``self.container_tag`` is falsy.
    """
    pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

    image = self.full_image
    # caller-provided labels are combined with the configured ones;
    # run/task-run uids are sanitized to DNS-1123 form since they become label values
    labels = combine_mappings(labels, self.labels)
    labels["dbnd_run_uid"] = clean_job_name_dns1123(str(task_run.run.run_uid))
    labels["dbnd_task_run_uid"] = clean_job_name_dns1123(str(task_run.task_run_uid))
    labels[
        "dbnd"
    ] = "task_run"  # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)

    # copy so we never mutate the shared config-level annotations dict
    annotations = self.annotations.copy()
    if self.gcp_service_account_keys:
        annotations[
            "iam.cloud.google.com/service-account"
        ] = self.gcp_service_account_keys
    annotations["dbnd_tracker"] = task_run.task_tracker_url

    from dbnd_docker.kubernetes.dbnd_extended_resources import DbndExtendedResources

    resources = DbndExtendedResources(
        requests=self.requests,
        limits=self.limits,
        request_memory=self.request_memory,
        request_cpu=self.request_cpu,
        limit_memory=self.limit_memory,
        limit_cpu=self.limit_cpu,
    )

    # base dbnd environment for the container
    env_vars = {
        ENV_DBND_POD_NAME: pod_name,
        ENV_DBND_POD_NAMESPACE: self.namespace,
        ENV_DBND_USER: task_run.task_run_env.user,
        ENV_DBND__ENV_IMAGE: image,
        ENV_DBND_ENV: task_run.run.env.task_name,
        ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
    }
    if self.auto_remove:
        env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
    env_vars[self._params.get_param_env_key("in_cluster")] = "True"
    env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
    env_vars[
        "DBND__RUN_INFO__SOURCE_VERSION"
    ] = task_run.run.context.task_run_env.user_code_version
    # we want that all next runs will be able to use the image that we have in our configuration
    env_vars.update(
        self._params.to_env_map("container_repository", "container_tag")
    )
    # later updates win: explicit config env_vars, then the run's spawn env
    env_vars.update(self.env_vars)
    env_vars.update(task_run.run.get_context_spawn_env())

    secrets = self.get_secrets(include_system_secrets=include_system_secrets)

    # imported lazily so this module loads without airflow installed
    from airflow.contrib.kubernetes.pod import Pod

    if self.trap_exit_file_flag:
        # wrap the original command in a shell trap that touches the flag
        # file on container EXIT, so an external watcher can detect completion
        args = [
            textwrap.dedent(
                """
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                )
            )
        ]
        # we update cmd now
        cmds = ["/bin/bash", "-c"]

    if not self.container_tag:
        raise DatabandConfigError(
            "Your container tag is None, please check your configuration",
            help_msg="Container tag should be assigned",
        )

    pod = Pod(
        namespace=self.namespace,
        name=pod_name,
        envs=env_vars,
        image=image,
        cmds=cmds,
        args=args,
        labels=labels,
        image_pull_policy=self.image_pull_policy,
        image_pull_secrets=self.image_pull_secrets,
        secrets=secrets,
        service_account_name=self.service_account_name,
        volumes=self.volumes,
        volume_mounts=self.volume_mounts,
        annotations=annotations,
        node_selectors=self.node_selectors,
        affinity=self.affinity,
        tolerations=self.tolerations,
        security_context=self.security_context,
        configmaps=self.configmaps,
        hostnetwork=self.hostnetwork,
        resources=resources,
    )

    # optional raw yaml template attached for downstream use
    # NOTE(review): `target(...)` presumably resolves a file-like target — confirm
    if self.pod_yaml:
        pod.pod_yaml = target(self.pod_yaml).read()

    return pod
def __init__(
    self,
    task,
    run,
    task_af_id=None,
    try_number=1,
    is_dynamic=None,
    task_engine=None,
):
    # type: (Task, DatabandRun, str, int, bool, EngineConfig)-> None
    """Bind *task* to the concrete *run* and initialize all per-task-run state:
    unique ids, job names, local folders, attempt bookkeeping, tracking and
    runner/deploy controllers.

    :param task: the task definition this run executes.
    :param run: the enclosing DatabandRun.
    :param task_af_id: explicit airflow-style id; defaults to ``task.task_id``.
    :param try_number: attempt counter (1-based).
    :param is_dynamic: overrides ``task.task_is_dynamic`` when not None.
    :param task_engine: engine config; used for the local root below.
    """
    # actually this is used as Task uid
    self.task = task  # type: Task
    self.run = run  # type: DatabandRun
    self.task_engine = task_engine
    self.try_number = try_number
    self.is_dynamic = is_dynamic if is_dynamic is not None else task.task_is_dynamic
    self.is_system = task.task_is_system
    self.task_af_id = task_af_id or self.task.task_id

    # a forced uid may be either a concrete value or a TaskRunUidGen
    # generator; otherwise a fresh uuid is used
    if task.ctrl.force_task_run_uid:
        self.task_run_uid = tr_uid = task.ctrl.force_task_run_uid
        if isinstance(tr_uid, TaskRunUidGen):
            self.task_run_uid = tr_uid.generate_task_run_uid(
                run=run, task=task, task_af_id=self.task_af_id
            )
    else:
        self.task_run_uid = get_uuid()

    # used by all kind of submission controllers
    self.job_name = clean_job_name(self.task_af_id).lower()
    # job_id embeds the first 8 chars of the uid for uniqueness
    self.job_id = self.job_name + "_" + str(self.task_run_uid)[:8]

    # DNS-1123 subdomain name (k8s)
    self.job_id__dns1123 = clean_job_name_dns1123(
        "dbnd.{task_family}.{task_name}".format(
            task_family=self.task.task_meta.task_family,
            task_name=self.task.task_meta.task_name,
        ),
        postfix=".%s" % str(self.task_run_uid)[:8],
    )

    # custom per task engine , or just use one from global env
    dbnd_local_root = (
        self.task_engine.dbnd_local_root or self.run.env.dbnd_local_root
    )
    # <local_root>/<run_folder_prefix>/tasks/<task_id>
    self.local_task_run_root = (
        dbnd_local_root.folder(run.run_folder_prefix)
        .folder("tasks")
        .folder(self.task.task_id)
    )

    # attempt bookkeeping; attempt_folder/meta_files/log are populated by init_attempt()
    self._attempt_number = 1
    self.task_run_attempt_uid = get_uuid()
    self.attempt_folder = None
    self.meta_files = None
    self.log = None
    self.init_attempt()

    # TODO: inherit from parent task if disabled
    # fall back to a console-only store when either the task or the run
    # is not tracked
    self.is_tracked = task._conf__tracked
    if self.is_tracked and self.run.is_tracked:
        tracking_store = self.run.context.tracking_store
    else:
        tracking_store = ConsoleStore()

    self.tracking_store = tracking_store
    self.tracker = TaskRunTracker(task_run=self, tracking_store=tracking_store)
    self.runner = TaskRunRunner(task_run=self)
    self.deploy = TaskSyncCtrl(task_run=self)
    # NOTE: tracker must exist before this call — task_run_url() is read from it
    self.task_tracker_url = self.tracker.task_run_url()
    self.external_resource_urls = dict()
    self.errors = []

    # state flags, updated by the scheduler/executor during the run
    self.is_root = False
    self.is_reused = False
    self.is_skipped = False
    # Task can be skipped as it's not required by any other task scheduled to run
    self.is_skipped_as_not_required = False

    self._airflow_context = None
    self._task_run_state = None

    self.start_time = None
    self.finished_time = None
def test_clean_job_name_postfix_max(self):
    """An over-long name is truncated so that name + postfix fits the
    DNS-1123 length limit (244 chars of name + 9 chars of postfix here)."""
    long_name = "a" * 300
    result = clean_job_name_dns1123(
        long_name, placeholder=r"-", postfix=".%s" % "6a8330cc"
    )
    expected = "{}.{}".format("a" * 244, "6a8330cc")
    assert result == expected
def test_clean_job_name_postfix_2(self):
    """Underscores collapse to single hyphens and CamelCase is split and
    lowercased, with the job-id postfix appended."""
    suffix = ".%s" % "6a8330cc"
    result = clean_job_name_dns1123(
        "driver_submit__9991469ce9.BashCmd", postfix=suffix
    )
    assert result == "driver-submit-9991469ce9.bash-cmd.6a8330cc"
def test_clean_job_name_postfix_1(self):
    """Illegal characters ('[', ']') and case transitions are normalized to
    hyphenated lowercase before the postfix is appended."""
    suffix = ".%s" % "6a8330cc"
    result = clean_job_name_dns1123("AaBb[]1111.jobname", postfix=suffix)
    assert result == "aa-bb-1111.jobname.6a8330cc"