Example #1
    def test_tracking_after_flush(self, fake_api_request):
        ctx = get_databand_context()
        async_store = TrackingStoreThroughChannel.build_with_async_web_channel(
            ctx)
        async_store.heartbeat(get_uuid())
        async_store.flush()
        async_store.heartbeat(get_uuid())
        async_store.flush()
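Note: the test above exercises the async tracking channel: flush() drains whatever is queued, and the store keeps accepting heartbeats afterwards. A rough sketch of that queue-plus-background-worker pattern, using only the standard library (hypothetical class, not dbnd's TrackingAsyncWebChannel):

import queue
import threading

class AsyncChannelSketch(object):
    """Events are queued; a daemon worker sends them; flush() waits for the queue to empty."""

    def __init__(self, send_fn):
        self._send_fn = send_fn
        self._queue = queue.Queue()
        self._worker = threading.Thread(target=self._run, daemon=True)
        self._worker.start()

    def _run(self):
        while True:
            item = self._queue.get()
            try:
                self._send_fn(item)
            finally:
                self._queue.task_done()

    def send(self, item):
        self._queue.put(item)

    def flush(self):
        # blocks until every queued item has been processed; safe to call repeatedly
        self._queue.join()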
Example #2
    def _handle(self, name, data):
        schema = self.get_schema_by_handler_name(name)

        labels = {
            "sender": "TrackingProtoWebChannel",
            "source_version": self.source_version,
            # TODO: add env details
        }

        event = Event(uuid=str(get_uuid()),
                      schema=str_type(schema),
                      labels=labels)
        event.data.update(data)

        post_event_request = PostEventsRequest()
        post_event_request.events.append(event)
        post_event_request.timestamp.GetCurrentTime()

        data = post_event_request.SerializeToString()

        encoded_response_str = self.client.api_request("tracking/proto", data)
        b64encoded_response_bytes = encoded_response_str.encode("utf-8")
        raw_bytes = base64.b64decode(b64encoded_response_bytes)

        post_event_response = PostEventsResponse()
        post_event_response.ParseFromString(raw_bytes)

        if post_event_response.exception:
            raise Exception("Response error: %s" %
                            post_event_response.exception)

        return post_event_response.responses[event.uuid]
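Note: the wire format here is: serialize a PostEventsRequest to bytes, POST it to "tracking/proto", and get back the serialized PostEventsResponse wrapped in a base64 string. The decode side reduces to the following sketch (the message class is assumed to come from the generated tracking schema, here passed in as a parameter):

import base64

def decode_proto_response(encoded_response_str, response_cls):
    # the server returns the serialized protobuf message base64-encoded as text
    raw_bytes = base64.b64decode(encoded_response_str.encode("utf-8"))
    response = response_cls()  # e.g. PostEventsResponse()
    response.ParseFromString(raw_bytes)
    return response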
Example #3
    def init_attempt(self):
        self.task_run_attempt_uid = get_uuid()
        self.attempt_folder = self.task._meta_output.folder(
            "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
            extension=None,
        )
        self.meta_files = TaskRunMetaFiles(self.attempt_folder)
        self.log = TaskRunLogManager(task_run=self)
Example #4
    def test_thread_not_started_immideately(self, fake_api_request):
        ctx = get_databand_context()
        async_store = TrackingStoreThroughChannel.build_with_async_web_channel(
            ctx)
        assert async_store.is_ready()
        assert not async_store.channel._background_worker.is_alive

        async_store.heartbeat(get_uuid())
        assert async_store.channel._background_worker.is_alive
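Note: the assertions rely on _background_worker.is_alive staying falsy until the first heartbeat is sent, i.e. the worker thread is started lazily on first use rather than in the constructor. A minimal sketch of such lazy start (hypothetical names):

import threading

class LazyWorkerSketch(object):
    def __init__(self, target):
        self._target = target
        self._thread = None

    @property
    def is_alive(self):
        return self._thread is not None and self._thread.is_alive()

    def ensure_started(self):
        # called on the first send; no thread is created before that
        if self._thread is None:
            self._thread = threading.Thread(target=self._target, daemon=True)
            self._thread.start()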
Example #5
    def __init__(self, task_class, classdict):
        super(TaskDefinition, self).__init__()

        self.task_definition_uid = get_uuid()
        self.hidden = False

        self.task_class = task_class  # type: Type[Task]

        self.task_passport = TaskPassport.from_task_cls(task_class)

        # TODO: maybe use properties or other way to delegate those...
        self.full_task_family = self.task_passport.full_task_family
        self.full_task_family_short = self.task_passport.full_task_family_short
        self.task_family = self.task_passport.task_family
        self.task_config_section = self.task_passport.task_config_section

        # all the attributes that point to a ParameterDefinition
        self.task_params = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_params, self.defaults = self._calculate_task_class_values(classdict)

        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the new function can not return its result via the return statement
        self.single_result_output = self._is_result_single_output(self.task_params)

        defaults = {
            p.name: p.default
            for p in self.task_params.values()
            if is_defined(p.default)
        }
        self.task_defaults_config_store = parse_and_build_config_store(
            source=self.task_passport.format_source_name("defaults"),
            config_values={self.task_config_section: defaults},
            set_if_not_exists_only=True,
        )

        self.task_defaults_config_store.update(
            parse_and_build_config_store(
                source=self.task_passport.format_source_name("defaults_section"),
                config_values=self.defaults,
            )
        )
        # now, if we have overloads in code ( calculated in task_definition):
        # class T(BaseT):
        #     some_base_t_property = new_value
        if self.task_class._conf__track_source_code:
            self.task_source_code = _get_task_source_code(self.task_class)
            self.task_module_code = _get_task_module_source_code(self.task_class)
            self.task_source_file = _get_source_file(self.task_class)
        else:
            self.task_source_code = None
            self.task_module_code = ""
            self.task_source_file = None
Example #6
 def test_no_skip_after_failure(self, fake_api_request):
     with new_dbnd_context(
             conf={
                 "core": {
                     "tracker_raise_on_error": False
                 },
                 "databand": {
                     "verbose": True
                 },
             }) as ctx:
         with patch.object(TrackingAsyncWebChannel,
                           "_background_worker_skip_processing_callback"
                           ) as fake_skip:
             async_store = TrackingStoreThroughChannel.build_with_async_web_channel(
                 ctx)
             fake_api_request.side_effect = DatabandWebserverNotReachableError(
                 "fake_message")
             async_store.heartbeat(get_uuid())  # fail here
             async_store.heartbeat(get_uuid())  # no skip here
             async_store.flush()
             fake_skip.assert_not_called()
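Note: the failure path is driven entirely through unittest.mock: the patched web request gets a side_effect exception, and the assertion checks that the skip callback was never invoked because tracker_raise_on_error is False. The same mock mechanics in isolation:

from unittest import mock

fake_api_request = mock.MagicMock(side_effect=ConnectionError("fake_message"))
fake_skip = mock.MagicMock()

try:
    fake_api_request({"event": "heartbeat"})  # raises the configured side_effect
except ConnectionError:
    pass  # swallowed, analogous to tracker_raise_on_error=False

fake_skip.assert_not_called()  # the skip callback was never triggered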
Example #7
    def set_context(self, ti):
        """
        Airflow's log handler uses this method to set up the context when running a TaskInstance (=ti).
        We use this method to set up the dbnd context and communicate information to
        the `<airflow_operator>_execute` task that we create in `execute_tracking.py`.
        """
        # we set this up only when we are not in our own orchestration dag
        if ti.dag_id.startswith(AD_HOC_DAG_PREFIX):
            return

        if config.getboolean("mlflow_tracking", "databand_tracking"):
            self.airflow_logger.warning(
                "dbnd can't track mlflow and airflow together please disable dbnd config "
                "`databand_tracking` in section `mlflow_tracking`")
            return

        # we are not tracking SubDagOperator
        if ti.operator == SubDagOperator.__name__:
            return

        task_key = calc_task_run_attempt_key_from_af_ti(ti)
        env_attempt_uid = os.environ.get(task_key)

        # This key is already set, which means we are in a --raw run
        if env_attempt_uid:
            # no need for further actions inside --raw run
            return

        # communicate the task_run_attempt_uid to inner processes
        # will be used for the task_run of `<airflow_operator>_execute` task
        self.task_run_attempt_uid = get_uuid()
        self.task_env_key = task_key
        os.environ[self.task_env_key] = str(self.task_run_attempt_uid)

        # airflow calculation for the relevant log_file
        log_relative_path = self.log_file_name_factory(ti, ti.try_number)
        self.log_file = os.path.join(self.airflow_base_log_dir,
                                     log_relative_path)

        # make sure we are not polluting the airflow logs
        get_dbnd_project_config().quiet_mode = True

        # tracking msg
        self.airflow_logger.info(
            "Tracked by Databand {version}".format(version=dbnd.__version__))

        # context with disabled logs
        self.dbnd_context_manage = new_dbnd_context(
            conf={"log": {
                "disabled": True
            }})
        self.dbnd_context = self.dbnd_context_manage.__enter__()
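Note: the hand-off to the --raw child process is done purely through the environment: the handler generates a fresh attempt uid, stores it under a key derived from the task instance, and any subprocess inherits it. A minimal sketch of the publishing side (assuming get_uuid() yields a standard UUID; the key derivation is dbnd-specific and not shown):

import os
import uuid

def publish_attempt_uid(task_key):
    if task_key in os.environ:
        # already set - we are inside the child (--raw) run, nothing to do
        return os.environ[task_key]
    attempt_uid = str(uuid.uuid4())
    os.environ[task_key] = attempt_uid  # inherited by subprocesses spawned from here
    return attempt_uid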
Example #8
    def init_new_task_run_attempt(self):
        # try to find out whether we should use an attempt_uid that was set by an external process.
        # if so - the attempt_uid is unique to this task_run_attempt, which is why we pop it.
        attempt_id = try_pop_attempt_id_from_env(self.task)
        if attempt_id:
            self.task_run_attempt_uid = UUID(attempt_id)
        else:
            self.task_run_attempt_uid = get_uuid()

        self.attempt_folder = self.task._meta_output.folder(
            "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
            extension=None,
        )
        self.attempt_folder_local = self.local_task_run_root.folder(
            "attempt_%s_%s" % (self.attempt_number, self.task_run_attempt_uid),
            extension=None,
        )
        self.attemp_folder_local_cache = self.attempt_folder_local.folder(
            "cache")
        self.meta_files = TaskRunMetaFiles(self.attempt_folder)
        self.log = TaskRunLogManager(task_run=self)
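Note: this is the consuming side of that environment hand-off: the value is popped so it applies to exactly one task_run_attempt and is promoted back to a UUID object; otherwise a fresh uid is generated. A simplified sketch of the pop-or-generate logic (the real helper takes the task, here reduced to the env key):

import os
import uuid
from uuid import UUID

def pop_or_generate_attempt_uid(task_key):
    value = os.environ.pop(task_key, None)  # pop: the uid can be claimed only once
    return UUID(value) if value else uuid.uuid4()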
Example #9
    def build_task_run_info(self):
        task_run_env_uid = get_uuid()
        import dbnd

        logging.debug("Created new task run env with uid '%s'", task_run_env_uid)

        machine = environ.get(ENV_DBND__ENV_MACHINE, "")
        if environ.get(ENV_DBND__ENV_IMAGE, None):
            machine += " image=%s" % environ.get(ENV_DBND__ENV_IMAGE)
        return TaskRunEnvInfo(
            uid=task_run_env_uid,
            databand_version=dbnd.__version__,
            user_code_version=self.source_version,
            user_code_committed=True,
            cmd_line=subprocess.list2cmdline(sys.argv),
            user=self.user or dbnd_getuser(),
            machine=machine,
            project_root=project_path(),
            user_data=safe_string(self.user_data, max_value_len=500),
            heartbeat=utcnow(),
        )
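Note: besides the fresh uid, this snapshot captures the command line and the user running the process; the standard-library equivalents look roughly like this (getpass.getuser() is a stand-in for dbnd_getuser()):

import getpass
import subprocess
import sys

cmd_line = subprocess.list2cmdline(sys.argv)  # e.g. 'my_pipeline.py --set run.name=demo'
user = getpass.getuser()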
Example #10
    def __init__(self, task_class, classdict, namespace_at_class_time):
        super(TaskDefinition, self).__init__()

        self.task_definition_uid = get_uuid()
        self.hidden = False

        self.task_class = task_class  # type: Type[Task]
        self.namespace_at_class_time = namespace_at_class_time
        if self.task_class._conf__decorator_spec:
            cls_name = self.task_class._conf__decorator_spec.name
        else:
            cls_name = self.task_class.__name__

        self.full_task_family = "%s.%s" % (task_class.__module__, cls_name)
        self.full_task_family_short = "%s.%s" % (
            _short_name(task_class.__module__),
            cls_name,
        )

        self.task_family = self._build_user_task_family()
        if not self.task_family:
            self.task_family = cls_name
            self.task_config_section = self.full_task_family
        else:
            self.task_config_section = self.task_family

        # all the attributes that point to a ParameterDefinition
        self.task_params = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_params, self.defaults = self._calculate_task_class_values(
            classdict)

        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the new function can not return its result via the return statement
        self.single_result_output = self._is_result_single_output(
            self.task_params)

        defaults = {
            p.name: p.default
            for p in self.task_params.values() if is_defined(p.default)
        }
        self.task_defaults_config_store = parse_and_build_config_store(
            source="%s[defaults]" % self.full_task_family_short,
            config_values={self.task_config_section: defaults},
            set_if_not_exists_only=True,
        )

        self.task_defaults_config_store.update(
            parse_and_build_config_store(
                source="%s[defaults_section]" % self.full_task_family_short,
                config_values=self.defaults,
            ))
        # now, if we have overloads in code ( calculated in task_definition):
        # class T(BaseT):
        #     some_base_t_property = new_value
        if self.task_class._conf__track_source_code:
            self.task_source_code = _get_task_source_code(self.task_class)
            self.task_module_code = _get_task_module_source_code(
                self.task_class)
            self.task_source_file = _get_source_file(self.task_class)
        else:
            self.task_source_code = None
            self.task_module_code = ""
            self.task_source_file = None
Example #11
    def __init__(
        self,
        context,
        task_or_task_name,
        run_uid=None,
        scheduled_run_info=None,
        send_heartbeat=True,
        existing_run=None,
        job_name=None,
        source=UpdateSource.dbnd,
        af_context=None,
    ):
        # type:(DatabandContext, Union[Task, str] , Optional[UUID], Optional[ScheduledRunInfo], Optional[bool], Optional[UpdateSource]) -> None
        self.context = context
        s = self.context.settings  # type: DatabandSettings

        if isinstance(task_or_task_name, six.string_types):
            self.root_task_name = task_or_task_name
            self.root_task = None
        elif isinstance(task_or_task_name, Task):
            self.root_task_name = task_or_task_name.task_name
            self.root_task = task_or_task_name
        else:
            raise  # unexpected type for task_or_task_name

        self.job_name = job_name or self.root_task_name

        self.description = s.run.description
        self.is_archived = s.run.is_archived
        self.source = source
        # this was added to allow the scheduler to create the run which will be continued by the actual run command instead of having 2 separate runs
        if not run_uid and DBND_RUN_UID in os.environ:
            # we pop so that if this run spawns subprocesses with their own runs, they will be associated using the sub-runs mechanism instead
            # of being fused into this run directly
            run_uid = os.environ.pop(DBND_RUN_UID)
        if run_uid:
            self.run_uid = run_uid
            self.existing_run = True
        else:
            self.run_uid = get_uuid()
            self.existing_run = False

        if existing_run is not None:
            self.existing_run = existing_run

        self.name = s.run.name or get_name_for_uid(self.run_uid)
        # this is so the scheduler can create a run with partial information and then have the subprocess running the actual cmd fill in the details
        self.resubmit_run = (DBND_RESUBMIT_RUN in os.environ
                             and os.environ.pop(DBND_RESUBMIT_RUN) == "true")

        # AIRFLOW, move into executor
        # dag_id , execution_date and run_id is used by airflow
        self.dag_id = AD_HOC_DAG_PREFIX + self.root_task_name
        self.execution_date = unique_execution_date()
        run_id = s.run.id
        if not run_id:
            # we need this name, otherwise Airflow will try to manage our local jobs at the scheduler
            # (zombie cleanup and so on)
            run_id = "backfill_{0}_{1}".format(self.name,
                                               self.execution_date.isoformat())
        self.run_id = run_id

        self._template_vars = self._build_template_vars()

        self.is_tracked = True

        self.runtime_errors = []
        self._run_state = None
        self.task_runs = []  # type: List[TaskRun]
        self.task_runs_by_id = {}
        self.task_runs_by_af_id = {}

        self.target_origin = TargetIdentitySourceMap()
        self.describe = DescribeRun(self)
        self.tracker = RunTracker(self,
                                  tracking_store=self.context.tracking_store)

        # ALL RUN CONTEXT SPECIFIC thing
        self.root_run_info = RootRunInfo.from_env(current_run=self)
        self.scheduled_run_info = scheduled_run_info or ScheduledRunInfo.from_env(
            self.run_uid)

        # now we can add driver task
        self.driver_task_run = None  # type: Optional[TaskRun]
        self.root_task_run = None  # type: Optional[TaskRun]

        self.run_folder_prefix = os.path.join(
            "log",
            self.execution_date.strftime("%Y-%m-%d"),
            "%s_%s_%s" % (
                self.execution_date.strftime("%Y-%m-%dT%H%M%S.%f"),
                self.root_task_name,
                self.name,
            ),
        )

        self.run_config = self.context.settings.run  # type: RunConfig
        self.env = env = self.context.env

        self.local_engine = self._get_engine_config(env.local_engine)
        self.remote_engine = self._get_engine_config(env.remote_engine
                                                     or env.local_engine)

        self.submit_driver = (self.run_config.submit_driver
                              if self.run_config.submit_driver is not None else
                              env.submit_driver)
        self.submit_tasks = (self.run_config.submit_tasks
                             if self.run_config.submit_tasks is not None else
                             env.submit_tasks)
        self.task_executor_type, self.parallel = calculate_task_executor_type(
            self.submit_tasks, self.remote_engine, self.context.settings)

        self.sends_heartbeat = send_heartbeat
        self.dynamic_af_tasks_count = dict()
        self.af_context = af_context
        self.start_time = None
        self.finished_time = None
Example #12
    def run_submitter(self):
        """
        This is the task that represents "submission"
        it can just one task, or.. more tasks, as we can have "docker builds" or other preparations
        this is why we will not run it directly, but do a "full run" with executor

        """
        run = self.run
        # we are running the submitter, which will send the driver to the remote engine
        remote_engine = self.remote_engine

        settings = run.context.settings
        settings.git.validate_git_policy()

        # let's prepare for remote execution
        remote_engine.prepare_for_run(run)

        result_map_target = run.run_root.file("{}.json".format(get_uuid()))
        cmd_line_args = (["run"] + _get_dbnd_run_relative_cmd() + [
            "--run-driver",
            str(run.run_uid),
            "--set",
            "run.run_result_json_path={}".format(result_map_target.path),
            "--set",
            "run.execution_date={}".format(
                run.execution_date.strftime("%Y-%m-%dT%H%M%S.%f")),
        ])

        args = remote_engine.dbnd_executable + cmd_line_args
        submit_to_engine_task = remote_engine.submit_to_engine_task(
            env=run.env,
            args=args,
            task_name="dbnd_driver_run",
            interactive=settings.run.interactive,
        )
        submit_to_engine_task._conf_confirm_on_kill_msg = (
            "Ctrl-C Do you want to kill your submitted pipeline?"
            "If selection is 'no', this process will detach from the run.")
        run.root_task = submit_to_engine_task

        # we run all tasks on local engine
        task_runs = self._init_task_runs_for_execution(
            task_engine=self.host_engine)

        # create executor without driver task!
        # We use local executor to run all tasks (submit_to_engine and required by it tasks)
        # In most cases it will run only submit_to_engine task,
        # But there are scenarios when submit_to_engine task asks for docker builds
        # so we execute the whole pipeline.
        task_executor = LocalTaskExecutor(
            run,
            task_executor_type=TaskExecutorType.local,
            host_engine=self.host_engine,
            target_engine=self.host_engine,
            task_runs=task_runs,
        )

        task_executor.do_run()
        self.result_location = result_map_target

        logger.info(run.describe.run_banner_for_submitted())
Example #13
    def __init__(
        self,
        task,
        run,
        task_af_id=None,
        try_number=1,
        is_dynamic=None,
        task_engine=None,
    ):
        # type: (Task, DatabandRun, str, int, bool, EngineConfig) -> None
        # actually this is used as Task uid

        self.task = task  # type: Task
        self.run = run  # type: DatabandRun
        self.task_engine = task_engine
        self.try_number = try_number
        self.is_dynamic = is_dynamic if is_dynamic is not None else task.task_is_dynamic
        self.is_system = task.task_is_system
        self.task_af_id = task_af_id or self.task.task_id

        if task.ctrl.force_task_run_uid:
            self.task_run_uid = tr_uid = task.ctrl.force_task_run_uid
            if isinstance(tr_uid, TaskRunUidGen):
                self.task_run_uid = tr_uid.generate_task_run_uid(
                    run=run, task=task, task_af_id=self.task_af_id
                )
        else:
            self.task_run_uid = get_uuid()

        # used by all kinds of submission controllers
        self.job_name = clean_job_name(self.task_af_id).lower()
        self.job_id = self.job_name + "_" + str(self.task_run_uid)[:8]

        # DNS-1123 subdomain name (k8s)
        self.job_id__dns1123 = clean_job_name_dns1123(
            "dbnd.{task_family}.{task_name}".format(
                task_family=self.task.task_meta.task_family,
                task_name=self.task.task_meta.task_name,
            ),
            postfix=".%s" % str(self.task_run_uid)[:8],
        )

        # custom per-task engine, or just use the one from the global env
        dbnd_local_root = (
            self.task_engine.dbnd_local_root or self.run.env.dbnd_local_root
        )
        self.local_task_run_root = (
            dbnd_local_root.folder(run.run_folder_prefix)
            .folder("tasks")
            .folder(self.task.task_id)
        )

        self._attempt_number = 1
        self.task_run_attempt_uid = get_uuid()
        self.attempt_folder = None
        self.meta_files = None
        self.log = None
        self.init_attempt()

        # TODO: inherit from parent task if disabled
        self.is_tracked = task._conf__tracked

        if self.is_tracked and self.run.is_tracked:
            tracking_store = self.run.context.tracking_store
        else:
            tracking_store = ConsoleStore()

        self.tracking_store = tracking_store
        self.tracker = TaskRunTracker(task_run=self, tracking_store=tracking_store)
        self.runner = TaskRunRunner(task_run=self)
        self.deploy = TaskSyncCtrl(task_run=self)
        self.task_tracker_url = self.tracker.task_run_url()
        self.external_resource_urls = dict()
        self.errors = []

        self.is_root = False
        self.is_reused = False
        self.is_skipped = False
        # Task can be skipped as it's not required by any other task scheduled to run
        self.is_skipped_as_not_required = False

        self._airflow_context = None
        self._task_run_state = None

        self.start_time = None
        self.finished_time = None
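Note: two derived identifiers here are worth calling out: job_id is the cleaned job name plus the first 8 hex characters of the task_run_uid, and job_id__dns1123 additionally has to satisfy Kubernetes' DNS-1123 subdomain rules. A rough illustration (the clean_job_name* helpers are dbnd's; the regex below is only an approximation):

import re
import uuid

task_run_uid = uuid.uuid4()                      # stand-in for get_uuid()
job_name = "prepare_data"
job_id = job_name + "_" + str(task_run_uid)[:8]  # e.g. "prepare_data_1a2b3c4d"

# DNS-1123-ish cleanup: lowercase, only [a-z0-9.-], at most 253 characters
job_id_dns1123 = re.sub(r"[^a-z0-9.-]", "-", ("dbnd." + job_id).lower())[:253]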
Example #14
    def generate_task_run_uid(self, run, task, task_af_id):
        return get_uuid()
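Note: this looks like the default generator behind the TaskRunUidGen hook used via force_task_run_uid in Example #13; a custom generator only has to implement the same method. A hypothetical deterministic variant, so that the same run/task pair reproduces the same uid:

import uuid

class DeterministicTaskRunUidGen(object):
    NAMESPACE = uuid.UUID("00000000-0000-0000-0000-000000000000")  # any fixed namespace

    def generate_task_run_uid(self, run, task, task_af_id):
        # uuid5 is a stable hash of its inputs, unlike the random default above
        return uuid.uuid5(self.NAMESPACE, "%s.%s" % (run.run_uid, task_af_id))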
Example #15
    def __init__(
            self,
            task_passport,  # type: TaskPassport
            classdict=None,  # type: Optional[Dict[str, Any]]
            base_task_definitions=None,  # type: Optional[List[TaskDefinition]]
            defaults=None,  # type: Optional[Dict[ParameterDefinition, Any]]
            task_decorator=None,  # type: Optional[TaskDecorator]
            source_code=None,  # type: Optional[TaskSourceCode]
            external_parameters=None,  # type: Optional[Parameters]
            task_definition_uid=None,  # type: Optional[UUID]
    ):
        super(TaskDefinition, self).__init__()

        self.hidden = False

        self.task_passport = task_passport
        self.source_code = source_code
        self.task_decorator = task_decorator
        self.base_task_definitions = (base_task_definitions
                                      or [])  # type: List[ TaskDefinition]

        # TODO: maybe use properties or other way to delegate those...
        self.full_task_family = self.task_passport.full_task_family
        self.full_task_family_short = self.task_passport.full_task_family_short
        self.task_family = self.task_passport.task_family
        self.task_config_section = self.task_passport.task_config_section

        # all the attributes that point to a ParameterDefinition
        self.task_param_defs = dict()  # type: Dict[str, ParameterDefinition]

        # the defaults attribute
        self.defaults = dict()  # type: Dict[ParameterDefinition, Any]

        self.task_param_defs = self._calculate_task_class_values(
            classdict, external_parameters)
        # if we have output params in the function arguments, like f(some_p=parameter.output),
        # the new function can not return its result via the return statement
        self.single_result_output = self._is_result_single_output(
            self.task_param_defs)

        self.param_defaults = {
            p.name: p.default
            for p in self.task_param_defs.values() if is_defined(p.default)
        }

        # TODO: consider joining with task_config
        # TODO: calculate defaults value as _ConfigStore and merge using standard mechanism
        self.defaults = self._calculate_task_defaults(defaults)
        self.task_defaults_config_store = parse_and_build_config_store(
            source=self.task_passport.format_source_name("task.defaults"),
            config_values=self.defaults,
            priority=ConfigValuePriority.FALLBACK,
        )

        self.task_signature_extra = {}
        if config.getboolean("task_build", "sign_with_full_qualified_name"):
            self.task_signature_extra[
                "full_task_family"] = self.full_task_family
        if config.getboolean("task_build", "sign_with_task_code"):
            self.task_signature_extra[
                "task_code_hash"] = user_friendly_signature(
                    self.source_code.task_source_code)

        if task_definition_uid:
            self.task_definition_uid = task_definition_uid
        else:
            self.task_definition_uid = get_uuid()
Example #16
def _generate_unique_tracking_signature():
    return Signature("tracking", user_friendly_signature(str(get_uuid())),
                     "unique tracking call")
Example #17
    def __init__(
        self,
        context,  # type: DatabandContext
        job_name,
        run_uid=None,  # type:  Optional[UUID]
        scheduled_run_info=None,  # type:  Optional[ScheduledRunInfo]
        existing_run=None,
        source=UpdateSource.dbnd,  # type:Optional[UpdateSource]
        af_context=None,
        is_orchestration=False,
    ):
        self.context = context
        s = self.context.settings  # type: DatabandSettings

        self.job_name = job_name

        self.description = s.run.description
        self.is_archived = s.run.is_archived
        self.source = source
        self.is_orchestration = is_orchestration

        self.existing_run = existing_run or False
        # this was added to allow the scheduler to create the run which will be continued by the actual run command instead of having 2 separate runs
        if not run_uid and DBND_RUN_UID in os.environ:
            # we pop so that if this run spawns subprocesses with their own runs, they will be associated using the sub-runs mechanism instead
            # of being fused into this run directly
            run_uid = os.environ.pop(DBND_RUN_UID)
        if run_uid:
            self.run_uid = run_uid
            self.existing_run = True
        else:
            self.run_uid = get_uuid()

        # if the user provided a name - use it
        # otherwise - generate a human-friendly name for the run
        self.name = s.run.name or get_random_name(seed=self.run_uid)
        self.execution_date = unique_execution_date()

        self.is_tracked = True

        # tracking/orchestration main task
        self.root_task = None  # type: Optional[Task]

        # task run that wraps execution (tracking or orchestration)
        self._driver_task_run = None

        # ORCHESTRATION: execution of the run
        self.run_executor = None  # type: Optional[RunExecutor]

        # dag_id , execution_date are used by Airflow,
        # should be deprecated (still used by DB tracking)
        self.dag_id = AD_HOC_DAG_PREFIX + self.job_name

        # RUN STATE
        self._run_state = None
        self.task_runs = []  # type: List[TaskRun]
        self.task_runs_by_id = {}
        self.task_runs_by_af_id = {}

        self.target_origin = TargetIdentitySourceMap()
        self.describe = RunBanner(self)
        self.tracker = RunTracker(self,
                                  tracking_store=self.context.tracking_store)

        # ALL RUN CONTEXT SPECIFIC thing
        self.root_run_info = RootRunInfo.from_env(current_run=self)
        self.scheduled_run_info = scheduled_run_info or ScheduledRunInfo.from_env(
            self.run_uid)
        self.env = self.context.env
        self.run_folder_prefix = os.path.join(
            "log",
            self.execution_date.strftime("%Y-%m-%d"),
            "%s_%s_%s" % (
                self.execution_date.strftime("%Y-%m-%dT%H%M%S.%f"),
                self.job_name,
                self.name,
            ),
        )
        self.run_root = self.env.dbnd_root.folder(self.run_folder_prefix)
        self.run_local_root = self.env.dbnd_local_root.folder(
            self.run_folder_prefix)

        self.local_engine = build_engine_config(
            self.env.local_engine).clone(require_submit=False)

        self.dynamic_af_tasks_count = dict()
        self.af_context = af_context
        self.start_time = None
        self.finished_time = None
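Note: one detail worth noting in this newer constructor is get_random_name(seed=self.run_uid): the human-friendly run name is seeded with the run uid, so the same uid maps to the same name. A hypothetical stand-in using the standard library:

import random
import uuid

def get_random_name_sketch(seed):
    adjectives = ["brave", "calm", "eager", "shiny"]
    animals = ["falcon", "lynx", "otter", "panda"]
    rng = random.Random(str(seed))  # seeding with the run_uid makes the choice reproducible
    return "%s_%s" % (rng.choice(adjectives), rng.choice(animals))

run_uid = uuid.uuid4()  # stand-in for get_uuid()
print(get_random_name_sketch(run_uid))  # e.g. "calm_otter"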