Example #1
0
def project_path(*path):
    """
    Resolve the given path segments against the databand project directory.

    Deprecated: prefer dbnd_project_config from
    dbnd._core.configuration.environ_config.

    :param path: path segments, forwarded unchanged to ``dbnd_project_path``.
    :return: the value produced by ``dbnd_project_path`` for these segments.
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_project_path(*path)
Example #2
0
def databand_system_path(*path):
    """
    Resolve the given path segments via the project config's ``dbnd_system_path``.

    Deprecated: prefer dbnd_project_config from
    dbnd._core.configuration.environ_config.

    :param path: path segments, forwarded unchanged to ``dbnd_system_path``.
    :return: the value produced by ``dbnd_system_path`` for these segments.
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_system_path(*path)
Example #3
0
def set_tracking_context():
    """
    Generator that switches the project config to tracking for one ``yield``.

    Before yielding, the project config is reset and its ``_dbnd_tracking``
    flag is set to True. Afterwards - even if the body raised - tracking is
    stopped and the project config is reset again.
    """
    try:
        # start from a fresh project config, then force the tracking flag on it
        reset_dbnd_project_config()
        project_config = get_dbnd_project_config()
        project_config._dbnd_tracking = True
        yield
    finally:
        # always undo: stop tracking first, then drop the modified config
        dbnd_tracking_stop()
        reset_dbnd_project_config()
Example #4
0
def dbnd_bootstrap():
    """
    Runs dbnd bootstrapping.

    The sequence is: system bootstrap, marshaller registration, logger/warning
    suppression, plugin registration (built-in, then user plugins listed under
    the ``core.plugins`` config key), plugin setup hooks, optional SIGQUIT
    stack-dump handler, and finally the user "preinit" function if one is
    configured.

    The function returns immediately when a bootstrap has already been started
    (``_dbnd_bootstrap_started``); ``_dbnd_bootstrap`` is set only once the
    whole sequence has finished.

    NOTE(review): ``_dbnd_bootstrap_started`` is never reset here, so if any
    step below raises, later calls return early without completing the
    bootstrap - confirm this is intended.
    """
    global _dbnd_bootstrap
    global _dbnd_bootstrap_started
    # re-entrancy guard: any call made while (or after) bootstrapping is a no-op,
    # which also stops code run below from recursing back into this function
    if _dbnd_bootstrap_started:
        return
    _dbnd_bootstrap_started = True

    dbnd_system_bootstrap()

    dbnd_project_config = get_dbnd_project_config()

    # imported inside the function rather than at module level
    from targets.marshalling import register_basic_data_marshallers

    register_basic_data_marshallers()

    _surpress_loggers()
    _suppress_warnings()
    enable_osx_forked_request_calls()

    register_dbnd_plugins()

    from dbnd._core.configuration import environ_config
    from dbnd._core.configuration.dbnd_config import config
    from dbnd._core.plugin.dbnd_plugins import pm
    from dbnd._core.utils.basics.load_python_module import run_user_func

    # user plugins are configured as a comma-separated string under [core] plugins
    user_plugins = config.get("core", "plugins", None)
    if user_plugins:
        register_dbnd_user_plugins(user_plugins.split(","))

    # unit-test hook is fired before the regular plugin setup hook
    if is_unit_test_mode():
        pm.hook.dbnd_setup_unittest()

    pm.hook.dbnd_setup_plugin()

    if dbnd_project_config.is_sigquit_handler_on:
        from dbnd._core.utils.basics.signal_utils import (
            register_sigquit_stack_dump_handler,
        )

        register_sigquit_stack_dump_handler()

    # now we can run user code ( at driver/task)
    user_preinit = environ_config.get_user_preinit()
    if user_preinit:
        run_user_func(user_preinit)

    # mark the bootstrap as fully completed; protection against recursive calls
    # (e.g. from the user preinit code above) comes from _dbnd_bootstrap_started
    _dbnd_bootstrap = True
Example #5
0
def dbnd_system_bootstrap():
    """
    Run the system-level bootstrap once: log the start banner and load system configs.

    Guarded by the module-level ``_dbnd_system_bootstrap`` flag. The flag is set
    before any work starts and cleared again if an ``Exception`` is raised, so a
    failed bootstrap can be attempted again; the exception is re-raised.
    """
    global _dbnd_system_bootstrap
    if _dbnd_system_bootstrap:
        return

    # raise the flag up-front so calls made while bootstrapping return early
    _dbnd_system_bootstrap = True
    try:
        # fetching the project config also initializes the env if it's not initialized
        project_config = get_dbnd_project_config()

        show_banner = not project_config.quiet_mode
        if show_banner:
            logger.info("Starting Databand %s!\n%s", dbnd.__version__, _env_banner())

        from databand import dbnd_config

        dbnd_config.load_system_configs()
    except Exception:
        # roll the flag back so that a later call can retry
        _dbnd_system_bootstrap = False
        raise
Example #6
0
def dbnd_system_bootstrap():
    """
    Run the system-level bootstrap once: log the start banner and load system configs.

    Unless the project config is in quiet mode, the banner is logged, followed
    by the source revision when the ``DBND__RUN_INFO__SOURCE_VERSION``
    environment variable holds a non-empty value.

    Guarded by the module-level ``_dbnd_system_bootstrap`` flag. The flag is set
    before any work starts and cleared again if an ``Exception`` is raised, so a
    failed bootstrap can be attempted again; the exception is re-raised.
    """
    global _dbnd_system_bootstrap
    if _dbnd_system_bootstrap:
        return

    # raise the flag up-front so calls made while bootstrapping return early
    _dbnd_system_bootstrap = True
    try:
        # fetching the project config also initializes the env if it's not initialized
        project_config = get_dbnd_project_config()

        if not project_config.quiet_mode:
            logger.info("Starting Databand %s!\n%s", dbnd.__version__, _env_banner())

            source_version = os.environ.get("DBND__RUN_INFO__SOURCE_VERSION")
            if source_version:
                logger.info("revision: %s", source_version)

        from databand import dbnd_config

        dbnd_config.load_system_configs()
    except Exception:
        # roll the flag back so that a later call can retry
        _dbnd_system_bootstrap = False
        raise
Example #7
0
    def _get_env_vars(self, conf_env_vars=None):
        """
        Build the environment-variable mapping for a submitted run.

        The mapping starts with the current task run attempt uid and the current
        tracking-mode flag (both stringified). User-configured variables are then
        layered on top, and finally the flags driven by ``self.config`` are set,
        so those flags win over user-configured values with the same key.

        :param conf_env_vars: variables to merge in; when None,
            ``self.config.env_vars`` is used instead.
        :return: dict of environment variables.
        """
        attempt_uid = current().current_task_run.task_run_attempt_uid
        tracking_mode = get_dbnd_project_config().is_tracking_mode()
        env_vars = {
            DBND_TASK_RUN_ATTEMPT_UID: str(attempt_uid),
            ENV_DBND__TRACKING: str(tracking_mode),
        }

        extra_env_vars = (
            self.config.env_vars if conf_env_vars is None else conf_env_vars
        )
        if extra_env_vars:
            env_vars.update(extra_env_vars)

        if self.config.fix_pyspark_imports:
            env_vars[ENV_DBND_FIX_PYSPARK_IMPORTS] = "True"

        if self.config.disable_pluggy_entrypoint_loading:
            # Disable pluggy loading for spark-submitted run
            env_vars[ENV_DBND__DISABLE_PLUGGY_ENTRYPOINT_LOADING] = "True"
            # Attach all loaded plugins to be manually loaded in submitted run;
            # names are passed with "-" replaced by "_", joined by commas
            env_vars[ENV_DBND__CORE__PLUGINS] = ",".join(
                [entry[0].replace("-", "_") for entry in pm.list_name_plugin()]
            )

        return env_vars
Example #8
0
def databand_config_path(*path):
    """
    Resolve the given path segments via the project config's ``dbnd_config_path``.

    :param path: path segments, forwarded unchanged to ``dbnd_config_path``.
    :return: the value produced by ``dbnd_config_path`` for these segments.
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_config_path(*path)
Example #9
0
def _is_verbose():
    """Return what the current dbnd project config reports from ``is_verbose()``."""
    return get_dbnd_project_config().is_verbose()
Example #10
0
    track_module_functions,
    track_modules,
)
from dbnd._core.utils.project.project_fs import (
    databand_lib_path,
    databand_system_path,
    project_path,
    relative_path,
)
from dbnd.tasks import basics
from targets import _set_patches

from dbnd._core.configuration.environ_config import (  # isort:skip
    get_dbnd_project_config, )

# Runs at import time: validate the project config as soon as this module is loaded.
# NOTE(review): what validate_init() actually checks lives in environ_config - confirm there.
get_dbnd_project_config().validate_init()  # isort:skip

# Expose the `config` object under the `dbnd_config` name as well.
dbnd_config = config
__all__ = [
    "hookimpl",
    # context management
    "new_dbnd_context",
    "current",
    "dbnd_context",
    "current_task",
    "current_task_run",
    "get_databand_run",
    "get_databand_context",
    # inplace implementation
    "dbnd_run_start",
    "dbnd_run_stop",
Example #11
0
def project_path(*path):
    """
    Resolve the given path segments via the project config's ``dbnd_project_path``.

    :param path: path segments, forwarded unchanged to ``dbnd_project_path``.
    :return: the value produced by ``dbnd_project_path`` for these segments.
    """
    project_config = get_dbnd_project_config()
    return project_config.dbnd_project_path(*path)
Example #12
0
    def handle_callable_call(self, *call_args, **call_kwargs):
        """
        Dispatch a call of the decorated function/class according to the current dbnd mode.

        Checked in this order:

        1. dbnd is disabled: call the original ``class_or_func`` directly.
        2. tracking mode: call the original inside ``self.tracking_context`` and
           pass its result through the callback that context yields.
        3. Airflow DAG build context: delegate to
           ``build_task_at_airflow_dag_context``.
        4. no current task: log a one-time warning and call the original directly.
        5. orchestration, BUILD phase: instantiate the task class; return
           ``t.result`` for a single-result task, otherwise the task object.
        6. orchestration, RUN phase: run as a dynamic task when the run settings
           and the current task allow it, otherwise call the original directly.

        :param call_args: positional arguments of the user's call.
        :param call_kwargs: keyword arguments of the user's call.
        :return: result of the original callable, a task object, or a task
            result, depending on the branch taken (see above).
        :raises Exception: when a current task exists but the phase is neither
            BUILD nor RUN.
        """
        dbnd_project_config = get_dbnd_project_config()
        if dbnd_project_config.disabled:
            return self.class_or_func(*call_args, **call_kwargs)

        # we are at tracking mode
        if dbnd_project_config.is_tracking_mode():
            with self.tracking_context(call_args, call_kwargs) as track_result_callback:
                fp_result = self.class_or_func(*call_args, **call_kwargs)
                return track_result_callback(fp_result)

        #### DBND ORCHESTRATION MODE
        #
        #     -= Use "Step into My Code"" to get back from dbnd code! =-
        #
        # decorated object call/creation  ( my_func(), MyDecoratedTask()
        # we are at orchestration mode

        task_cls = self.get_task_cls()

        if is_in_airflow_dag_build_context():
            # we are in Airflow DAG building mode - AIP-31
            return build_task_at_airflow_dag_context(
                task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
            )

        current = try_get_current_task()
        if not current:
            # no tracking/no orchestration,
            # falling back to "natural call" of the class_or_func
            message = (
                "Can't report tracking info. %s is decorated with @task, but no tracking context was found"
                % (self.class_or_func.__name__,)
            )
            get_one_time_logger().log_once(message, "task_decorator", logging.WARNING)
            return self.class_or_func(*call_args, **call_kwargs)

        ######
        # current is not None, and we are not in tracking/airflow/luigi
        # this is DBND Orchestration mode
        # we can be in the context of task.run() or in task.band()
        # called from user code using user_decorated_func()  or UserDecoratedTask()

        if self.is_class:
            # drop the internal marker kwarg (if present) before the kwargs are used below
            call_kwargs.pop("__call_original_cls", False)

        # we should not get here from _TaskFromTaskDecorator.invoke()
        # at that function we should call user code directly
        phase = current_phase()
        if phase is TaskContextPhase.BUILD:
            # we are in the @pipeline.band() context, we are building execution plan
            t = task_cls(*call_args, **call_kwargs)

            # we are in the band, and if user_code() is called we want to remove redundant
            # `user_code().result` usage
            if t.task_definition.single_result_output:
                return t.result

            # we have multiple outputs (more than one "output" parameter)
            # just return task object, user will use it as `user_code().output_1`
            return t
        elif phase is TaskContextPhase.RUN:
            # we are "running" inside some other task execution (orchestration!)
            #  (inside user_defined_function() or UserDefinedTask.run()

            # if possible we will run it as "orchestration" task
            # with parameters parsing
            if (
                current.settings.run.task_run_at_execution_time_enabled
                and current.task_supports_dynamic_tasks
            ):
                return self._run_task_from_another_task_execution(
                    parent_task=current, call_args=call_args, call_kwargs=call_kwargs
                )
            # we can not call it in "dbnd" way, fallback to normal call
            if self.is_class:
                call_kwargs["__call_original_cls"] = False
            return self.class_or_func(*call_args, **call_kwargs)
        else:
            # neither BUILD nor RUN: fail with enough context to diagnose the call site
            raise Exception(
                "Unexpected task context phase %r while calling %s"
                % (phase, self.class_or_func.__name__)
            )
 def apply_env_vars_to_pod(self, pod):
     """
     Set dbnd/airflow environment variables on ``pod.envs`` in place.

     ``AIRFLOW__KUBERNETES__DAGS_IN_IMAGE`` is always set to "True"; the
     tracking variable is set to "False" only when the project config is not
     in tracking mode.
     """
     pod.envs["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
     if get_dbnd_project_config().is_tracking_mode():
         # tracking mode: leave the tracking variable untouched
         return
     pod.envs[ENV_DBND__TRACKING] = "False"
    def build_pod(
        self,
        task_run: TaskRun,
        cmds: List[str],
        args: Optional[List[str]] = None,
        labels: Optional[Dict[str, str]] = None,
        try_number: Optional[int] = None,
        include_system_secrets: bool = False,
    ) -> k8s.V1Pod:
        """
        Build the kubernetes pod object that will execute ``task_run``.

        :param task_run: task run the pod is created for; supplies uids, names,
            tracker url and spawn environment.
        :param cmds: container command. Replaced by ``/bin/bash -c`` when
            ``self.trap_exit_file_flag`` is set, and by the split
            ``self.debug_with_command`` when that is set.
        :param args: container arguments. Replaced (not extended) by a generated
            shell script when ``self.trap_exit_file_flag`` is set.
        :param labels: extra labels, combined with ``self.labels``.
        :param try_number: forwarded to ``get_pod_name``.
        :param include_system_secrets: forwarded to ``get_secrets``.
        :return: result of reconciling the base pod with the pod built here.
        :raises DatabandConfigError: when ``self.container_tag`` is not set.
        """
        if not self.container_tag:
            raise DatabandConfigError(
                "Your container tag is None, please check your configuration",
                help_msg="Container tag should be assigned",
            )

        pod_name = self.get_pod_name(task_run=task_run, try_number=try_number)

        image = self.full_image
        # NOTE(review): which side wins on duplicate keys depends on combine_mappings - confirm
        labels = combine_mappings(labels, self.labels)
        labels["pod_name"] = pod_name

        # identifiers of the run / task run, attached as labels
        labels["dbnd_run_uid"] = task_run.run.run_uid
        labels["dbnd_task_run_uid"] = task_run.task_run_uid
        labels["dbnd_task_run_attempt_uid"] = task_run.task_run_attempt_uid
        labels[
            "dbnd_task_family"] = task_run.task.task_definition.full_task_family_short
        labels["dbnd_task_name"] = task_run.task.task_name
        labels["dbnd_task_af_id"] = task_run.task_af_id

        # for easier pod deletion (kubectl delete pod -l dbnd=task_run -n <my_namespace>)
        if task_run.task.task_is_system:
            labels["dbnd"] = "dbnd_system_task_run"
        else:
            labels["dbnd"] = "task_run"

        # we need to be sure that the values meet the dns label names RFC
        # https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#dns-label-names
        # (every value is stringified first; label names are left as they are)
        labels = {
            label_name: clean_label_name_dns1123(str(label_value))
            for label_name, label_value in six.iteritems(labels)
        }
        if is_verbose():
            logger.info("Build pod with kubernetes labels {}".format(labels))

        # copy, so that self.annotations is not mutated by the additions below
        annotations = self.annotations.copy()
        if self.gcp_service_account_keys:
            annotations[
                "iam.cloud.google.com/service-account"] = self.gcp_service_account_keys
        annotations["dbnd_tracker"] = task_run.task_tracker_url

        from dbnd_docker.kubernetes.vendorized_airflow.dbnd_extended_resources import (
            DbndExtendedResources, )

        resources = DbndExtendedResources(
            requests=self.requests,
            limits=self.limits,
            request_memory=self.request_memory,
            request_cpu=self.request_cpu,
            limit_memory=self.limit_memory,
            limit_cpu=self.limit_cpu,
        )
        # base environment of the pod; later `update` calls below may override these keys
        env_vars = {
            ENV_DBND_POD_NAME: pod_name,
            ENV_DBND_POD_NAMESPACE: self.namespace,
            ENV_DBND_USER: task_run.task_run_env.user,
            ENV_DBND__ENV_IMAGE: image,
            ENV_DBND_ENV: task_run.run.env.task_name,
            ENV_DBND__ENV_MACHINE: "%s at %s" % (pod_name, self.namespace),
        }

        if AIRFLOW_VERSION_2:
            env_vars[
                "AIRFLOW__CORE__TASK_RUNNER"] = "dbnd_airflow.compat.dbnd_task_runner.DbndStandardTaskRunner"

        if self.auto_remove:
            env_vars[ENV_DBND_AUTO_REMOVE_POD] = "True"
        env_vars[self._params.get_param_env_key(self, "in_cluster")] = "True"
        env_vars["AIRFLOW__KUBERNETES__IN_CLUSTER"] = "True"
        env_vars[
            "DBND__RUN_INFO__SOURCE_VERSION"] = task_run.run.context.task_run_env.user_code_version
        env_vars["AIRFLOW__KUBERNETES__DAGS_IN_IMAGE"] = "True"
        # the tracking variable is only forced to "False" outside of tracking mode
        if not get_dbnd_project_config().is_tracking_mode():
            env_vars[ENV_DBND__TRACKING] = "False"
        # we want that all next runs will be able to use the image that we have in our configuration

        env_vars.update(
            self._params.to_env_map(self, "container_repository",
                                    "container_tag"))

        # precedence (lowest to highest): values above, self.env_vars, context spawn env
        env_vars.update(self.env_vars)
        env_vars.update(task_run.run.get_context_spawn_env())

        secrets = self.get_secrets(
            include_system_secrets=include_system_secrets)

        if self.trap_exit_file_flag:
            # wrap the command in a shell script that touches the flag file on exit
            # NOTE(review): the `args` passed by the caller is discarded here - confirm intended
            args = [
                textwrap.dedent("""
                trap "touch {trap_file}" EXIT
                {command}
                """.format(
                    trap_file=self.trap_exit_file_flag,
                    command=subprocess.list2cmdline(cmds),
                ))
            ]
            # we update cmd now
            cmds = ["/bin/bash", "-c"]

        if self.debug_with_command:
            # NOTE(review): if trap_exit_file_flag is also set, `cmds` was already
            # replaced above, so the "original command" logged here is `/bin/bash -c`
            logger.warning(
                "%s replacing pod %s command with '%s', original command=`%s`",
                task_run,
                pod_name,
                self.debug_with_command,
                subprocess.list2cmdline(cmds),
            )
            cmds = shlex.split(self.debug_with_command)

        base_pod = self._build_base_pod()

        pod = self._to_real_pod(
            cmds=cmds,
            args=args,
            namespace=self.namespace,
            name=pod_name,
            envs=env_vars,
            image=image,
            labels=labels,
            secrets=secrets,
            resources=resources,
            annotations=annotations,
        )

        # merge the pod built here with the base pod
        final_pod = reconcile_pods(base_pod, pod)

        return final_pod