Example #1
    def _on_enter(self):
        pm.hook.dbnd_on_pre_init_context(ctx=self)
        run_user_func(config.get("core", "user_pre_init"))
        # if we were deserialized, we don't need to run this code again.
        if not self.initialized_context:
            # noinspection PyTypeChecker
            if self._module:
                load_python_module(self._module, "--module")

            module_from_config = config.get("databand", "module")
            if self._autoload_modules and module_from_config:
                load_python_module(module_from_config,
                                   "config file (see [databand].module)")

            # called from the singleton context manager;
            # we want to be able to catch all "new" inline airflow operators
            self.system_settings = DatabandSystemConfig()
            if self.system_settings.conf:
                self.config.set_values(self.system_settings.conf,
                                       source="dbnd.conf")
            if self.system_settings.conf_file:
                conf_file = read_from_config_files(
                    self.system_settings.conf_file)
                self.config.set_values(conf_file,
                                       source="dbnd__databand__conf")

            from dbnd._core.settings import DatabandSettings

            self.settings = DatabandSettings(databand_context=self)
            self.env = self.settings.get_env_config(self.system_settings.env)
            self.config.set_values(
                config_values={"task": {
                    "task_env": self.system_settings.env
                }},
                source="context",
            )

            pm.hook.dbnd_on_new_context(ctx=self)

            # RUN USER SETUP FUNCTIONS
            _run_user_func(
                self.settings.core.__class__.user_driver_init,
                self.settings.core.user_driver_init,
            )

            self.task_run_env = RunInfoConfig().build_task_run_info()
            self.initialized_context = True
        else:
            # we get here when running in a subprocess that recreates the Context
            pm.hook.dbnd_on_existing_context(ctx=self)

        # we do this every time we enter databand_config
        self.configure_targets()
        self.settings.log.configure_dbnd_logging()

        _run_user_func(self.settings.core.__class__.user_init,
                       self.settings.core.user_init)
        pm.hook.dbnd_post_enter_context(ctx=self)
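Example #1 delegates the [core] user_pre_init hook to run_user_func. A rough sketch of what such a helper has to do, assuming it receives a dotted "module.function" path from config and should be a no-op for an empty value (illustrative names, not dbnd's actual implementation):

import importlib

def run_user_func_sketch(dotted_path):
    # nothing configured under [core] user_pre_init -> nothing to run
    if not dotted_path:
        return None
    module_name, _, func_name = dotted_path.rpartition(".")
    func = getattr(importlib.import_module(module_name), func_name)
    # the hook is called with no arguments, as in _on_enter above
    return func()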
Example #2
def basic_logging_config(
    filename=None,
    log_level=logging.INFO,
    console_stream=sys.stderr,
    console_formatter_name="formatter_colorlog",
    file_formatter_name="formatter_full",
):
    # type: (...) -> Optional[dict]

    config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "formatter_full": {
                "format": FORMAT_FULL
            },
            "formatter_simple": {
                "format": FORMAT_SIMPLE
            },
            "formatter_colorlog": {
                "()": "dbnd._vendor.colorlog.ColoredFormatter",
                "format": FORMAT_COLORLOG,
                "reset": True,
            },
        },
        "handlers": {
            "console": {
                "class": "logging.StreamHandler",
                "stream": console_stream,
                "formatter": console_formatter_name,
            }
        },
        "root": {
            "handlers": ["console"],
            "level": log_level
        },
    }
    if filename:
        setup_log_file(filename)
        config["handlers"]["file"] = {
            "class": "logging.FileHandler",
            "formatter": file_formatter_name,
            "filename": filename,
            "encoding": "utf-8",
        }
        config["root"]["handlers"].append("file")

    sentry_url = dbnd_config.get("log", "sentry_url")
    if sentry_url:
        sentry_env = dbnd_config.get("log", "sentry_env", default="dev")

        config["handlers"]["sentry"] = get_sentry_logging_config(
            sentry_url=sentry_url, sentry_env=sentry_env)
        config["root"]["handlers"].append("sentry")

    return config
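The returned dict follows the standard logging.config dictionary schema (version 1), so it can be applied directly with logging.config.dictConfig; a minimal usage sketch, assuming basic_logging_config is in scope:

import logging
import logging.config

dict_config = basic_logging_config(filename="/tmp/dbnd.log", log_level=logging.DEBUG)
if dict_config:
    logging.config.dictConfig(dict_config)
logging.getLogger(__name__).info("dbnd logging configured")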
Example #3
def get_dags_from_databand(custom_operator_class: Optional[type] = None):
    if environ_enabled(ENV_DBND_DISABLE_SCHEDULED_DAGS_LOAD):
        return None
    from dbnd._core.errors.base import DatabandApiError, DatabandConnectionException

    try:
        # make sure system configs are loaded
        config.load_system_configs()
        if not config.get("core", "databand_url"):
            return {}

        default_retries = config.getint("scheduler", "default_retries")

        dags = DbndSchedulerDBDagsProvider(
            default_retries=default_retries,
            custom_operator_class=custom_operator_class).get_dags()

        if not in_quiet_mode():
            logger.info("providing %s dags from scheduled jobs", len(dags))
        return {dag.dag_id: dag for dag in dags}
    except (DatabandConnectionException, DatabandApiError) as e:
        logger.error(str(e))
        raise e
    except Exception as e:
        logging.exception("Failed to get dags from databand server")
        raise e
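A provider function like this is typically consumed from a module in the Airflow dags folder, since Airflow's DagBag collects DAG objects found at module level; a minimal sketch (the file path is illustrative):

# dags/dbnd_scheduled_dags.py
dags = get_dags_from_databand()
if dags:
    globals().update(dags)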
Example #4
def set_airflow_sql_conn_from_dbnd_config():
    logging.debug("updating airflow config from dbnd config")
    from dbnd._core.configuration.dbnd_config import config as dbnd_config

    sql_alchemy_conn = dbnd_config.get("airflow", "sql_alchemy_conn")
    if sql_alchemy_conn == "dbnd":
        logging.debug("updating airflow sql from dbnd core.sql_alchemy_conn")
        sql_alchemy_conn = dbnd_config.get("core", "sql_alchemy_conn")

    if sql_alchemy_conn and "AIRFLOW__CORE__SQL_ALCHEMY_CONN" not in os.environ:
        os.environ["AIRFLOW__CORE__SQL_ALCHEMY_CONN"] = sql_alchemy_conn

    fernet_key = dbnd_config.get("airflow", "fernet_key")
    if fernet_key == "dbnd":
        fernet_key = dbnd_config.get("core", "fernet_key")
    if fernet_key and "AIRFLOW__CORE__FERNET_KEY" not in os.environ:
        os.environ["AIRFLOW__CORE__FERNET_KEY"] = fernet_key
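Note the "not in os.environ" guards: an explicitly exported variable always wins over dbnd config. A small illustration of that precedence (values are placeholders):

import os

os.environ["AIRFLOW__CORE__SQL_ALCHEMY_CONN"] = "sqlite:////tmp/airflow.db"
set_airflow_sql_conn_from_dbnd_config()
# the pre-existing value is preserved, whatever [airflow] sql_alchemy_conn says
assert os.environ["AIRFLOW__CORE__SQL_ALCHEMY_CONN"] == "sqlite:////tmp/airflow.db"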
Example #5
    def test_str_interpolation(self):
        with config(
            {
                "b": dict(
                    a="@python://%s" % "test_dbnd.configuration.test_config_layers._a"
                )
            }
        ):
            assert config.get("b", "a") == "from_a"
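The test implies that a "@python://" value is resolved by importing the dotted path and using the object found there; a minimal sketch of such a resolver under that assumption (this is not dbnd's actual code):

import importlib

def resolve_python_ref(value):
    # "@python://pkg.module._a" -> the object bound to _a in pkg.module
    if not value.startswith("@python://"):
        return value
    module_name, _, attr = value[len("@python://"):].rpartition(".")
    obj = getattr(importlib.import_module(module_name), attr)
    # the "from_a" assertion above suggests a callable target is invoked
    return obj() if callable(obj) else obj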
Example #6
def dbnd_bootstrap():
    global _dbnd_bootstrap
    global _dbnd_bootstrap_started
    if _dbnd_bootstrap_started:
        return
    _dbnd_bootstrap_started = True

    dbnd_system_bootstrap()
    from targets.marshalling import register_basic_data_marshallers

    register_basic_data_marshallers()

    _surpress_loggers()
    _suppress_warnings()
    enable_osx_forked_request_calls()

    if is_airflow_enabled():
        from dbnd_airflow.bootstrap import airflow_bootstrap

        airflow_bootstrap()

    register_dbnd_plugins()

    from dbnd._core.configuration import environ_config
    from dbnd._core.utils.basics.load_python_module import run_user_func
    from dbnd._core.plugin.dbnd_plugins import pm

    from dbnd._core.configuration.dbnd_config import config

    user_plugins = config.get("core", "plugins", None)
    if user_plugins:
        register_dbnd_user_plugins(user_plugins.split(","))

    if is_unit_test_mode():
        pm.hook.dbnd_setup_unittest()

    pm.hook.dbnd_setup_plugin()

    if is_sigquit_handler_on():
        from dbnd._core.utils.basics.signal_utils import (
            register_sigquit_stack_dump_handler,
        )

        register_sigquit_stack_dump_handler()

    # now we can run user code ( at driver/task)
    user_preinit = environ_config.get_user_preinit()
    if user_preinit:
        run_user_func(user_preinit)

    # if anything calls dbnd_bootstrap again for some reason, this prevents endless recursion
    _dbnd_bootstrap = True
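The two module-level flags make the bootstrap idempotent: _dbnd_bootstrap_started is flipped before any work, so a nested or repeated call returns immediately, while _dbnd_bootstrap is only set once the whole sequence has finished. For example:

dbnd_bootstrap()  # runs the full bootstrap sequence
dbnd_bootstrap()  # no-op: _dbnd_bootstrap_started is already True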
Example #7
    def test_layers(self):
        with config({"b": dict(a=2)}):
            config.log_current_config()

            config.set("core", "a", "1")
            config.set("core", "b", "1")

            with config({"core": dict(a=5)}):
                config.log_current_config(as_table=True)
                assert config.get("core", "a") == 5

            config.log_current_config()
            config.log_layers()
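The inner with-block pushes a new config layer that shadows values from outer layers and from set(); on exit that layer is dropped. A sketch of the expected behavior, assuming layers are restored when the context manager exits:

config.set("core", "a", "1")
with config({"core": dict(a=5)}):
    assert config.get("core", "a") == 5  # the inner layer shadows set()
assert config.get("core", "a") == "1"    # the outer value is visible again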
Example #8
    def _on_enter(self):
        pm.hook.dbnd_on_pre_init_context(ctx=self)
        run_user_func(config.get("core", "user_pre_init"))
        # if we were deserialized, we don't need to run this code again.
        if not self._is_initialized:
            # called from the singleton context manager
            self.system_settings = DatabandSystemConfig()
            if self.system_settings.conf:
                self.config.set_values(self.system_settings.conf,
                                       source="[databand]conf")
            if self.system_settings.conf_file:
                conf_file = read_from_config_files(
                    self.system_settings.conf_file)
                self.config.set_values(conf_file, source="[databand]conf")

            from dbnd._core.settings import DatabandSettings

            self.settings = DatabandSettings(databand_context=self)
            self.env = self.settings.get_env_config(self.system_settings.env)
            self.config.set_values(
                config_values={"task": {
                    "task_env": self.system_settings.env
                }},
                source="context",
            )

            pm.hook.dbnd_on_new_context(ctx=self)

            # RUN USER SETUP FUNCTIONS
            _run_user_func(
                self.settings.core.__class__.user_driver_init,
                self.settings.core.user_driver_init,
            )

            self.task_run_env = RunInfoConfig().build_task_run_info()
            self._is_initialized = True
        else:
            # we get here when running in a subprocess that recreates the Context
            pm.hook.dbnd_on_existing_context(ctx=self)

        # we do this every time we enter databand_config
        self.configure_targets()
        self.settings.log.configure_dbnd_logging()

        _run_user_func(self.settings.core.__class__.user_init,
                       self.settings.core.user_init)
        pm.hook.dbnd_post_enter_context(ctx=self)
Example #9
def get_job_run_uid(airflow_instance_uid, dag_id, execution_date):
    # TODO_CORE: change to source_instance_uid
    if isinstance(execution_date, six.string_types):
        execution_date = pendulum.parse(execution_date)
    if isinstance(execution_date, datetime.datetime):
        # Temporary fix for existing databases with uids without microseconds
        algo_threshold = config.get("webserver",
                                    "run_uid_execution_date_threshold")
        if algo_threshold and execution_date <= pendulum.parse(algo_threshold):
            execution_date = execution_date.replace(microsecond=0)
        execution_date = execution_date.astimezone(pytz.utc).isoformat()
    if airflow_instance_uid is None:
        return uuid.uuid5(NAMESPACE_DBND_RUN,
                          "{}:{}".format(dag_id, execution_date))
    else:
        return uuid.uuid5(
            NAMESPACE_DBND_RUN,
            "{}:{}:{}".format(airflow_instance_uid, dag_id, execution_date),
        )
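Because uuid.uuid5 is deterministic, the same (instance uid, dag_id, normalized execution date) triple always yields the same run uid across processes and machines. A standalone illustration with a placeholder namespace (NAMESPACE_DBND_RUN in the real code is such a uuid.UUID constant):

import uuid

NAMESPACE = uuid.UUID("12345678-1234-5678-1234-567812345678")  # placeholder
u1 = uuid.uuid5(NAMESPACE, "my_dag:2023-01-01T00:00:00+00:00")
u2 = uuid.uuid5(NAMESPACE, "my_dag:2023-01-01T00:00:00+00:00")
assert u1 == u2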
Example #10
    def get_dags(self):  # type: () -> List[DAG]
        if not config.get("core", "databand_url"):
            self.scheduled_jobs = []
            return []
        logger.debug("about to get scheduler job dags from dbnd db")
        self.refresh_scheduled_jobs()
        dags = []
        for job in self.scheduled_jobs:
            if "schedule_interval" not in job:
                continue

            dag = self.job_to_dag(job)
            dag.sync_to_db()
            validation_errors = job.get("validation_errors")
            DagModel.get_dagmodel(dag.dag_id).set_is_paused(
                is_paused=not job["active"] or bool(validation_errors),
                including_subdags=False,
            )
            dags.append(dag)
        return dags
Example #11
def get_dags_from_file():
    if environ_enabled(ENV_DBND_DISABLE_SCHEDULED_DAGS_LOAD):
        return None

    try:
        # make sure system configs are loaded
        config.load_system_configs()

        config_file = config.get("scheduler", "config_file")
        if not config_file:
            logger.info("No dags file has been defined at scheduler.config_file")
            return {}
        default_retries = config.getint("scheduler", "default_retries")
        active_by_default = config.getboolean("scheduler", "active_by_default")

        dags = DbndAirflowDagsProviderFromFile(
            config_file=config_file,
            active_by_default=active_by_default,
            default_retries=default_retries,
        ).get_dags()
        return {dag.dag_id: dag for dag in dags}
    except Exception as e:
        logging.exception("Failed to get dags from the file")
        raise e
Example #12
    def __init__(self, config_file=None):
        config.load_system_configs()
        self.config_file = config_file or config.get("scheduler", "config_file")
        self.active_by_default = config.get("scheduler", "active_by_default")
Example #13
    def _get_task_cls(self, task_name):
        from dbnd._core.utils.basics.load_python_module import load_python_module

        task_cls = self._get_registered_task_cls(task_name)
        if task_cls:
            return task_cls

        # we are going to check if we have override/definition in config
        config_task_type = config.get(task_name, "_type", None)
        if config_task_type:
            _validate_no_recursion_in_config(task_name, config_task_type,
                                             "_type")
            try:
                return self._get_task_cls(config_task_type)
            except Exception:
                logger.error(
                    "Failed to load type required by [%s] using _type=%s",
                    task_name,
                    config_task_type,
                )
                raise
        config_task_type = config.get(task_name, "_from", None)
        if config_task_type:
            _validate_no_recursion_in_config(task_name, config_task_type,
                                             "_from")
            return self._get_task_cls(config_task_type)

        if "." in task_name:
            parts = task_name.split(".")
            possible_root_task = parts.pop()
            possible_module = ".".join(parts)

            # Try to load the module and check again for existence
            load_python_module(possible_module, "task name '%s'" % task_name)

            task_cls = self._get_registered_task_cls(task_name)
            if task_cls:
                return task_cls

            # Check if the task exists but the user forgot to decorate the function with @task
            task_module = sys.modules.get(possible_module)
            if task_module and hasattr(task_module, possible_root_task):
                user_func = getattr(task_module, possible_root_task)
                if callable(user_func):
                    # Non-decorated function was found - decorate and return it
                    from dbnd._core.decorator import dbnd_func_proxy

                    decorated_task = dbnd_func_proxy.task(user_func)
                    setattr(task_module, possible_root_task, decorated_task)
                    logger.warning(
                        "Found non-decorated task: %s. "
                        "Please decorate this task with the proper symbol @pipeline \ @task.\n"
                        "Auto-decorating and treating it as @task ...",
                        task_name,
                    )
                    return decorated_task.task

        if is_airflow_enabled():
            from dbnd_airflow.dbnd_task_executor.airflow_operator_as_dbnd import (
                AirflowDagAsDbndTask,
            )

            dag = self._get_aiflow_dag(task_name)
            if dag:
                return AirflowDagAsDbndTask
        return None
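The _type / _from lookups mean a task name can be redirected purely through configuration, with _validate_no_recursion_in_config guarding against self-referencing sections. A minimal sketch using the in-memory config layering seen in the tests above (the section and target names are hypothetical):

from dbnd._core.configuration.dbnd_config import config

# "my_alias" would resolve to whatever class "my_module.my_task" resolves to,
# exactly as _get_task_cls follows the _from pointer above
with config({"my_alias": {"_from": "my_module.my_task"}}):
    assert config.get("my_alias", "_from") == "my_module.my_task"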
Example #14
    def __init__(self, dbnd_task_type, dbnd_task_id, **kwargs):
        super(DbndOperator, self).__init__(**kwargs)
        self._task_type = dbnd_task_type
        self.dbnd_task_id = dbnd_task_id
        # Make sure that we run in a separate pool
        self.pool = dbnd_config.get("airflow", "dbnd_pool")
Example #15
import json

from collections import namedtuple
from typing import Any, Dict

from dbnd._core.configuration.dbnd_config import config
from dbnd.api.api_utils import ApiClient
from dbnd.api.shared_schemas.scheduled_job_schema import ScheduledJobSchemaV2

config.load_system_configs()
api_client = ApiClient(
    config.get("core", "databand_url"),
    auth=True,
    user=config.get("scheduler", "dbnd_user"),
    password=config.get("scheduler", "dbnd_password"),
)

schema = ScheduledJobSchemaV2(strict=False)

ScheduledJobNamedTuple = namedtuple("ScheduledJobNamedTuple",
                                    schema.fields.keys())
ScheduledJobNamedTuple.__new__.__defaults__ = (None, ) * len(
    ScheduledJobNamedTuple._fields)


def post_scheduled_job(scheduled_job_dict):
    data, _ = schema.dump({"DbndScheduledJob": scheduled_job_dict})
    response = api_client.api_request("/api/v1/scheduled_jobs",
                                      data,
                                      method="POST",
                                      no_prefix=True)
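A hedged usage sketch for post_scheduled_job; the exact fields accepted by ScheduledJobSchemaV2 are not shown here, so the keys below are illustrative only ("schedule_interval" at least appears in the scheduled-job dicts consumed by get_dags above):

scheduled_job = {
    "name": "nightly_report",        # illustrative field
    "cmd": "dbnd run my_pipeline",   # illustrative field
    "schedule_interval": "@daily",
}
post_scheduled_job(scheduled_job)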