Ejemplo n.º 1
0
class DatabandSystemConfig(Config):
    """Databand's command line arguments (see `dbnd run --help`).

    Every attribute below is a config parameter registered under the
    ``databand`` task family.
    """

    _conf__task_family = "databand"

    # --- output / logging switches (all off by default) ---
    verbose = parameter(description="Make logging output more verbose").value(False)
    print_task_band = parameter(description="Print task_band in logging output.").value(
        False
    )

    describe = parameter.help("Describe current run").value(False)

    # --- run bootstrap ---
    module = parameter(
        default=None, description="Auto load this module before resolving user classes"
    )[str]
    env = parameter(
        default=CloudType.local,
        description="task environment: local/aws/aws_prod/gcp/prod",
    )[str]

    # --- configuration sources ---
    conf_file = parameter(default=None, description="List of files to read from")[
        List[str]
    ]
    # Keep `conf` as the last one: the parameter keyword is used for
    # parameters generation, and all following params will be dict.
    # NOTE(review): `project_name` is declared after it -- confirm the
    # ordering constraint before moving any declaration in this class.
    conf = parameter(
        default=None,
        description="JSON string/key=value that gets into the Task attribute",
    )[Dict[str, str]]

    project_name = parameter(
        default="databand_project", description="Name of this databand project"
    )[str]
Ejemplo n.º 2
0
class CoreConfig(Config):
    """Databand's core functionality behaviour.

    Holds the ``core`` section parameters (tracking endpoints, credentials,
    user hooks, pipeline pickling switches) and the helpers that build the
    tracking store and the API client out of them.
    """

    _conf__task_family = "core"

    dags_subdir = parameter.system(
        default="",
        description="File location or directory from which to look for the dag",
    )

    environments = parameter(description="List of enabled environments")[List[str]]

    # --- connection to the dbnd web server ---
    dbnd_user = parameter(description="user used to connect to the dbnd web server")[
        str
    ]
    dbnd_password = parameter(
        description="password used to connect to the dbnd web server"
    )[str]
    databand_url = parameter(
        default=None,
        description="Tracker URL to be used for creating links in console logs",
    )[str]
    # When set, the token is used instead of dbnd_user/dbnd_password
    # (see build_databand_api_client).
    databand_personal_access_token = parameter(
        description="Personall access token to connect to the dbnd web server",
        default=None,
    )[str]

    # Backward compatibility: _validate() copies this into databand_url
    # when databand_url itself is not set.
    tracker_url = parameter(
        default=None,
        description="OLD: Tracker URL to be used for creating links in console logs",
    )[str]

    tracker_version = parameter[str]

    user_configs = parameter(
        empty_default=True,
        description="Contains the config for creating tasks from user code",
    )[List[str]]

    # --- user hooks ---
    # user_pre_init = defined at Databand System config, dbnd_on_pre_init_context
    user_init = parameter(
        default=None,
        description="Runs in every DBND process with System configuration in place (dbnd_post_enter_context)",
    )[object]
    user_driver_init = parameter(
        default=None,
        description="Runs in driver after config initialization (dbnd_on_new_context)",
    )[object]

    user_code_on_fork = parameter(
        default=None, description="Runs in sub process (parallel/kubernets/external)"
    )[object]

    pickle_handler = parameter(
        default=None,
        description="Defines a python pickle handler to be used to pickle the "
        "run's data",
    )[str]

    # --- tracking ---
    tracker = parameter(
        default=["file", "console", "api"], description="Tracking Stores to be used"
    )[List[str]]
    tracker_raise_on_error = parameter(
        default=True, description="Raise error when failed to track data"
    )[bool]
    # Passed to get_tracking_store() as `api_channel_name`; the previous
    # description was a copy of the `tracker` one.
    tracker_api = parameter(
        default="web", description="Tracking API channel to be used"
    )[str]

    # --- pipeline pickling ---
    always_save_pipeline = parameter(
        description="Boolean for always saving pipeline to pickle"
    ).value(False)
    disable_save_pipeline = parameter(
        description="Boolean for disabling pipeline pickling"
    ).value(False)

    recheck_circle_dependencies = parameter(
        description="Re check circle dependencies on every task creation,"
        " use it if you need to find of circle in your graph "
    ).value(False)

    hide_system_pipelines = parameter(
        description="Hides the scheduled job launcher and driver submit pipelines at the API level to prevent clutter",
        default=True,
    )

    fix_env_on_osx = parameter(
        description="add no_proxy=* to env vars, fixing issues with multiprocessing on osx"
    )[bool]

    plugins = parameter(
        description="plugins to load on databand context creation", default=None
    )[str]
    allow_vendored_package = parameter(
        description="Allow adding dbnd/_vendor_package to sys.path", default=False
    )[bool]

    def _validate(self):
        """Normalize and sanity-check the tracking related settings.

        Steps, in order:

        1. fall back to the deprecated ``tracker_url`` if ``databand_url``
           is not set;
        2. strip trailing slashes from ``databand_url``;
        3. remove the ``"api"`` store from ``tracker`` when there is still
           no ``databand_url``;
        4. warn when a personal access token is configured together with
           user/password, since the token takes precedence.
        """
        if not self.databand_url and self.tracker_url:
            logger.warning(
                "core.databand_url was not set, using deprecated 'core.tracker_url' instead."
            )
            self.databand_url = self.tracker_url

        if self.databand_url and self.databand_url.endswith("/"):
            dbnd_log_debug(
                "Please fix your core.databand_url value, "
                "it should not contain '/' at the end, auto-fix has been applied."
            )
            # rstrip (rather than [:-1]) so that "http://host//" is fully
            # normalized instead of keeping one of the slashes.
            self.databand_url = self.databand_url.rstrip("/")

        # automatically disabling tracking if databand_url is not set
        if not self.databand_url:
            dbnd_log_debug(
                "Automatically disabling tracking to databand service as databand_url is not set"
            )
            self.tracker = [t for t in self.tracker if t != "api"]

        has_basic_credentials = self.dbnd_user or self.dbnd_password
        if self.databand_personal_access_token and has_basic_credentials:
            logger.warning(
                "core.databand_personal_access_token is used instead of defined dbnd_user and dbnd_password."
            )

    def build_tracking_store(self, remove_failed_store=True):
        """Build the tracking store described by this config.

        :param remove_failed_store: forwarded as-is to ``get_tracking_store``.
        :return: whatever ``get_tracking_store`` returns for the configured
            ``tracker`` names, ``tracker_api`` channel and
            ``tracker_raise_on_error`` flag.
        """
        # imported lazily, inside the method, as in the rest of this class
        from dbnd._core.tracking.registry import get_tracking_store

        return get_tracking_store(
            tracking_store_names=self.tracker,
            api_channel_name=self.tracker_api,
            tracker_raise_on_error=self.tracker_raise_on_error,
            remove_failed_store=remove_failed_store,
        )

    def build_databand_api_client(self):
        """Create an ``ApiClient`` for ``databand_url``.

        The personal access token is preferred when it is set; otherwise the
        ``dbnd_user`` / ``dbnd_password`` pair is passed as credentials.
        """
        from dbnd.utils.api_client import ApiClient

        if self.databand_personal_access_token:
            credentials = {"token": self.databand_personal_access_token}
        else:
            credentials = {
                "username": self.dbnd_user,
                "password": self.dbnd_password,
            }

        return ApiClient(self.databand_url, credentials=credentials)
Ejemplo n.º 3
0
class CoreConfig(Config):
    """Databand's core functionality behaviour.

    Holds the ``core`` section parameters and builds the tracking stores
    ("file", "console", "debug", "api") that are selected through them.
    """

    _conf__task_family = "core"

    dags_subdir = parameter.system(
        default="",
        description="File location or directory from which to look for the dag",
    )

    environments = parameter(description="List of enabled environments")[List[str]]

    databand_url = parameter(
        default=None,
        description="Tracker URL to be used for creating links in console logs",
    )[str]

    # Backward compatibility: used by _build_store("api") when databand_url
    # is not set.
    tracker_url = parameter(
        default=None,
        description="OLD: Tracker URL to be used for creating links in console logs",
    )[str]

    tracker_version = parameter[str]

    user_configs = parameter(
        empty_default=True,
        description="Contains the config for creating tasks from user code",
    )[List[str]]

    # --- user hooks ---
    # user_pre_init = defined at Databand System config, dbnd_on_pre_init_context
    user_init = parameter(
        default=None,
        description="Runs in every DBND process with System configuration in place (dbnd_post_enter_context)",
    )[object]
    user_driver_init = parameter(
        default=None,
        description="Runs in driver after config initialization (dbnd_on_new_context)",
    )[object]

    user_code_on_fork = parameter(
        default=None, description="Runs in sub process (parallel/kubernets/external)"
    )[object]

    sql_alchemy_conn = parameter(description="The connection string for the database")[
        str
    ]

    pickle_handler = parameter(
        default=None,
        description="Defines a python pickle handler to be used to pickle the "
        "run's data",
    )[str]

    # --- tracking ---
    tracker = parameter(
        default=["file", "console", "api"], description="Tracking Stores to be used"
    )[List[str]]
    tracker_raise_on_error = parameter(
        default=True, description="Raise error when failed to track data"
    )[bool]
    # Selects the channel in _build_tracking_api_store(); the previous
    # description was a copy of the `tracker` one.
    tracker_api = parameter(
        default="web", description="Tracking API channel to be used: web/db/disabled"
    )[str]
    auto_create_local_db = parameter(
        default=True,
        description="Automatically create local SQLite db if it's not present",
    )[bool]

    # --- pipeline pickling ---
    always_save_pipeline = parameter(
        description="Boolean for always saving pipeline to pickle"
    ).value(False)
    disable_save_pipeline = parameter(
        description="Boolean for disabling pipeline pickling"
    ).value(False)

    recheck_circle_dependencies = parameter(
        description="Re check circle dependencies on every task creation,"
        " use it if you need to find of circle in your graph "
    ).value(False)

    hide_system_pipelines = parameter(
        description="Hides the scheduled job launcher and driver submit pipelines at the API level to prevent clutter",
        default=True,
    )

    fix_env_on_osx = parameter(
        description="add no_proxy=* to env vars, fixing issues with multiprocessing on osx"
    )[bool]

    fernet_key = parameter(
        description="key used by airflow to encrypt connections credentials",
        default=None,
    )[str]

    plugins = parameter(
        description="plugins to load on databand context creation", default=None
    )[str]

    def _validate(self):
        """Strip trailing slashes from ``databand_url`` (with a warning)."""
        if self.databand_url and self.databand_url.endswith("/"):
            logger.warning(
                "Please fix your core.databand_url value, "
                "it should not containe / at the end, auto-fix has been applied."
            )
            # rstrip (rather than [:-1]) so that "http://host//" is fully
            # normalized instead of keeping one of the slashes.
            self.databand_url = self.databand_url.rstrip("/")

    def get_sql_alchemy_conn(self):
        """Return the configured ``sql_alchemy_conn`` value."""
        return self.sql_alchemy_conn

    @property
    def sql_conn_repr(self):
        """``repr`` of the SQLAlchemy URL built from the connection string.

        Returns an installation hint string instead when ``sqlalchemy``
        cannot be imported.
        """
        try:
            from sqlalchemy.engine.url import make_url
        except ImportError:
            # Only a missing/broken sqlalchemy install is expected here; a
            # bare `except:` would also swallow KeyboardInterrupt/SystemExit.
            return "`pip install sqlalchemy` in order to get sql db url"

        return repr(make_url(self.get_sql_alchemy_conn()))

    def _build_store(self, name):
        # type: (str) -> TrackingStore
        """Build a single tracking store by its configured name.

        Supported names: ``"file"``, ``"console"``, ``"debug"``, ``"api"``.
        For ``"api"`` the result of ``_build_tracking_api_store`` is returned,
        which may be ``None`` (see there).

        :raises: ``friendly_error.config.wrong_store_name(name)`` for any
            other name.
        """
        from dbnd._core.tracking.tracking_store_console import ConsoleStore
        from dbnd._core.tracking.tracking_store_file import FileTrackingStore
        from dbnd._core.tracking.tracking_store_api import TrackingStoreApi

        if name == "file":
            return FileTrackingStore()
        elif name == "console":
            return ConsoleStore()
        elif name == "debug":
            from dbnd._core.tracking.channels.tracking_debug_channel import (
                ConsoleDebugTrackingChannel,
            )

            return TrackingStoreApi(channel=ConsoleDebugTrackingChannel())
        elif name == "api":
            if not self.databand_url and self.tracker_url:
                # TODO: Backward compatibility, remove this when tracker_url is officially deprecated
                logger.warning(
                    "core.databand_url was not set, trying to use deprecated 'core.tracker_url' instead."
                )
                # side effect: the fallback value is stored on the config
                self.databand_url = self.tracker_url

            return self._build_tracking_api_store(
                tracker_api=self.tracker_api, databand_url=self.databand_url
            )

        raise friendly_error.config.wrong_store_name(name)

    def _build_tracking_api_store(self, tracker_api, databand_url):
        r"""Build the "api" tracking store for the given channel type.

                                                             ctx (+DB)
                                                                |
        DBND -> Tracker -> WebChannel -> ApiClient -> HTTP -> Flask -> Views -x-> TrackingApiHandler -> TrackingDbService -> SQLA -> DB
                      \ -> DBChannel ----------------------------------------/

        :param tracker_api: one of ``"web"``, ``"db"``, ``"disabled"``.
        :param databand_url: base url for the ``"web"`` channel.
        :return: a ``TrackingStoreApi`` wrapping the selected channel;
            a plain ``TrackingStore()`` for ``"disabled"``;
            ``None`` for ``"web"`` when ``databand_url`` is empty.
        :raises: ``friendly_error.config.wrong_tracking_api_name(tracker_api)``
            for an unknown channel name.
        """
        # NOTE: the docstring is a raw string because the diagram contains a
        # backslash, which is an invalid escape sequence in a normal literal.
        from dbnd._core.tracking.tracking_store_api import TrackingStoreApi

        if tracker_api == "web":
            from dbnd.api.tracking_api import TrackingApiClient

            if not databand_url:
                logger.debug(
                    "Although 'api' was set in 'core.tracker', and 'web' was set in 'core.tracker_api' "
                    "dbnd will not use it since 'core.databand_url' was not set."
                )
                return None

            # TODO Add auth actually
            channel = TrackingApiClient(api_base_url=databand_url, auth=None)
        elif tracker_api == "db":
            assert_web_enabled(
                "It is required when trying to use local db connection (tracker_api=db)."
            )
            from dbnd_web.app import activate_dbnd_web_context

            # DirectDbChannel requires DB session, it's available in Flask Context
            activate_dbnd_web_context()

            from dbnd_web.api.v1.tracking_api import (
                TrackingApiHandler as DirectDbChannel,
            )

            channel = DirectDbChannel()
        elif tracker_api == "disabled":
            logger.info("Tracking store is disable at core.tracker_api.")
            return TrackingStore()
        else:
            raise friendly_error.config.wrong_tracking_api_name(tracker_api)
        return TrackingStoreApi(channel=channel)

    def get_tracking_store(self):
        # type: () -> TrackingStore
        """Build the tracking store for all names listed in ``tracker``.

        A single configured store is returned directly when
        ``tracker_raise_on_error`` is set; in every other case the stores
        are wrapped into a ``CompositeTrackingStore``. Stores that could not
        be built (``_build_store`` returned nothing) are left out, so the
        result is never ``None``.
        """
        from dbnd._core.tracking.tracking_store import CompositeTrackingStore

        store_names = self.tracker
        if len(store_names) == 1 and self.tracker_raise_on_error:
            # only composite store supports tracker_raise_on_error=False
            single_store = self._build_store(store_names[0])
            if single_store is not None:
                return single_store
            # The only store could not be built (e.g. "api" without
            # databand_url). Return an empty composite, exactly what the
            # multi-store path below produces, instead of leaking None.
            return CompositeTrackingStore(
                stores=[], raise_on_error=self.tracker_raise_on_error
            )
        if not store_names:
            logger.warning("You are running without any tracking store configured.")

        stores = []
        for name in store_names:
            store = self._build_store(name)
            if store:
                stores.append(store)

        return CompositeTrackingStore(
            stores=stores, raise_on_error=self.tracker_raise_on_error
        )

    def is_db_store_enabled(self):
        """True when the "api" store is enabled and uses the "db" channel."""
        return "api" in self.tracker and self.tracker_api == "db"

    def get_scheduled_job_service(self):
        """Return the ``dbnd.api.scheduler_api_client`` module."""
        from dbnd.api import scheduler_api_client

        return scheduler_api_client