Exemple #1
0
def wait(timeout):
    """Wait for the tracking store to report ready, exiting the process if it does not.

    Inside a fresh dbnd context named ``"new_context"``, the tracking store
    obtained from ``CoreConfig`` is handed to ``wait_until`` together with
    ``timeout``. A falsy result is logged as an error and followed by
    ``sys.exit(1)``.

    :param timeout: value forwarded to ``wait_until`` and reported in the log
        messages as a number of seconds.
    """
    with new_dbnd_context(name="new_context"):
        store = CoreConfig().get_tracking_store()

        waiting_msg = "Waiting {} seconds for tracker to become ready:".format(timeout)
        logger.info(waiting_msg)

        tracker_ready = wait_until(store.is_ready, timeout)
        if not tracker_ready:
            failure_msg = "Tracker is not ready after {} seconds.".format(timeout)
            logger.error(failure_msg)
            sys.exit(1)

        logger.info("Tracker is ready.")
Exemple #2
0
def send_heartbeat(run_uid, databand_url, heartbeat_interval, driver_pid,
                   tracker, tracker_api):
    """Run the continuous heartbeat sender under an overridden ``core`` config.

    :param run_uid: forwarded to ``send_heartbeat_continuously``.
    :param databand_url: value for the ``core.databand_url`` override.
    :param heartbeat_interval: forwarded to ``send_heartbeat_continuously``.
    :param driver_pid: forwarded to ``send_heartbeat_continuously``.
    :param tracker: comma-separated string; split into the ``core.tracker`` list.
    :param tracker_api: value for ``core.tracker_api``; when it equals ``"db"``
        an additional dbnd context is entered before the tracking store is built.
    """
    from dbnd import config
    from dbnd._core.settings import CoreConfig
    from dbnd._core.task_executor.heartbeat_sender import send_heartbeat_continuously

    core_overrides = {
        "tracker": tracker.split(","),
        "tracker_api": tracker_api,
        "databand_url": databand_url,
    }

    with config({"core": core_overrides}):
        # Context managers that have to be active while the heartbeat runs.
        extra_contexts = []
        if tracker_api == "db":
            from dbnd import new_dbnd_context

            db_context = new_dbnd_context(name="send_heartbeat",
                                          autoload_modules=False)
            extra_contexts.append(db_context)

        with nested_context.nested(*extra_contexts):
            store = CoreConfig().get_tracking_store()
            send_heartbeat_continuously(run_uid, store,
                                        heartbeat_interval, driver_pid)
    def stop(self):
        """Finalize the tracked run: set final states, print the banner, close contexts.

        Does nothing when the instance is not active. Otherwise it is marked
        inactive first, and any exception raised during finalization is routed
        to ``_handle_tracking_error`` instead of propagating.
        """
        if not self._active:
            return
        self._active = False

        try:
            run = self._run
            root_task_run = self._task_run
            root_task_run.finished_time = utcnow()

            # Only derive a state for the root task run if it has not reached
            # a finished state already.
            if root_task_run.task_run_state not in TaskRunState.finished_states():
                any_failed = any(
                    task_run.task_run_state == TaskRunState.FAILED
                    for task_run in run.task_runs
                )
                root_task_run.set_task_run_state(
                    TaskRunState.UPSTREAM_FAILED
                    if any_failed
                    else TaskRunState.SUCCESS
                )

            # The run state mirrors the root task run: success only if the root succeeded.
            run.set_run_state(
                RunState.SUCCESS
                if root_task_run.task_run_state == TaskRunState.SUCCESS
                else RunState.FAILED
            )

            # todo: hard to control the console output if we printing to the console not from the console tracker
            if not CoreConfig.current().silence_tracking_mode:
                logger.info(run.describe.run_banner_for_finished())

            self._close_all_context_managers()

        except Exception:
            _handle_tracking_error("dbnd-tracking-shutdown")
Exemple #4
0
def _get_databand_url():
    try:
        external = TrackingConfig().databand_external_url
        if external:
            return external
        return CoreConfig().databand_url
    except Exception:
        pass
Exemple #5
0
def track_databricks_submit_run_operator(operator):
    """Add Airflow context templates and the Databand URL to a new cluster's Spark env.

    ``operator.json`` is modified in place. Nothing happens when the payload
    has no ``"new_cluster"`` entry.

    :param operator: object whose ``json`` attribute holds the run payload.
    """
    run_spec = operator.json
    # passing env variables is only supported in new clusters
    if "new_cluster" not in run_spec:
        return

    env_vars = run_spec["new_cluster"].setdefault("spark_env_vars", {})
    env_vars.update({
        "AIRFLOW_CTX_DAG_ID": "{{ dag.dag_id }}",
        "AIRFLOW_CTX_EXECUTION_DATE": "{{ ts }}",
        "AIRFLOW_CTX_TASK_ID": "{{ task.task_id }}",
        "AIRFLOW_CTX_TRY_NUMBER": "{{ task_instance._try_number }}",
    })
    # Looked up last, after the Airflow context templates are already in place.
    env_vars["DBND__CORE__DATABAND_URL"] = CoreConfig().databand_url
Exemple #6
0
def extend_airflow_ctx_with_dbnd_tracking_info(task_run, airflow_ctx_env):
    """Return a copy of the Airflow context env extended with dbnd tracking info.

    The input mapping is not modified. Entries whose value is ``None`` are
    dropped and all remaining values are converted with ``str``.

    :param task_run: task run supplying the root run info and the parent
        task run / attempt uids.
    :param airflow_ctx_env: mapping to extend; only its ``copy()`` is touched.
    :return: new dict of names to string values.
    """
    merged = airflow_ctx_env.copy()

    merged.update({
        DBND_ROOT_RUN_UID: task_run.run.root_run_info.root_run_uid,
        DBND_ROOT_RUN_TRACKER_URL: task_run.run.root_run_info.root_run_url,
        DBND_PARENT_TASK_RUN_UID: task_run.task_run_uid,
        DBND_PARENT_TASK_RUN_ATTEMPT_UID: task_run.task_run_attempt_uid,
    })

    core_config = CoreConfig.from_databand_context()
    merged["DBND__CORE__DATABAND_URL"] = core_config.databand_url
    merged["DBND__CORE__DATABAND_ACCESS_TOKEN"] = core_config.databand_access_token

    # Unset (None) values are omitted instead of being rendered as "None".
    return {
        name: str(value)
        for name, value in six.iteritems(merged)
        if value is not None
    }
    def test_wrong_config_validation(self):
        """Exercise ``validate_no_extra_params`` in its error, warn and disabled modes."""

        def misspelled_ttask_config(mode):
            # "t_parammm" is a deliberate misspelling of the task parameter.
            return {
                "TTask": {
                    "t_parammm": 2,
                    "validate_no_extra_params": mode,
                }
            }

        # raise exception
        with pytest.raises(UnknownParameterError) as exc_info:
            with config(misspelled_ttask_config(ParamValidation.error)):
                TTask()

        assert "Did you mean: t_param" in exc_info.value.help_msg

        # warn: log warning to log
        # (tried to add a capsys assert here but couldn't get it to work)
        # disabled: do nothing
        for lenient_mode in (ParamValidation.warn, ParamValidation.disabled):
            with config(misspelled_ttask_config(lenient_mode)):
                TTask()

        # handle core config sections too
        core_section_config = {
            "config": {
                "validate_no_extra_params": ParamValidation.error
            },
            "core": {
                "blabla": "bla"
            },
        }
        # might be other extra params in the config in which case a DatabandBuildError will be raised
        with pytest.raises(DatabandError):
            with config(core_section_config):
                CoreConfig()
Exemple #8
0
    def run_validation(self):
        """Record an error for every required configuration value that is missing.

        Checks the databand url and access token on ``CoreConfig`` and the
        syncer name on ``AirflowMonitorConfig``. One message per falsy value is
        appended to ``self.errors_list``; if the list ends up empty a
        confirmation is logged.
        """
        monitor_config = AirflowMonitorConfig()
        core_config = CoreConfig()

        # (config object, attribute name, message recorded when the value is falsy)
        required_settings = (
            (core_config, "databand_url",
             "No databand url found in the configuration"),
            (core_config, "databand_access_token",
             "No access token found in the configuration"),
            (monitor_config, "syncer_name",
             "No syncer name found in the configuration"),
        )

        for settings, attribute, message in required_settings:
            if not getattr(settings, attribute):
                self.errors_list.append(message)

        if not self.errors_list:
            logger.info("All required configurations exist")
Exemple #9
0
def try_get_or_create_task_run():
    # type: ()-> TaskRunTracker
    """Return the current task run, or try to create one from the environment.

    Sources are tried in this order:

    1. the task run returned by ``try_get_current_task_run``;
    2. a ``TaskRunMock`` built from the ``DBND_TASK_RUN_ATTEMPT_UID``
       environment variable, with a tracker attached;
    3. the task run of the airflow tracking manager, when an airflow context
       is found;
    4. ``dbnd_run_start()`` when ``is_inplace_run()`` is true.

    Returns ``None`` when none of them applies, or when any exception is raised
    during steps 2-4 (the failure is logged at info level).
    """
    current = try_get_current_task_run()
    if current:
        return current

    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker
        from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

        attempt_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
        if attempt_uid:
            mock_run = TaskRunMock(attempt_uid)
            from dbnd import config
            from dbnd._core.settings import CoreConfig

            overrides = {CoreConfig.tracker_raise_on_error: False}
            with config(overrides, source="ondemand_tracking"):
                store = CoreConfig().get_tracking_store()
                mock_run.tracker = TaskRunTracker(mock_run, store)
                return mock_run

        # let's check if we are in airflow env
        from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
            try_get_airflow_context, )

        airflow_context = try_get_airflow_context()
        if airflow_context:
            from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
                get_airflow_tracking_manager, )

            tracking_manager = get_airflow_tracking_manager(airflow_context)
            if tracking_manager:
                return tracking_manager.airflow_operator__task_run

        from dbnd._core.inplace_run.inplace_run_manager import is_inplace_run

        if is_inplace_run():
            return dbnd_run_start()

    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None

    # No source produced a task run.
    return None
Exemple #10
0
def _get_task_run_mock(tra_uid):
    """
    Build a ``TaskRunMock`` for ``tra_uid`` and attach a ``TaskRunTracker`` to it.

    The tracking store is created while ``tracker_raise_on_error`` is
    overridden to ``False``. Returns the mock, or ``None`` if anything raises
    (the failure is logged at info level).

    We need better implementation for this,
    currently in use only for spark
    """
    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker

        mock_run = TaskRunMock(tra_uid)
        from dbnd import config
        from dbnd._core.settings import CoreConfig

        overrides = {CoreConfig.tracker_raise_on_error: False}
        with config(overrides, source="on_demand_tracking"):
            store = CoreConfig().get_tracking_store()
            mock_run.tracker = TaskRunTracker(mock_run, store)
            return mock_run
    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None
def _get_databand_url():
    try:
        return CoreConfig().databand_external_url
    except Exception:
        pass