def wait(timeout):
    """Wait for the tracking store to become ready, exiting the process on failure.

    Inside a new dbnd context named ``new_context`` the tracking store is
    obtained from ``CoreConfig`` and its ``is_ready`` callable is handed to
    ``wait_until`` together with ``timeout``. A falsy result is logged as an
    error and followed by ``sys.exit(1)``.

    :param timeout: value forwarded to ``wait_until``; reported as seconds in
        the log messages.
    """
    with new_dbnd_context(name="new_context"):
        store = CoreConfig().get_tracking_store()

        logger.info(
            "Waiting {} seconds for tracker to become ready:".format(timeout)
        )
        tracker_ready = wait_until(store.is_ready, timeout)

        if not tracker_ready:
            logger.error(
                "Tracker is not ready after {} seconds.".format(timeout)
            )
            sys.exit(1)

        logger.info("Tracker is ready.")
def send_heartbeat(run_uid, databand_url, heartbeat_interval, driver_pid, tracker, tracker_api):
    """Send heartbeats for a run using a tracking store built from the given settings.

    The ``core`` config section is overridden with the supplied tracker list,
    tracker API and databand URL. When ``tracker_api`` is ``"db"`` an extra
    dbnd context named ``send_heartbeat`` is entered as well. The tracking
    store is then created and handed to ``send_heartbeat_continuously``.

    :param run_uid: forwarded to ``send_heartbeat_continuously``.
    :param databand_url: value for ``core.databand_url``.
    :param heartbeat_interval: forwarded to ``send_heartbeat_continuously``.
    :param driver_pid: forwarded to ``send_heartbeat_continuously``.
    :param tracker: comma-separated string, split into ``core.tracker``.
    :param tracker_api: value for ``core.tracker_api``.
    """
    from dbnd import config
    from dbnd._core.settings import CoreConfig
    from dbnd._core.task_executor.heartbeat_sender import send_heartbeat_continuously

    core_overrides = {
        "tracker": tracker.split(","),
        "tracker_api": tracker_api,
        "databand_url": databand_url,
    }

    with config({"core": core_overrides}):
        # Contexts that must be active while heartbeats are being sent.
        extra_contexts = []
        if tracker_api == "db":
            from dbnd import new_dbnd_context

            db_context = new_dbnd_context(
                name="send_heartbeat", autoload_modules=False
            )
            extra_contexts.append(db_context)

        with nested_context.nested(*extra_contexts):
            store = CoreConfig().get_tracking_store()
            send_heartbeat_continuously(
                run_uid, store, heartbeat_interval, driver_pid
            )
def stop(self):
    """Finalize the tracked run: set final states, print the banner, close contexts.

    Does nothing when the manager is not active. Otherwise it is marked
    inactive first, then:

    * the root task run gets its ``finished_time``;
    * if the root task run is not yet in a finished state it becomes
      ``UPSTREAM_FAILED`` when any task run of the run is ``FAILED``, and
      ``SUCCESS`` otherwise;
    * the run state becomes ``SUCCESS`` only when the root task run is
      ``SUCCESS``, else ``FAILED``;
    * the finished-run banner is logged unless ``silence_tracking_mode`` is set;
    * all context managers held by this object are closed.

    Any exception raised along the way is passed to ``_handle_tracking_error``
    instead of propagating.
    """
    if not self._active:
        return
    self._active = False

    try:
        run = self._run
        root_task_run = self._task_run
        root_task_run.finished_time = utcnow()

        if root_task_run.task_run_state not in TaskRunState.finished_states():
            # any() stops at the first failed task run, like the loop with break.
            has_failed_task = any(
                child.task_run_state == TaskRunState.FAILED
                for child in run.task_runs
            )
            root_task_run.set_task_run_state(
                TaskRunState.UPSTREAM_FAILED
                if has_failed_task
                else TaskRunState.SUCCESS
            )

        root_succeeded = root_task_run.task_run_state == TaskRunState.SUCCESS
        run.set_run_state(RunState.SUCCESS if root_succeeded else RunState.FAILED)

        # todo: hard to control the console output if we printing to the console not from the console tracker
        if not CoreConfig.current().silence_tracking_mode:
            logger.info(run.describe.run_banner_for_finished())

        self._close_all_context_managers()
    except Exception:
        _handle_tracking_error("dbnd-tracking-shutdown")
def _get_databand_url(): try: external = TrackingConfig().databand_external_url if external: return external return CoreConfig().databand_url except Exception: pass
def track_databricks_submit_run_operator(operator):
    """Inject Airflow context and the Databand URL into a new cluster's Spark env vars.

    Reads ``operator.json``. When it contains a ``new_cluster`` entry, that
    cluster's ``spark_env_vars`` mapping (created if missing) is updated in
    place with Airflow template placeholders for dag id, execution date, task
    id and try number, plus ``DBND__CORE__DATABAND_URL`` taken from
    ``CoreConfig``. Without ``new_cluster`` nothing is modified.

    :param operator: object exposing the submit-run payload as ``operator.json``.
    """
    config = operator.json

    # passing env variables is only supported in new clusters
    if "new_cluster" not in config:
        return

    new_cluster = config["new_cluster"]
    env_vars = new_cluster.setdefault("spark_env_vars", {})

    env_vars.update(
        {
            "AIRFLOW_CTX_DAG_ID": "{{ dag.dag_id }}",
            "AIRFLOW_CTX_EXECUTION_DATE": "{{ ts }}",
            "AIRFLOW_CTX_TASK_ID": "{{ task.task_id }}",
            "AIRFLOW_CTX_TRY_NUMBER": "{{ task_instance._try_number }}",
        }
    )
    # Set last, after the template values, so a failing config lookup leaves
    # the same partial state as sequential assignment would.
    env_vars["DBND__CORE__DATABAND_URL"] = CoreConfig().databand_url
def extend_airflow_ctx_with_dbnd_tracking_info(task_run, airflow_ctx_env):
    """Return a copy of the Airflow context env extended with dbnd tracking info.

    The copy gains the root run uid and tracker url, the parent task run uid
    and attempt uid (all read from ``task_run``), and the Databand URL and
    access token from the ``CoreConfig`` of the current databand context.
    Entries whose value is ``None`` are dropped; every remaining value is
    converted with ``str``.

    :param task_run: task run supplying the run and attempt identifiers.
    :param airflow_ctx_env: mapping to extend; it is copied, not modified.
    :return: a new dict of string values.
    """
    env = airflow_ctx_env.copy()

    root_run_info = task_run.run.root_run_info
    env[DBND_ROOT_RUN_UID] = root_run_info.root_run_uid
    env[DBND_ROOT_RUN_TRACKER_URL] = task_run.run.root_run_info.root_run_url
    env[DBND_PARENT_TASK_RUN_UID] = task_run.task_run_uid
    env[DBND_PARENT_TASK_RUN_ATTEMPT_UID] = task_run.task_run_attempt_uid

    core_config = CoreConfig.from_databand_context()
    env["DBND__CORE__DATABAND_URL"] = core_config.databand_url
    env["DBND__CORE__DATABAND_ACCESS_TOKEN"] = core_config.databand_access_token

    # Environment values must be strings; unset (None) entries are omitted.
    return {
        key: str(value)
        for key, value in six.iteritems(env)
        if value is not None
    }
def test_wrong_config_validation(self):
    """Exercise each ``validate_no_extra_params`` mode with an unknown parameter.

    Covers ``error`` (raises ``UnknownParameterError`` with a suggestion),
    ``warn`` and ``disabled`` (task is still constructed), and an unknown key
    in the ``core`` section with validation set to ``error``.
    """

    def misspelled_ttask_config(validation_mode):
        # "t_parammm" is a deliberate misspelling of TTask's "t_param".
        return {
            "TTask": {
                "t_parammm": 2,
                "validate_no_extra_params": validation_mode,
            }
        }

    # raise exception
    with pytest.raises(UnknownParameterError) as raised:
        with config(misspelled_ttask_config(ParamValidation.error)):
            TTask()

    assert "Did you mean: t_param" in raised.value.help_msg

    # log warning to log
    with config(misspelled_ttask_config(ParamValidation.warn)):
        TTask()
    # tried to add a capsys assert here but couldn't get it to work

    # do nothing
    with config(misspelled_ttask_config(ParamValidation.disabled)):
        TTask()

    # handle core config sections too
    # might be other extra params in the config in which case a
    # DatabandBuildError will be raised
    with pytest.raises(DatabandError):
        core_overrides = {
            "config": {"validate_no_extra_params": ParamValidation.error},
            "core": {"blabla": "bla"},
        }
        with config(core_overrides):
            CoreConfig()
def run_validation(self):
    """Check that the required monitor settings are present.

    A message is appended to ``self.errors_list`` for each of these settings
    that is falsy: ``databand_url`` and ``databand_access_token`` on
    ``CoreConfig``, and ``syncer_name`` on ``AirflowMonitorConfig``. When
    ``self.errors_list`` is empty afterwards, a confirmation is logged.
    """
    monitor_config = AirflowMonitorConfig()
    core_config = CoreConfig()

    # (config object, attribute, message); attributes are read lazily, in order.
    required_settings = (
        (core_config, "databand_url", "No databand url found in the configuration"),
        (
            core_config,
            "databand_access_token",
            "No access token found in the configuration",
        ),
        (monitor_config, "syncer_name", "No syncer name found in the configuration"),
    )
    for section, attribute, message in required_settings:
        if not getattr(section, attribute):
            self.errors_list.append(message)

    if not self.errors_list:
        logger.info("All required configurations exist")
def try_get_or_create_task_run():
    # type: ()-> TaskRunTracker
    """Return the current task run, or try to obtain one from the environment.

    Sources are tried in this order:

    1. the current task run, if there is one;
    2. a ``TaskRunMock`` built from the ``DBND_TASK_RUN_ATTEMPT_UID``
       environment variable, with a ``TaskRunTracker`` attached;
    3. the task run of the Airflow tracking manager, when an Airflow context
       is found;
    4. a new run from ``dbnd_run_start()``, when this is an inplace run.

    Any exception raised in steps 2-4 is logged at info level and ``None`` is
    returned. ``None`` is also returned when no source applies.
    """
    current = try_get_current_task_run()
    if current:
        return current

    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker
        from dbnd._core.configuration.environ_config import DBND_TASK_RUN_ATTEMPT_UID

        attempt_uid = os.environ.get(DBND_TASK_RUN_ATTEMPT_UID)
        if attempt_uid:
            mock_run = TaskRunMock(attempt_uid)

            from dbnd import config
            from dbnd._core.settings import CoreConfig

            no_raise_override = {CoreConfig.tracker_raise_on_error: False}
            with config(no_raise_override, source="ondemand_tracking"):
                store = CoreConfig().get_tracking_store()
                mock_run.tracker = TaskRunTracker(mock_run, store)
                return mock_run

        # let's check if we are in airflow env
        from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
            try_get_airflow_context,
        )

        airflow_context = try_get_airflow_context()
        if airflow_context:
            from dbnd._core.inplace_run.airflow_dag_inplace_tracking import (
                get_airflow_tracking_manager,
            )

            tracking_manager = get_airflow_tracking_manager(airflow_context)
            if tracking_manager:
                return tracking_manager.airflow_operator__task_run

        from dbnd._core.inplace_run.inplace_run_manager import is_inplace_run

        if is_inplace_run():
            return dbnd_run_start()
    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None

    return None
def _get_task_run_mock(tra_uid):
    """
    Build a ``TaskRunMock`` for ``tra_uid`` with a ``TaskRunTracker`` attached.

    We need better implementation for this, currently in use only for spark.

    The tracking store is created while ``tracker_raise_on_error`` is
    overridden to ``False``. Any exception is logged at info level and
    ``None`` is returned instead of the mock.

    :param tra_uid: value passed to ``TaskRunMock``.
    :return: the prepared mock task run, or ``None`` on failure.
    """
    try:
        from dbnd._core.task_run.task_run_tracker import TaskRunTracker

        mock_run = TaskRunMock(tra_uid)

        from dbnd import config
        from dbnd._core.settings import CoreConfig

        no_raise_override = {CoreConfig.tracker_raise_on_error: False}
        with config(no_raise_override, source="on_demand_tracking"):
            store = CoreConfig().get_tracking_store()
            mock_run.tracker = TaskRunTracker(mock_run, store)
            return mock_run
    except Exception:
        logger.info("Failed during dbnd inplace tracking init.", exc_info=True)
        return None
def _get_databand_url(): try: return CoreConfig().databand_external_url except Exception: pass