def failed_to_read_task_input(ex, task, parameter, target):
    """
    Build the error describing a failure to read a task input.

    The error is returned, not raised.

    :param ex: exception raised by the read; attached as the nested exception
    :param task: task owning the input; ``task_name`` is used in the message
    :param parameter: parameter being read; ``name`` and
        ``value_type.type_str`` are used in the message
    :param target: target the value was read from
    :return: DatabandError
    """
    template = (
        "Failed to read '{task.task_name}.{p.name}' from "
        "'{target}' to {p.value_type.type_str}: {ex}"
    )
    message = template.format(p=parameter, target=target, task=task, ex=ex)
    return DatabandError(message, nested_exceptions=ex)
def get_param_value_safe(self, param_name):
    # type: (str) -> ParameterValue
    """
    Return the parameter value for ``param_name`` or fail loudly.

    :param param_name: name passed to ``self.get_param_value``
    :return: the value returned by ``self.get_param_value``
    :raises DatabandError: when the lookup result is falsy
    """
    value = self.get_param_value(param_name)
    if value:
        return value
    raise DatabandError(
        "Param value '%s' is not found at %s" % (param_name, self.source)
    )
def target(*path, **kwargs):
    """
    Auto-resolving factory: build a target object from path parts.

    The parts are converted to strings and joined with ``os.path.join``.
    A joined path containing "," is split and each piece is resolved
    recursively into a ``MultiTarget``. A trailing "[noflag]" marker is
    stripped and the config's flag is cleared.

    :param path: one or more path components
    :param kwargs: ``fs`` and ``config`` are consumed here; any remaining
        keyword arguments are passed to ``FileTarget`` only
    :return: ``MultiTarget``, the result of ``config.target_factory``,
        ``DirTarget`` or ``FileTarget``
    :raises DatabandError: when no path is given or the joined path is empty
    """
    noflag_marker = "[noflag]"

    if not path:
        # os.path.join() requires at least one component; without this guard a
        # bare target() call fails with TypeError instead of the intended error
        raise DatabandError("Can not convert empty string '%s' to Target" % "")

    path = os.path.join(*[str(p) for p in path])
    if not path:
        raise DatabandError("Can not convert empty string '%s' to Target" % path)

    if "," in path:
        return MultiTarget(targets=[target(p, **kwargs) for p in path.split(",")])

    fs = kwargs.pop("fs", None)
    config = kwargs.pop("config", None)
    config = extract_target_config_from_path(path, config=config)

    if path.endswith(noflag_marker):
        config = config.with_flag(None)
        path = path[: -len(noflag_marker)]

    if config.folder and not trailing_slash(path):
        # folder targets get a separator appended: the OS one for a local
        # filesystem object, "/" otherwise
        separator = os.path.sep if isinstance(fs, LocalFileSystem) else "/"
        path = "%s%s" % (path, separator)

    if config.target_factory:
        return config.target_factory(path, fs=fs, config=config)

    if config.folder:
        from targets.dir_target import DirTarget

        return DirTarget(path, fs=fs, config=config)

    return FileTarget(path=path, fs=fs, config=config, **kwargs)
def track_batch_progress(self, batch_id, status_reporter=None):
    """
    Poll a batch until it leaves the running states, echoing its log lines.

    On every iteration the batch response is passed to the reporter and any
    log lines after the last one already seen are written to the logger.
    The reporter is called once more with the last response after the loop.

    :param batch_id: id passed to ``get_batch`` / ``get_all_batch_logs``
    :param status_reporter: callable taking the batch response; defaults to
        ``self._default_status_reporter``
    :raises DatabandError: when the final state is in ``BATCH_ERROR_STATES``
    """
    report = status_reporter or self._default_status_reporter

    # Poll the status of the submitted scala code
    lines_seen = 0
    while True:
        batch_response = self.get_batch(batch_id)
        batch_status = batch_response["state"]
        report(batch_response)

        # only fetch log lines we have not printed yet
        new_lines = self.get_all_batch_logs(batch_id, from_line=lines_seen)["log"]
        for log_line in new_lines:
            logger.info(log_line)
        lines_seen += len(new_lines)

        if batch_status.lower() not in BATCH_RUNNING_STATES:
            break
        time.sleep(10)

    report(batch_response)

    if batch_status.lower() in BATCH_ERROR_STATES:
        logger.info("Batch exception: see logs")
        raise DatabandError("Batch Status: " + batch_status)
    logger.info("Batch Status: " + batch_status)
def airflow_versioned_dag_missing(command):
    """
    Build the error reported when dbnd-airflow-versioned-dag is not installed.

    :param command: the command that could not be run; used in both the
        message and the help text
    :return: DatabandError (returned, not raised) with exc info display off
    """
    message = (
        "Could not run '%s', dbnd-airflow-versioned-dag is not installed." % command
    )
    hint = (
        "Please run 'pip install dbnd-airflow-versioned-dag' in order to run '%s'."
        % command
    )
    return DatabandError(message, help_msg=hint, show_exc_info=False)
def failed_to_set_index(ex, df, set_index, target):
    """
    Build the error for a failed set-index step while reading a data frame.

    :param ex: the original exception; attached as a one-item nested list
    :param df: data frame whose ``columns`` are shown in the message
    :param set_index: the requested index
    :param target: source target; rendered through ``_safe_target``
    :return: DatabandError (returned, not raised)
    """
    template = (
        "Failed to set index to '{set_index}' "
        "for data frame with columns {columns} "
        "while reading from {target}: {ex}"
    )
    message = template.format(
        set_index=set_index,
        columns=df.columns,
        target=_safe_target(target),
        ex=ex,
    )
    return DatabandError(message, nested_exceptions=[ex])
def get_metric_history(self, key, source=None):
    """
    Read all recorded values of a metric.

    Each non-blank line of the metric target is parsed as
    ``"<timestamp> <value>"``, separated by a single space.

    :param key: metric name
    :param source: passed through to ``self.meta.get_metric_target``
    :return: list of ``Metric`` objects, in file order
    :raises DatabandError: when the metric target does not exist
    """
    metric_target = self.meta.get_metric_target(key, source=source)
    if not metric_target.exists():
        raise DatabandError("Metric '%s' not found" % key)

    history = []
    for raw_line in metric_target.readlines():
        record = raw_line.strip()
        if not record:
            # a blank line would otherwise break the two-value unpacking below
            continue
        ts, val = record.split(" ")
        history.append(Metric(key, float(val), datetime.fromtimestamp(int(ts))))
    return history
def get_all_metrics_values(self, source=None):
    """
    Collect every known metric into a ``{key: value}`` dict.

    :param source: passed to ``_get_all_metrics_names`` and ``get_metric``
    :return: dict mapping each metric's ``key`` to its ``value``
    :raises DatabandError: when reading any single metric fails; the original
        exception is attached as the nested exception
    """
    loaded = []
    for name in self._get_all_metrics_names(source=source):
        try:
            metric = self.get_metric(name, source=source)
        except Exception as ex:
            raise DatabandError(
                "Failed to read metrics for %s at %s" % (name, self.meta.root),
                nested_exceptions=ex,
            )
        loaded.append(metric)
    return dict((metric.key, metric.value) for metric in loaded)
def task_found_in_airflow_dag(root_task):
    """
    Build the error for a task that was discovered through DAG loading.

    :param root_task: the task in question; interpolated into the message
        and the help text
    :return: DatabandError (returned, not raised) with exc info display off
    """
    message = (
        "Task '%s' implementation has been discovered via DAGs loading" % root_task
    )
    hint = (
        "Your task %s were loaded via dags folder, currently we don't support that.\n"
        "You should define your tasks not in airflow/dag folder, but inside your project.\n"
        "Use module key in a [databand] section of config file ( $DBND__DATABAND__MODULE )"
    ) % root_task
    return DatabandError(message, help_msg=hint, show_exc_info=False)
def _validate_airflow_db(self):
    """
    Check that the Airflow DB used by the current session can be queried.

    Two probes are run in order: ``database_exists`` on the connection url,
    then a ``DagRun`` query inside a session.

    :raises DatabandError: when either probe raises
    """
    from airflow import configuration, settings

    # getting url directly from airflow
    # it's possible that
    #  * user use _cmd to generate url ( we don't want to have an extra call there)
    #  * Session was initialized with different value than AIRFLOW__CORE__SQL_CONN_STRING
    db_url = settings.Session.session_factory.kw["bind"].url

    logger.info(
        "Using airflow executor '%s' with airflow DB at '%s' \nAIRFLOW_HOME='%s'",
        self.airflow_task_executor.__class__.__name__,
        repr(db_url),
        configuration.AIRFLOW_HOME,
    )

    help_msg = (
        "Check that sql_alchemy_conn in airflow.cfg or environment variable "
        "AIRFLOW__CORE__SQL_ALCHEMY_CONN is set correctly and that you run airflow initdb command"
    )
    err_msg = "You are running in Airflow mode (task_executor={}) with DB at {}".format(
        RunConfig().task_executor_type, repr(db_url)
    )

    from dbnd_airflow._vendor.database import database_exists

    # NOTE(review): only an exception from database_exists is treated as
    # failure; its return value is not inspected — confirm that is intended
    try:
        database_exists(db_url)
    except Exception as ex:
        not_found = "Airflow DB is not found! %s : %s" % (err_msg, str(ex))
        raise DatabandError(not_found, help_msg=help_msg, nested_exceptions=[])

    try:
        with create_session() as session:
            session.query(DagRun).first()
    except Exception as ex:
        not_initialized = "Airflow DB is not initialized! %s : %s" % (
            err_msg,
            str(ex),
        )
        raise DatabandError(not_initialized, help_msg=help_msg)
def get_children(self):
    # type: (...)-> List[Task]
    """
    Resolve ``self.children`` ids into task objects.

    Lookups go through the task instance cache of ``self.dbnd_context``.

    :return: list of tasks, in the order of ``self.children``
    :raises DatabandError: when the cache returns ``None`` for an id
    """
    cache = self.dbnd_context.task_instance_cache
    resolved = []
    for child_id in self.children:
        task = cache.get_task_by_id(child_id)
        if task is None:
            raise DatabandError(
                "You have created %s in different dbnd_context, "
                "can't find task object in current context!" % child_id
            )
        resolved.append(task)
    return resolved
def parse_bool(s):
    """
    Parse a loosely formatted boolean.

    ``None`` is ``False``. Otherwise the value is stringified, lower-cased
    and stripped; anything from the first "#" onward is dropped.

    :param s: value to parse
    :return: ``True`` for t/true/yes/1, ``False`` for f/false/no/0
    :raises DatabandError: for any other text
    """
    if s is None:
        return False

    text = str(s).lower().strip()
    # keep only the part before an inline "#" comment
    text = text.split("#")[0].strip()

    if text in ("t", "true", "yes", "1"):
        return True
    if text in ("f", "false", "no", "0"):
        return False
    raise DatabandError("Can't parse '%s' as boolean" % s)
def load_python_attr_from_module(attr_path):
    """
    Load an attribute given a dotted ``module.path.attr`` string.

    The text after the last dot matched by the pattern is the attribute
    name; everything before it is the module path.

    :param attr_path: dotted path to resolve
    :return: the attribute object found on the loaded module
    :raises DatabandError: when the module has no such attribute; a
        ``friendly_error`` config error is raised for a malformed path
    """
    parsed = re.match(r"^(\S+)\.(\S+)", attr_path)
    if parsed is None:
        raise friendly_error.config.wrong_func_attr_format(attr_path)

    module_path, attr_name = parsed.groups()
    module = _load_module(module_path, description="")

    if hasattr(module, attr_name):
        return getattr(module, attr_name)
    raise DatabandError("Failed to import symbol %s" % attr_path)
def _wait_for_pod_started(self, _logger=None):
    """
    Block until the pod is no longer in the pending phase.

    will try to raise an exception if the pod fails to start
    (see DbndPodLauncher.check_deploy_errors)

    The pod status is polled once per second; every poll also runs
    ``self.check_deploy_errors`` on the status.

    :param _logger: logger for the per-iteration debug message; defaults to
        ``self.log``
    :raises DatabandError: when no pod status is returned, or the pod is
        still pending after ``self.kube_config.startup_timeout``
    """
    _logger = _logger or self.log
    start_time = datetime.now()
    while True:
        pod_status = self.get_pod_status_v1()
        if not pod_status:
            # The original message left its "%s" placeholder unformatted.
            # NOTE(review): assumes the pod name may be exposed as `self.name`;
            # falls back to the controller object itself — confirm.
            raise DatabandError(
                "Can not find pod at k8s:%s" % getattr(self, "name", self)
            )

        # PATCH: validate deploy errors
        self.check_deploy_errors(pod_status)

        pod_phase = pod_status.status.phase
        if pod_phase.lower() != PodStatus.PENDING:
            return

        # presumably startup_timeout is a timedelta, as it is compared with
        # one — verify against the kube config definition
        startup_delta = datetime.now() - start_time
        if startup_delta >= self.kube_config.startup_timeout:
            raise DatabandError("Pod is still not running after %s" % startup_delta)

        time.sleep(1)
        _logger.debug("Pod not yet started: %s", pod_status.status)
def failed_to_import_user_module(ex, module, description):
    """
    Build the error for a user module that could not be loaded.

    The exception text comes from ``sys.exc_info()``, i.e. the exception
    currently being handled; the ``ex`` argument is not read by this function.

    :param ex: the import exception (unused here)
    :param module: name of the module that failed to load
    :param description: what required the module; shown in the help text
    :return: DatabandError (returned, not raised) with exc info display off
    """
    trace_text = format_exception_as_str(sys.exc_info())
    summary = "Module '%s' can not be loaded: %s" % (
        module,
        dbnd_module_not_found_tip(module),
    )
    hint = (
        " Databand is trying to load user module '%s' as required by %s: \n "
        "Probably, it has compile errors or not exists."
    ) % (module, description)
    return DatabandError(
        "%s exception: %s." % (summary, trace_text),
        help_msg=hint,
        show_exc_info=False,
    )
def build_from_message(cls, task_run, msg, help_msg):
    """
    Build a TaskRunError from a plain message.

    TODO: very ugly hack, we need to support TaskRunError without exc_info

    A DatabandError is raised and caught on the spot so that real
    ``exc_info`` and traceback text exist for the TaskRunError.

    :param task_run: task run the error belongs to; its ``airflow_context``
        is passed along
    :param msg: error message
    :param help_msg: help text attached to the DatabandError
    :return: TaskRunError
    """
    try:
        raise DatabandError(msg, show_exc_info=False, help_msg=help_msg)
    except DatabandError:
        # built inside the handler so exc_info/format_exc see the active error
        error_fields = dict(
            exc_info=sys.exc_info(),
            traceback=traceback.format_exc(),
            task_run=task_run,
            airflow_context=task_run.airflow_context,
        )
        return TaskRunError(**error_fields)
def _task_list(task_or_task_list):
    """
    Normalize a task, or an iterable of tasks, into a list of Task objects.

    A non-iterable argument is wrapped into a one-item list. ``Target``
    items are replaced by their ``task``; items for which
    ``_try_get_task_from_airflow_op`` returns a truthy value are replaced by
    that value.

    :param task_or_task_list: a single item or an iterable of items
    :return: list of Task objects
    :raises DatabandError: when an item does not resolve to a ``Task``
    """
    try:
        candidates = list(task_or_task_list)
    except TypeError:
        candidates = [task_or_task_list]

    from targets import Target

    tasks = []
    for item in candidates:
        if isinstance(item, Target):
            item = item.task

        item = _try_get_task_from_airflow_op(item) or item

        if isinstance(item, Task):
            tasks.append(item)
            continue
        raise DatabandError(
            "Relationships can only be set between "
            "Databand Tasks; received {}".format(item.__class__.__name__)
        )
    return tasks
def _get_all_tasks(self, upstream=False, should_run_only=False): seen = set() result = [] to_process = [self.task] # should be iterative, we don't like recursive as we can have huge nesting while to_process: current = to_process.pop(0) seen.add(current.task_id) if should_run_only and not current.ctrl.should_run(): continue t_dag = current.ctrl.task_dag result.append(current) for t_connected_task_id in t_dag._direction(upstream): if t_connected_task_id in seen: continue connected_task = self.get_task_by_task_id(t_connected_task_id) if not connected_task: raise DatabandError( "Can't resolve task %s by it's id" % t_connected_task_id ) to_process.append(connected_task) return set(result)
def failed_to_load_versioned_dagbag_plugin(exc):
    """
    Build the error for a failed switch to the versioned dagbag.

    :param exc: the original exception
    :return: DatabandError (returned, not raised) with exc info display on
    """
    # The cause is attached through `nested_exceptions`, as the other error
    # factories in this file do, instead of as a second positional argument.
    # NOTE(review): assumes DatabandError treats it like the single-exception
    # `nested_exceptions=ex` calls elsewhere — confirm.
    return DatabandError(
        "Failed to switch to versioned dagbag!",
        nested_exceptions=exc,
        show_exc_info=True,
    )
def _find_param(task, param_name): # type: (_BaseTask,str)->ParameterValue found = task.task_params.get_param_value(param_name) if not found: raise DatabandError("%s parameter not found at %s" % (param_name, task)) return found
def marshaller_no_merge(marshaller, target, partitions):
    """
    Build the error for a marshaller that can not merge partitions on read.

    :param marshaller: the marshaller in use
    :param target: target being read
    :param partitions: the partitions found; only their count is reported
    :return: DatabandError (returned, not raised)
    """
    template = (
        "Can't merge {p_len} partitions on read from {target} "
        "as current marshaller {marshaller} doesn't support merge funcitonality"
    )
    message = template.format(
        p_len=len(partitions), target=target, marshaller=marshaller
    )
    return DatabandError(message)
def failed_to_write_pandas(ex, target):
    """
    Build the error for a failed write to a target.

    :param ex: the original exception; attached as the nested exception
    :param target: destination target; rendered through ``_safe_target``
    :return: DatabandError (returned, not raised)
    """
    template = "There is an error while writing to {target} {ex}"
    message = template.format(target=_safe_target(target), ex=ex)
    return DatabandError(message, nested_exceptions=ex)
def failed_to_write_task_output(ex, target, value_type):
    """
    Build the error for a failed write of a task output.

    :param ex: the original exception; attached as the nested exception
    :param target: output target; its ``name`` and ``config`` are shown
    :param value_type: value type that was being written
    :return: DatabandError (returned, not raised)
    """
    # the closing quote after {target} was missing, so the quoted target ran
    # into the config text
    template = (
        "Failed to write '{target.name}' to '{target}' {target.config} "
        "({value_type}): {ex}"
    )
    message = template.format(value_type=value_type, target=target, ex=ex)
    return DatabandError(message, nested_exceptions=ex)
def build_task_from_config(task_name, expected_type=None):
    """
    Build a task by name through the task registry.

    :param task_name: name of the task to build; must be truthy
    :param expected_type: passed through to the registry's ``build_dbnd_task``
    :return: whatever ``build_dbnd_task`` returns
    :raises DatabandError: when ``task_name`` is empty or ``None``
    """
    if not task_name:
        raise DatabandError("Task name can not be None")

    registry = get_task_registry()
    built = registry.build_dbnd_task(task_name=task_name, expected_type=expected_type)
    return built
def failed_to_read_pandas(ex, target):
    """
    Build the error for a failed read from a target.

    :param ex: the original exception; attached as a one-item nested list
    :param target: source target; rendered through ``_safe_target``
    :return: DatabandError (returned, not raised)
    """
    template = "There is an error while reading {target}: {ex}"
    message = template.format(target=_safe_target(target), ex=ex)
    return DatabandError(message, nested_exceptions=[ex])
def dbnd_set_task_failed(self, pod_data):
    """
    Handle a failure event for a pod and apply it to the matching task run.

    Steps, in order:
      1. find the task run for the pod; return if there is none or it is
         already FAILED;
      2. collect pod logs (best effort) and a pod status summary;
      3. build a TaskRunError from a DatabandError carrying that text;
      4. depending on the airflow task instance state, mark the task run as
         FAILED or hand the pod to ``handle_pod_retry``.

    :param pod_data: pod object; ``metadata.name`` and ``metadata.namespace``
        are read here
    """
    metadata = pod_data.metadata
    # noinspection PyBroadException
    logger.debug("Getting task run")
    task_run = _get_task_run_from_pod_data(pod_data)
    if not task_run:
        logger.info("Can't find a task run for %s", metadata.name)
        return
    if task_run.task_run_state == TaskRunState.FAILED:
        logger.info("Skipping 'failure' event from %s", metadata.name)
        return

    pod_ctrl = self.get_pod_ctrl(metadata.name, metadata.namespace)
    logs = []
    try:
        log_printer = lambda x: logs.append(x)
        # NOTE(review): both calls append to the same `logs` list, so the
        # tail lines may be collected twice — confirm this is intended
        pod_ctrl.stream_pod_logs(
            print_func=log_printer, tail_lines=100, follow=False
        )
        pod_ctrl.stream_pod_logs(print_func=log_printer, follow=False)
    except Exception as ex:
        # when deleting pods we get extra failure events so we will have lots of this in the log
        if isinstance(ex, ApiException) and ex.status == 404:
            logger.info(
                "failed to get log for pod %s: pod not found", metadata.name
            )
        else:
            logger.error("failed to get log for %s: %s", metadata.name, ex)

    try:
        # only the first 15 collected lines go into the error message
        short_log = "\n".join(["out:%s" % l for l in logs[:15]])
    except Exception as ex:
        logger.error(
            "failed to build short log message for %s: %s", metadata.name, ex
        )
        short_log = None

    status_log = _get_status_log_safe(pod_data)

    from dbnd._core.task_run.task_run_error import TaskRunError

    # work around to build an error object
    # (the error is raised and caught right away so that build_from_ex
    # receives a real, raised exception)
    try:
        err_msg = "Pod %s at %s has failed!" % (metadata.name, metadata.namespace)
        if short_log:
            err_msg += "\nLog:%s" % short_log
        if status_log:
            err_msg += "\nPod Status:%s" % status_log
        raise DatabandError(
            err_msg,
            show_exc_info=False,
            help_msg="Please see full pod log for more details",
        )
    except DatabandError as ex:
        error = TaskRunError.build_from_ex(ex, task_run)

    airflow_task_state = get_airflow_task_instance_state(task_run=task_run)
    logger.debug("task airflow state: %s ", airflow_task_state)

    from airflow.utils.state import State

    if airflow_task_state == State.FAILED:
        # let just notify the error, so we can show it in summary it
        # we will not send it to databand tracking store
        task_run.set_task_run_state(TaskRunState.FAILED, track=False, error=error)
        logger.info(
            "%s",
            task_run.task.ctrl.banner(
                "Task %s has failed at pod '%s'!"
                % (task_run.task.task_name, metadata.name),
                color="red",
                task_run=task_run,
            ),
        )
    else:
        if airflow_task_state == State.QUEUED:
            # Special case - no airflow code has been run in the pod at all. Must increment try number and send
            # to retry if exit code is matching
            if not pod_ctrl.handle_pod_retry(
                pod_data, task_run, increment_try_number=True
            ):
                # No retry was sent
                task_run.set_task_run_state(
                    TaskRunState.FAILED, track=True, error=error
                )
        elif airflow_task_state == State.RUNNING:
            # Task was killed unexpectedly -- probably pod failure in K8s - Possible retry attempt
            if not pod_ctrl.handle_pod_retry(pod_data, task_run):
                # No retry was sent
                task_run.set_task_run_state(
                    TaskRunState.FAILED, track=True, error=error
                )
        else:
            task_run.set_task_run_state(
                TaskRunState.FAILED, track=True, error=error
            )
        # NOTE(review): nesting was reconstructed from a flattened source;
        # confirm the log upload belongs to this (non-FAILED) branch
        if logs:
            task_run.tracker.save_task_run_log("\n".join(logs))
def get_artifact(self, name):
    """
    Return the stored artifact with the given name.

    :param name: artifact name, resolved via ``self.meta.get_artifact_target``
    :return: ``Artifact`` built from the artifact target's path
    :raises DatabandError: when the artifact target does not exist
    """
    artifact_target = self.meta.get_artifact_target(name)
    if artifact_target.exists():
        return Artifact(artifact_target.path)
    raise DatabandError("Artifact '%s' not found" % name)
def _validate_airflow_db(self):
    """
    Check that the Airflow DB is reachable, initialized and has the dbnd pool.

    Checks, in order:
      1. ``database_exists`` on the connection url does not raise;
      2. a ``DagRun`` query inside a session does not raise;
      3. the pool named by the ``[airflow] dbnd_pool`` config value exists.
         A missing pool is created when the name is the default
         "dbnd_pool"; otherwise an error is raised.

    :raises DatabandError: when any of the checks fails
    """
    from airflow import configuration, settings

    # getting url directly from airflow
    # it's possible that
    #  * user use _cmd to generate url ( we don't want to have an extra call there)
    #  * Session was initialized with different value than AIRFLOW__CORE__SQL_CONN_STRING
    conn_string_url = settings.Session.session_factory.kw["bind"].url

    logger.info(
        "Using airflow executor '%s' with airflow DB at '%s' \nAIRFLOW_HOME='%s'",
        self.airflow_task_executor.__class__.__name__,
        conn_string_url.__repr__(),
        configuration.AIRFLOW_HOME,
    )

    not_exist_help_msg = (
        "Check that sql_alchemy_conn in airflow.cfg or environment variable "
        "AIRFLOW__CORE__SQL_ALCHEMY_CONN is set correctly."
    )
    not_initialised_help_msg = "Make sure that you run the command: airflow initdb"

    err_msg = "You are running in Airflow mode (task_executor={}) with DB at {}".format(
        RunConfig().task_executor_type, conn_string_url.__repr__()
    )

    from dbnd_airflow._vendor.database import database_exists

    # NOTE(review): only an exception from database_exists is treated as
    # failure; its return value is not inspected — confirm that is intended
    try:
        database_exists(conn_string_url)
    except Exception as ex:
        raise DatabandError(
            "Airflow DB is not found! %s : %s" % (err_msg, str(ex)),
            help_msg=not_exist_help_msg,
            nested_exceptions=[],
        )

    try:
        with create_session() as session:
            session.query(DagRun).first()
    except Exception as ex:
        raise DatabandError(
            "Airflow DB is not initialized! %s : %s" % (err_msg, str(ex)),
            help_msg=not_initialised_help_msg,
        )

    pool_help_msg = (
        "Check that you did not change dbnd_pool configuration in airflow.cfg "
        "and that you run the command: airflow initdb."
    )

    user_defined_pool = dbnd_config.get("airflow", "dbnd_pool")
    is_defined_pool_dbnd = user_defined_pool == "dbnd_pool"

    # The pool lookup gets its own session scope: previously it reused the
    # `session` variable after the `with` block above had already exited.
    with create_session() as session:
        is_user_pool_in_db = (
            session.query(Pool.pool).filter(Pool.pool == user_defined_pool).first()
            is not None
        )

    if not is_user_pool_in_db:
        if is_defined_pool_dbnd:
            create_airflow_pool(user_defined_pool)
        else:
            raise DatabandError(
                "Airflow DB does not have dbnd_pool entry in slots table",
                help_msg=pool_help_msg,
            )
def load_python_callable(callable_path):
    """
    Load a callable given its dotted path.

    :param callable_path: path passed to ``load_python_attr_from_module``
    :return: the loaded callable
    :raises DatabandError: when the loaded attribute is not callable
    """
    loaded = load_python_attr_from_module(callable_path)
    if callable(loaded):
        return loaded
    raise DatabandError("The `%s` is not `callable`" % loaded)
def failed_to_retrieve_dag_via_dbnd_versioned_dagbag(exc):
    """
    Build the error for a DAG that could not be retrieved from DbndDagBag.

    :param exc: the original exception
    :return: DatabandError (returned, not raised) with exc info display on
    """
    # The cause is attached through `nested_exceptions`, the keyword the other
    # error factories in this file use; `exc=` appears nowhere else.
    # NOTE(review): confirm DatabandError has no dedicated `exc` keyword.
    return DatabandError(
        "Failed to retrieve DAG from DbndDagBag!",
        nested_exceptions=exc,
        show_exc_info=True,
    )