def _log_result(task_run, result):
    """
    For tracking mode. Logs the task result and adds it to the
    target_origin map to support relationships between dynamic tasks.
    """
    try:
        task_result_parameter = task_run.task._params.get_param("result")

        # spread result into relevant fields
        if isinstance(task_result_parameter, FuncResultParameter):
            # assign all returned values to the relevant band outputs
            if result is None:
                return

            for r_name, value in task_result_parameter.named_results(result):
                _log_parameter_value(
                    task_run,
                    parameter_definition=task_run.task._params.get_param(r_name),
                    target=task_run.task._params.get_value(r_name),
                    value=value,
                )
        else:
            _log_parameter_value(
                task_run,
                parameter_definition=task_result_parameter,
                target=task_run.task.result,
                value=result,
            )
    except Exception as ex:
        log_exception(
            "Failed to log result to tracking store.", ex=ex, non_critical=True
        )
def parameter_exception(self, reason, ex):
    err_msg = "Failed to {reason} for parameter '{name}' at {task_family}()".format(
        reason=reason, name=self.name, task_family=self.task_family
    )
    log_exception(err_msg, ex, logger)
    raise DatabandBuildError(
        err_msg, nested_exceptions=[ex], help_msg=self._get_help_message()
    )
def _log_parameter_value(task_run, parameter_definition, target, value):
    try:
        # handle the case where the result is a Proxy value
        value_type = get_value_type_of_obj(value, parameter_definition.value_type)
        task_run.run.target_origin.add(target, value, value_type)
    except Exception as ex:
        log_exception(
            "Failed to register result to target tracking.", ex=ex, non_critical=True
        )

    try:
        task_run.tracker.log_parameter_data(
            parameter=parameter_definition,  # was: task_run.task.task_definition.task_class.result
            target=target,
            value=value,
            operation_type=DbndTargetOperationType.write,  # is it write? (or log?)
            operation_status=DbndTargetOperationStatus.OK,
        )
    except Exception as ex:
        log_exception(
            "Failed to log result to tracking store.", ex=ex, non_critical=True
        )
def log_parameter_data(self, parameter, target, value, operation_type, operation_status):
    # type: (TaskRunTracker, ParameterDefinition, Target, Any, DbndTargetOperationType, DbndTargetOperationStatus) -> None
    features_conf = self.settings.features
    if not features_conf.log_value_meta:
        return
    if value is None:
        return

    try:
        meta_conf = features_conf.get_value_meta_conf(
            parameter.value_meta_conf,
            value_type=parameter.value_type,
            target=target,
        )
        key = "{}.{}".format(self.task_run.task.task_name, parameter.name)
        target.target_meta = get_value_meta_from_value(key, value, meta_conf)

        self.tracking_store.log_target(
            task_run=self.task_run,
            target=target,
            target_meta=target.target_meta,
            operation_type=operation_type,
            operation_status=operation_status,
            param_name=parameter.name,
            task_def_uid=parameter.task_definition_uid,
        )
    except Exception as ex:
        log_exception(
            "Error occurred during target logging for %s" % (target,),
            ex,
            non_critical=True,
        )
def get_value_meta_from_value(
    value_name, value, meta_conf, raise_on_error=False
):  # type: (str, Any, ValueMetaConf, bool) -> Optional[ValueMeta]
    if value is None:
        return None

    obj_value_type = get_value_type_of_obj(value, default_value_type=ObjectValueType())
    if obj_value_type is None:
        logger.info(
            "Can't detect known type for '%s' with type='%s'", value_name, type(value)
        )
        return None

    try:
        return obj_value_type.get_value_meta(value, meta_conf=meta_conf)
    except Exception as ex:
        log_exception(
            "Failed to get value meta info for '%s' with type='%s'"
            " (detected as %s)" % (value_name, type(value), obj_value_type),
            ex,
            non_critical=not raise_on_error,
        )
        if raise_on_error:
            raise
    return None
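# A brief usage sketch (not from the source): calling get_value_meta_from_value
# on a plain Python object. ValueMetaConf(log_preview=..., log_schema=...) is the
# construction used elsewhere in this module; the key name and sample value here
# are illustrative assumptions only.
meta_conf = ValueMetaConf(log_preview=True, log_schema=True)
value_meta = get_value_meta_from_value("my_task.result", {"rows": 3}, meta_conf)
if value_meta is not None:
    # an unknown type falls back to ObjectValueType, so some meta
    # (e.g. a preview) may still be available
    print(value_meta.value_preview)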
def _invoke(self, name, kwargs):
    res = None
    failed_stores = []

    for store in self._stores:
        try:
            handler = getattr(store, name)
            handler_res = handler(**kwargs)
            if handler_res:
                res = handler_res
        except DatabandConnectionException as ex:
            logger.error(
                "Failed to store tracking information from %s at %s: %s"
                % (name, str(store), str(ex))
            )
            if self._remove_failed_store:
                failed_stores.append(store)
            if self._raise_on_error:
                raise
        except Exception as ex:
            if self._remove_failed_store:
                failed_stores.append(store)
            log_exception(
                "Failed to store tracking information from %s at %s: %s"
                % (name, str(store), str(ex)),
                ex,
                non_critical=True,
            )

    if failed_stores:
        for store in failed_stores:
            logger.warning(
                "Removing store %s from stores list due to failure" % (str(store),)
            )
            self._stores.remove(store)

    return res
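# A minimal sketch, assuming the surrounding class shape (not the library's
# public API): a composite tracking store that routes every call through
# _invoke by handler name, so one call fans out to all registered stores.
# CompositeTrackingStore and its constructor arguments are illustrative
# assumptions.
class CompositeTrackingStore(object):
    _invoke = _invoke  # reuse the dispatcher defined above as a method

    def __init__(self, stores, raise_on_error=False, remove_failed_store=True):
        self._stores = list(stores)
        self._raise_on_error = raise_on_error
        self._remove_failed_store = remove_failed_store

    def log_target(self, **kwargs):
        # dispatch to every registered store; failing stores may be dropped
        return self._invoke("log_target", kwargs)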
def _log_parameter_value(task_run, parameter_definition, target, value):
    # make sure it will be logged correctly
    parameter_definition = parameter_definition.modify(
        value_meta_conf=ValueMetaConf(log_preview=True, log_schema=True)
    )

    try:
        # handle the case where the result is a Proxy value
        value_type = get_value_type_of_obj(value, parameter_definition.value_type)
        task_run.run.target_origin.add(target, value, value_type)
    except Exception as ex:
        log_exception(
            "Failed to register result to target tracking.", ex=ex, non_critical=True
        )

    try:
        task_run.tracker.log_parameter_data(
            parameter=parameter_definition,  # was: task_run.task.task_definition.task_class.result
            target=target,
            value=value,
            operation_type=DbndTargetOperationType.write,  # is it write? (or log?)
            operation_status=DbndTargetOperationStatus.OK,
        )
    except Exception as ex:
        log_exception(
            "Failed to log result to tracking store.", ex=ex, non_critical=True
        )
def _log_inputs(task_run):
    """
    For tracking mode. Logs InMemoryTarget inputs.
    """
    try:
        params = task_run.task._params
        for param in params.get_params(input_only=True):
            value = params.get_value(param.name)

            if isinstance(value, InMemoryTarget):
                try:
                    param = param.modify(
                        value_meta_conf=ValueMetaConf(log_preview=True, log_schema=True)
                    )
                    task_run.tracker.log_parameter_data(
                        parameter=param,
                        target=value,
                        value=value._obj,
                        operation_type=DbndTargetOperationType.read,
                        operation_status=DbndTargetOperationStatus.OK,
                    )
                except Exception as ex:
                    log_exception(
                        "Failed to log input param to tracking store.",
                        ex=ex,
                        non_critical=True,
                    )
    except Exception as ex:
        log_exception(
            "Failed to log input params to tracking store.", ex=ex, non_critical=True
        )
def _log_inputs(task_run):
    """
    For tracking mode. Logs InMemoryTarget inputs.
    """
    try:
        params = task_run.task._params
        for param_value in params.get_param_values(ParameterFilters.INPUTS):
            param, value = param_value.parameter, param_value.value

            if isinstance(value, InMemoryTarget):
                try:
                    param = param.update_value_meta_conf_from_runtime_value(
                        value._obj, task_run.run.context.settings.tracking
                    )
                    task_run.tracker.log_parameter_data(
                        parameter=param,
                        target=value,
                        value=value._obj,
                        operation_type=DbndTargetOperationType.read,
                        operation_status=DbndTargetOperationStatus.OK,
                    )
                except Exception as ex:
                    log_exception(
                        "Failed to log input param to tracking store.",
                        ex=ex,
                        non_critical=True,
                    )
    except Exception as ex:
        log_exception(
            "Failed to log input params to tracking store.", ex=ex, non_critical=True
        )
def log_parameter_data(self, parameter, target, value, operation_type, operation_status):
    # type: (TaskRunTracker, ParameterDefinition, Target, Any, DbndTargetOperationType, DbndTargetOperationStatus) -> None
    tracking_conf = self.settings.tracking
    if not tracking_conf.log_value_meta or value is None:
        return

    try:
        target.target_meta = get_value_meta(
            value,
            parameter.value_meta_conf,
            tracking_config=tracking_conf,
            value_type=parameter.value_type,
            target=target,
        )
        # FIXME: if we failed to get target meta for some reason,
        # the target operation won't be logged!
        if target.target_meta is None:
            return

        self.tracking_store.log_target(
            task_run=self.task_run,
            target=target,
            target_meta=target.target_meta,
            operation_type=operation_type,
            operation_status=operation_status,
            param_name=parameter.name,
            task_def_uid=parameter.task_definition_uid,
        )
    except Exception as ex:
        log_exception(
            "Error occurred during target logging for %s" % (target,),
            ex,
            non_critical=True,
        )
def log_dataframe(self, key, df, meta_conf):
    # type: (str, Union[pd.DataFrame, spark.DataFrame], ValueMetaConf) -> None
    try:
        # combine meta_conf with the config settings
        meta_conf = self.settings.features.get_value_meta_conf(meta_conf)
        value_meta = get_value_meta_for_metric(key, df, meta_conf=meta_conf)
        if not value_meta:
            return

        # TODO: performance optimization opportunity: batch log_metric API calls
        if value_meta.data_dimensions:
            self._log_metric("%s.shape" % key, value_meta.data_dimensions)
            for dim, size in enumerate(value_meta.data_dimensions):
                self._log_metric("%s.shape[%s]" % (key, dim), size)

        if meta_conf.log_schema:
            self._log_metric("%s.schema" % key, value_meta.data_schema, is_json=True)
        if meta_conf.log_preview:
            self._log_metric("%s.preview" % key, value_meta.value_preview)
        if meta_conf.log_df_hist:
            self._log_histograms(key, value_meta)
    except Exception as ex:
        log_exception(
            "Error occurred during log_dataframe for %s" % (key,),
            ex,
            non_critical=True,
        )
def banner(
    self,
    msg,
    color=None,
    verbose=False,
    print_task_band=False,
    task_run=None,
    exc_info=None,
):
    try:
        b = TextBanner(msg, color)

        if verbose or is_verbose():
            verbosity = FormatterVerbosity.HIGH
        else:
            verbosity = FormatterVerbosity.NORMAL

        builder = _TaskBannerBuilder(
            task=self.task,
            banner=b,
            verbosity=verbosity,
            print_task_band=print_task_band,
        )
        return builder.build_banner(
            task_run=task_run, exc_info=exc_info
        ).get_banner_str()
    except Exception as ex:
        log_exception(
            "Failed to calculate banner for '%s'" % self.task_id,
            ex,
            non_critical=True,
        )
        return msg + (" (task_id=%s)" % self.task_id)
def _log_parameter_value(task_run, parameter_definition, target, value):
    # type: (TaskRun, ParameterDefinition, Target, Any) -> None
    # make sure it will be logged correctly
    parameter_definition = parameter_definition.update_value_meta_conf_from_runtime_value(
        value, task_run.run.context.settings.tracking
    )

    try:
        # handle the case where the result is a Proxy value
        value_type = get_value_type_of_obj(value, parameter_definition.value_type)
        task_run.run.target_origin.add(target, value, value_type)
    except Exception as ex:
        log_exception(
            "Failed to register result to target tracking.", ex=ex, non_critical=True
        )

    try:
        task_run.tracker.log_parameter_data(
            parameter=parameter_definition,
            target=target,
            value=value,
            operation_type=DbndTargetOperationType.write,  # is it write? (or log?)
            operation_status=DbndTargetOperationStatus.OK,
        )
    except Exception as ex:
        log_exception(
            "Failed to log result to tracking store.", ex=ex, non_critical=True
        )
def log_metric(self, key, value, timestamp=None, source=None):
    try:
        self._log_metric(key, value, timestamp=timestamp, source=source)
    except Exception as ex:
        log_exception(
            "Error occurred during log_metric for %s" % (key,),
            ex,
            non_critical=True,
        )
def log_data(
    self,
    key,  # type: str
    data,  # type: Union[pd.DataFrame, spark.DataFrame, PostgresTable, SnowflakeTable]
    meta_conf,  # type: ValueMetaConf
    path=None,  # type: Optional[Union[Target, str]]
    operation_type=DbndTargetOperationType.read,  # type: DbndTargetOperationType
    operation_status=DbndTargetOperationStatus.OK,  # type: DbndTargetOperationStatus
    raise_on_error=False,  # type: bool
):  # type: (...) -> None
    try:
        # combine meta_conf with the config settings
        meta_conf = self.settings.tracking.get_value_meta_conf(meta_conf)
        value_meta = get_value_meta_from_value(key, data, meta_conf, raise_on_error)
        if not value_meta:
            logger.warning(
                "Couldn't log the requested data {name}: can't log objects of type {value_type}".format(
                    name=key, value_type=type(data)
                )
            )
            return

        ts = utcnow()
        if path:
            self.tracking_store.log_target(
                task_run=self.task_run,
                target=path,
                target_meta=value_meta,
                operation_type=operation_type,
                operation_status=operation_status,
                param_name=key,
            )

        metrics = value_meta.build_metrics_for_key(key, meta_conf)
        if metrics["user"]:
            self._log_metrics(metrics["user"])
        if metrics["histograms"]:
            self.tracking_store.log_histograms(
                task_run=self.task_run, key=key, value_meta=value_meta, timestamp=ts
            )
        if not (metrics["user"] or metrics["histograms"] or path):
            logger.info("No metrics to log_data(key={}, data={})".format(key, data))
    except Exception as ex:
        log_exception(
            "Error occurred during log_data for %s" % (key,),
            ex,
            non_critical=not raise_on_error,
        )
        if raise_on_error:
            raise
def collect_data_from_dbt_cloud(
    dbt_cloud_account_id, dbt_cloud_api_token, dbt_job_run_id
):
    """
    Collect metadata for a single run from dbt cloud.

    Args:
        dbt_cloud_account_id: dbt cloud account id used to identify with the dbt cloud API
        dbt_cloud_api_token: API token used to authenticate with the dbt cloud API
        dbt_job_run_id: run id of the dbt run whose metadata we want to report

    Example:
        @task
        def prepare_data():
            collect_data_from_dbt_cloud(
                dbt_cloud_account_id=my_dbt_cloud_account_id,
                dbt_cloud_api_token="my_dbt_cloud_api_token",
                dbt_job_run_id=12345,
            )
    """
    if not dbt_cloud_api_token or not dbt_cloud_account_id:
        logger.warning(
            "Can't collect data from dbt cloud: account id or api token was not supplied"
        )
        return

    try:
        dbt_cloud_client = DbtCloudApiClient(
            account_id=dbt_cloud_account_id, dbt_cloud_api_token=dbt_cloud_api_token
        )
        dbt_run_meta_data = dbt_cloud_client.get_run(dbt_job_run_id)
        if not dbt_run_meta_data:
            logger.warning("Failed to get run data from dbt cloud")
            return

        env_id = dbt_run_meta_data.get("environment_id")
        env = dbt_cloud_client.get_environment(env_id=env_id)
        if env:
            dbt_run_meta_data["environment"] = env

        for step in dbt_run_meta_data.get("run_steps", []):
            step_run_results_artifact = dbt_cloud_client.get_run_results_artifact(
                dbt_job_run_id, step["index"]
            )
            if step_run_results_artifact:
                step["run_results"] = step_run_results_artifact

            step_run_manifest_artifact = dbt_cloud_client.get_manifest_artifact(
                dbt_job_run_id, step["index"]
            )
            if step_run_manifest_artifact:
                step["manifest"] = step_run_manifest_artifact

        _report_dbt_metadata(dbt_run_meta_data)
    except Exception as e:
        log_exception("Could not collect data from dbt cloud", e)
def log_metric(self, key, value, timestamp=None, source=None):
    # type: (str, Any, Optional[datetime], Optional[MetricSource]) -> None
    try:
        self.log_metrics({key: value}, source, timestamp)
    except Exception as ex:
        log_exception(
            "Error occurred during log_metric for %s" % (key,),
            ex,
            non_critical=True,
        )
def log_metric(self, key, value, timestamp=None, source=None):
    # type: (str, Any, Optional[datetime], Optional[MetricSource]) -> None
    try:
        metric = Metric(
            key=key,
            value=value,
            source=source,
            timestamp=timestamp or utcnow(),
        )
        self._log_metrics([metric])
    except Exception as ex:
        log_exception(
            "Error occurred during log_metric for %s" % (key,),
            ex,
            non_critical=True,
        )
def _log_result(task_run, result):
    # type: (TaskRun, Any) -> None
    """
    For tracking mode. Logs the task result and adds it to the
    target_origin map to support relationships between dynamic tasks.
    """
    try:
        result_param = task_run.task.task_params.get_param_value(RESULT_PARAM)
        if not result_param:
            logger.debug(
                "No result params to log for task {}".format(task_run.task_af_id)
            )
            return

        # we know the parameter value is a target because this is an output param;
        # the target is created during task creation
        result_param_def, result_target = result_param.parameter, result_param.value

        # spread result into relevant fields
        if isinstance(result_param_def, FuncResultParameter):
            # assign all returned values to the relevant band outputs
            if result is None:
                return

            for result_name, value in result_param_def.named_results(result):
                # we know the parameter value is a target because this is an output param;
                # the target is created during task creation
                parameter_value = task_run.task.task_params.get_param_value(result_name)
                _log_parameter_value(
                    task_run,
                    parameter_definition=parameter_value.parameter,
                    target=parameter_value.value,
                    value=value,
                )
        else:
            _log_parameter_value(
                task_run,
                parameter_definition=result_param_def,
                target=result_target,
                value=result,
            )
    except Exception as ex:
        log_exception(
            "Failed to log result to tracking store.", ex=ex, non_critical=True
        )
def log_data(
    self,
    key,  # type: str
    data,  # type: Union[pd.DataFrame, spark.DataFrame, PostgresTable, SnowflakeTable]
    meta_conf,  # type: ValueMetaConf
    path=None,  # type: Optional[Union[Target, str]]
    operation_type=DbndTargetOperationType.read,  # type: DbndTargetOperationType
    operation_status=DbndTargetOperationStatus.OK,  # type: DbndTargetOperationStatus
):  # type: (...) -> None
    try:
        # combine meta_conf with the config settings
        meta_conf = self.settings.features.get_value_meta_conf(meta_conf)
        value_meta = get_value_meta_from_value(key, data, meta_conf=meta_conf)
        if not value_meta:
            return

        ts = utcnow()
        if path:
            self.tracking_store.log_target(
                task_run=self.task_run,
                target=path,
                target_meta=value_meta,
                operation_type=operation_type,
                operation_status=operation_status,
            )

        metrics = value_meta.build_metrics_for_key(key, meta_conf)
        if metrics["user"]:
            self._log_metrics(metrics["user"])
        if metrics["histograms"]:
            self.tracking_store.log_histograms(
                task_run=self.task_run, key=key, value_meta=value_meta, timestamp=ts
            )
    except Exception as ex:
        log_exception(
            "Error occurred during log_data for %s" % (key,),
            ex,
            non_critical=True,
        )
def log_artifact(self, name, artifact):
    try:
        # file storage will save the file; the db will save the path
        artifact_target = self.task_run.meta_files.get_artifact_target(name)
        self.tracking_store.log_artifact(
            task_run=self.task_run,
            name=name,
            artifact=artifact,
            artifact_target=artifact_target,
        )
    except Exception as ex:
        log_exception(
            "Error occurred during log_artifact for %s" % (name,),
            ex,
            non_critical=True,
        )
def log_data(
    self,
    key,  # type: str
    data,  # type: Union[pd.DataFrame, spark.DataFrame, PostgresTable, SnowflakeTable]
    meta_conf,  # type: ValueMetaConf
    path=None,  # type: Optional[Union[Target, str]]
    operation_type=DbndTargetOperationType.read,  # type: DbndTargetOperationType
    operation_status=DbndTargetOperationStatus.OK,  # type: DbndTargetOperationStatus
    raise_on_error=False,  # type: bool
):  # type: (...) -> None
    try:
        # combine meta_conf with the config settings
        value_meta = get_value_meta(
            data, meta_conf, tracking_config=self.settings.tracking
        )
        if not value_meta:
            logger.warning(
                "Couldn't log the requested data {name}: can't log objects of type {value_type}".format(
                    name=key, value_type=type(data)
                )
            )
            return

        ts = utcnow()
        if path:
            self.tracking_store.log_target(
                task_run=self.task_run,
                target=path,
                target_meta=value_meta,
                operation_type=operation_type,
                operation_status=operation_status,
                param_name=key,
            )

        self.log_value_metrics(key, meta_conf, value_meta, ts)
    except Exception as ex:
        log_exception(
            "Error occurred during log_data for %s" % (key,),
            ex,
            non_critical=not raise_on_error,
        )
        if raise_on_error:
            raise
def banner(
    self,
    msg,
    color=None,
    verbose=False,
    print_task_band=False,
    task_run=None,
    exc_info=None,
):
    task_id = self.task.task_id
    try:
        # saving the banner for testability
        self._banner = TextBanner(msg, color)

        if verbose or is_verbose():
            verbosity = FormatterVerbosity.HIGH
        else:
            verbosity = FormatterVerbosity.NORMAL

        builder = _TaskBannerBuilder(
            task=self.task,
            banner=self._banner,
            verbosity=verbosity,
            print_task_band=print_task_band,
        )

        # different banners for tracking and orchestration
        if TaskEssence.TRACKING.is_instance(self.task):
            builder.build_tracking_banner(task_run=task_run, exc_info=exc_info)
        elif TaskEssence.CONFIG.is_instance(self.task):
            builder.build_config_banner()
        else:
            builder.build_orchestration_banner(task_run=task_run, exc_info=exc_info)

        return self._banner.get_banner_str()
    except Exception as ex:
        log_exception(
            "Failed to calculate banner for '%s'" % task_id, ex, non_critical=True
        )
        return msg + (" (task_id=%s)" % task_id)
def _thread_worker(self) -> None:
    failed_on_previous_iteration = False
    while True:
        item = self.queue.get()
        try:
            if item is _TERMINATOR:
                break
            if not failed_on_previous_iteration:
                self.item_processing_handler(item)
            else:
                self.skip_processing_callback(item)
        except Exception as e:
            # it's better to keep draining the queue and calling queue.task_done()
            # to avoid hanging on queue.join()
            failed_on_previous_iteration = True
            err_msg = "TrackingAsyncWebChannelBackgroundWorker will skip processing next events"
            log_exception(err_msg, e, logger)
        finally:
            self.queue.task_done()
            sleep(0)
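# A self-contained sketch of the sentinel-terminated worker pattern that
# _thread_worker implements, under assumed names (run_background_worker is not
# part of the source): a daemon thread drains a queue, a unique object marks
# shutdown, and task_done() is always called so queue.join() cannot hang.
import queue
import threading

_TERMINATOR = object()

def run_background_worker(handler):
    q = queue.Queue()

    def worker():
        while True:
            item = q.get()
            try:
                if item is _TERMINATOR:
                    break
                handler(item)
            finally:
                # always mark the item done, even on failure or shutdown
                q.task_done()

    threading.Thread(target=worker, daemon=True).start()
    return q

# usage: q = run_background_worker(print); q.put("event"); q.put(_TERMINATOR); q.join()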
def try_run_handler(tries, store, handler_name, kwargs):
    # type: (int, TrackingStore, str, Dict[str, Any]) -> Any
    """
    Locate the handler function on the store and try to run it, retrying on failure.
    If all attempts fail, re-raise the last error.

    @param tries: maximum number of attempts, positive integer
    @param store: the store whose handler to run
    @param handler_name: the name of the handler to run
    @param kwargs: the input for the handler
    @return: the result of the handler if it succeeded; otherwise the last error is raised
    """
    try_num = 1
    handler = getattr(store, handler_name)
    while True:
        try:
            return handler(**kwargs)
        except DatabandAuthenticationError as ex:
            # no need to retry with the same credentials;
            # log the exception and exit immediately
            log_exception(
                f"Auth failed storing tracking information from {handler_name} at {str(store)}",
                ex,
                non_critical=True,
            )
            # Failed auth is also not a reason to stop tracking subsequent requests.
            # We have an option to process requests with failed auth on demand,
            # so fake success:
            return {}
        except Exception as ex:
            log_exception(
                f"Try {try_num}/{tries}: Tracking failed from {handler_name} at {str(store)}",
                ex,
                non_critical=True,
            )
            if try_num == tries:
                # raise on the last try
                raise
            try_num += 1
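# Hedged usage sketch for try_run_handler: DummyStore is an illustrative
# stand-in (real stores come from the tracking layer); it fails twice with a
# transient error, so the third and final attempt succeeds and its result is
# returned.
class DummyStore(object):
    def __init__(self):
        self.calls = 0

    def log_metrics(self, metrics):
        self.calls += 1
        if self.calls < 3:
            raise RuntimeError("transient failure")
        return {"logged": len(metrics)}

result = try_run_handler(3, DummyStore(), "log_metrics", {"metrics": [1, 2, 3]})
# -> {"logged": 3}; with tries=2 the RuntimeError would be re-raised instead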
def log_dataframe(self, key, df, with_preview=True):
    try:
        value_meta = get_value_meta_for_metric(key, df, with_preview=with_preview)
        if not value_meta:
            return

        if value_meta.data_dimensions:
            self._log_metric("%s.shape" % key, value_meta.data_dimensions)
            for dim, size in enumerate(value_meta.data_dimensions):
                self._log_metric("%s.shape[%s]" % (key, dim), size)

        self._log_metric("%s.schema" % key, value_meta.data_schema)
        if with_preview:
            self._log_metric("%s.preview" % key, value_meta.value_preview)
    except Exception as ex:
        log_exception(
            "Error occurred during log_dataframe for %s" % (key,),
            ex,
            non_critical=True,
        )
def log_target(self, parameter, target, value, operation_type, operation_status):
    # type: (TaskRunTracker, ParameterDefinition, Target, Any, DbndTargetOperationType, DbndTargetOperationStatus) -> None
    try:
        target_meta = parameter.get_value_meta(value)
        target.target_meta = target_meta
        self.tracking_store.log_target(
            task_run=self.task_run,
            target=target,
            target_meta=target_meta,
            operation_type=operation_type,
            operation_status=operation_status,
            param_name=parameter.name,
            task_def_uid=parameter.task_definition_uid,
        )
    except Exception as ex:
        log_exception(
            "Error occurred during target metrics save for %s" % (target,),
            ex,
            non_critical=True,
        )
def get_value_meta_from_value(
    value_name, value, meta_conf
):  # type: (str, Any, ValueMetaConf) -> Optional[ValueMeta]
    if value is None:
        return None

    obj_value_type = get_value_type_of_obj(value)
    if obj_value_type is None:
        logger.info(
            "Can't detect known type for '%s' with type='%s'", value_name, type(value)
        )
        return None

    try:
        return obj_value_type.get_value_meta(value, meta_conf=meta_conf)
    except Exception as ex:
        log_exception(
            "Failed to get value meta info for '%s' with type='%s'"
            " (detected as %s)" % (value_name, type(value), obj_value_type),
            ex,
            non_critical=True,
        )
    return None
def flush(self):
    # skip the handler if the worker already exited, to avoid hanging
    if not self._background_worker.is_alive:
        return

    # process the remaining items in the queue
    tracking_duration = (utcnow() - self._start_time).in_seconds()
    # don't exceed 10% of the whole tracking duration while flushing,
    # but no less than 2 minutes and no more than 30 minutes
    flush_limit = min(max(tracking_duration * 0.1, 120), 30 * 60)
    logger.info(
        f"Waiting {flush_limit}s for TrackingAsyncWebChannel to complete async tasks..."
    )

    self._shutting_down = True
    try:
        self._background_worker.flush(flush_limit)
        self.web_channel.flush()
        logger.info("TrackingAsyncWebChannel completed all tasks")
    except TimeoutError as e:
        err_msg = f"TrackingAsyncWebChannel flush exceeded {flush_limit}s timeout"
        log_exception(err_msg, e, logger)
    finally:
        self._shutting_down = False
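# Hedged illustration of the flush-limit clamp used above: 10% of the total
# tracking duration, bounded to the interval [120s, 1800s]. The helper name
# is an assumption for demonstration; the values below follow from the formula.
def _flush_limit(tracking_duration_seconds):
    return min(max(tracking_duration_seconds * 0.1, 120), 30 * 60)

assert _flush_limit(600) == 120     # 10-minute run -> 2-minute floor
assert _flush_limit(7200) == 720    # 2-hour run -> 10% of duration
assert _flush_limit(36000) == 1800  # 10-hour run -> 30-minute ceiling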
def add_params_table(
    self,
    all_params=False,
    param_kind=True,
    param_type=True,
    param_format=False,
    param_source=False,
    param_section=False,
    param_default=False,
):
    """Adds the params table to the banner."""
    # config
    self.kind = param_kind
    self.type = param_type
    self.format = param_format
    self.source = param_source
    self.section = param_section
    self.default = param_default

    from dbnd._core.task_build.task_results import ResultProxyTarget

    exclude = set(_TASK_FIELDS)  # excluding those

    # special treatment for result
    t_result = getattr(self.task, "result", None)
    if isinstance(t_result, ResultProxyTarget):
        self.banner.column("RESULT", t_result)
        exclude.add("result")

    params_data = []
    params_warnings = []

    # iterate over the params and build the data for the table
    for param_value in self.task._params:
        try:
            param_def = param_value.parameter.update_value_meta_conf_from_runtime_value(
                param_value.value, self.task.settings.tracking
            )

            # filter switches
            if not all_params:
                if param_def.name in exclude:
                    continue
                if param_def.system:
                    continue

            # build a single row
            if param_def.value_meta_conf.log_preview:
                param_value_preview = param_value.value
            else:
                param_value_preview = ParameterValue.MASKED_VALUE_PREVIEW

            param_row = self.build_record(param_def, param_value, param_value_preview)
            params_data.append(param_row)

            # extract param warnings
            if param_value and param_value.warnings:
                params_warnings.extend(param_value.warnings)
        except Exception as ex:
            log_exception(
                "Failed to calculate parameter value for %s.%s"
                % (self.task.task_name, param_value.parameter.name),
                ex,
                non_critical=True,
            )

    if params_warnings:
        self.banner.column("BUILD WARNINGS:", "")
        self.banner.write("".join("\t%s\n" % pw for pw in params_warnings))

    if params_data:
        params_header = self.build_headers()
        params_table = safe_tabulate(tabular_data=params_data, headers=params_header)
        self.banner.column("PARAMS:", "")
        self.banner.write(params_table)
        self.banner.new_line()