def log_metrics(metrics_dict, source="user", timestamp=None):
    # type: (Dict[str, Any], str, Optional[datetime]) -> None
    """
    Report a batch of metrics to dbnd in a single call.

    When ``_get_tracker()`` yields no tracker, nothing is forwarded: a
    warning is sent through the one-time logger and the metrics are written
    to the module logger instead.

    Args:
        metrics_dict: Key-value pairs of metrics to log.
        source: Optional name of the metrics source, default is user.
        timestamp: Optional timestamp of the metrics.

    Example::

        @task
        def log_lowercase_letters():
            # all lower alphabet chars -> {"a": 97,..., "z": 122}
            log_metrics({chr(i): i for i in range(97, 123)})
    """
    active_tracker = _get_tracker()
    if not active_tracker:
        # No tracker: warn through the one-time logger, then fall back to a
        # plain log line.
        warning_text = TRACKER_MISSING_MESSAGE % ("log_metrics",)
        get_one_time_logger().log_once(warning_text, "log_metrics", logging.WARNING)
        fallback_line = "Log multiple metrics from {source}: {metrics}".format(
            source=source.capitalize(), metrics=metrics_dict
        )
        logger.info(fallback_line)
        return

    active_tracker.log_metrics(metrics_dict, source=source, timestamp=timestamp)
def log_metric(key, value, source="user"):
    # type: (str, Any, Optional[str]) -> None
    """
    Log key-value pair as a metric to dbnd.

    When ``_get_tracker()`` yields no tracker, the metric is not forwarded:
    a warning is sent through the one-time logger and the metric is written
    to the module logger instead.

    Args:
        key: Name of the metric.
        value: Value of the metric.
        source: The source of the metric, default is user. ``None`` is passed
            to the tracker unchanged; in the fallback log line it is shown as
            the default source.

    Example::

        def calculate_alpha(alpha):
            alpha *= 1.1
            log_metric("alpha", alpha)
    """
    tracker = _get_tracker()
    if tracker:
        tracker.log_metric(key, value, source=source)
        return

    message = TRACKER_MISSING_MESSAGE % ("log_metric",)
    get_one_time_logger().log_once(message, "log_metric", logging.WARNING)

    # The declared type allows source=None; calling .capitalize() on None
    # would raise AttributeError, so label it with the documented default.
    source_label = "user" if source is None else source
    logger.info(
        "Log {} Metric '{}'='{}'".format(source_label.capitalize(), key, value)
    )
def _report_operation(operation_report):
    # type: (DatasetOperationReport) -> None
    """
    Hand a dataset operation report to the current tracker.

    If ``_get_tracker()`` yields no tracker, the report is dropped and a
    warning is sent through the one-time logger.

    Args:
        operation_report: The report passed to ``tracker.log_dataset`` as
            ``op_report``.
    """
    active_tracker = _get_tracker()
    if active_tracker:
        active_tracker.log_dataset(op_report=operation_report)
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("report_operation",)
    get_one_time_logger().log_once(
        warning_text, "report_operation", logging.WARNING
    )
def _report_dbt_metadata(dbt_metadata, tracker=None):
    """
    Pass dbt metadata to a tracker.

    Args:
        dbt_metadata: The metadata forwarded to ``tracker.log_dbt_metadata``.
        tracker: Tracker to report to. When ``None``, the tracker returned by
            ``_get_tracker()`` is used.

    If the resulting tracker is falsy, nothing is reported and a warning is
    sent through the one-time logger.
    """
    active_tracker = _get_tracker() if tracker is None else tracker
    if active_tracker:
        active_tracker.log_dbt_metadata(dbt_metadata=dbt_metadata)
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("report_dbt_metadata",)
    get_one_time_logger().log_once(
        warning_text, "report_dbt_metadata", logging.WARNING
    )
def log_data(
    key,  # type: str
    value=None,  # type: Union[pd.DataFrame, spark.DataFrame, PostgresTable, SnowflakeTable]
    path=None,  # type: Optional[str]
    operation_type=DbndTargetOperationType.read,  # type: DbndTargetOperationType
    with_preview=None,  # type: Optional[bool]
    with_size=None,  # type: Optional[bool]
    with_schema=None,  # type: Optional[bool]
    with_stats=None,  # type: Optional[Union[bool, str, List[str], LogDataRequest]]
    with_histograms=None,  # type: Optional[Union[bool, str, List[str], LogDataRequest]]
    raise_on_error=False,  # type: bool
):
    # type: (...) -> None
    """
    Log data information to dbnd.

    The ``with_*`` flags are bundled into a ``ValueMetaConf`` and passed to
    the tracker with the data. If ``_get_tracker()`` yields no tracker,
    nothing is logged and a warning is sent through the one-time logger.

    Args:
        key: Name of the data.
        value: Value of the data, currently supporting only dataframes and tables view.
        path: Optional target or path representing a target to connect the data to.
        operation_type: Type of the operation done with the target - reading or writing the data.
        with_preview: True if should log a preview of the data.
        with_size: True if should log the size of the data.
        with_schema: True if should log the schema of the data.
        with_stats: True if should calculate and log stats of the data.
        with_histograms: True if should calculate and log histogram of the data.
        raise_on_error: Raise if an error occurs.
    """
    active_tracker = _get_tracker()
    if active_tracker:
        # Translate the with_* switches into the meta configuration object.
        value_meta = ValueMetaConf(
            log_preview=with_preview,
            log_schema=with_schema,
            log_size=with_size,
            log_stats=with_stats,
            log_histograms=with_histograms,
        )
        active_tracker.log_data(
            key,
            value,
            meta_conf=value_meta,
            path=path,
            operation_type=operation_type,
            raise_on_error=raise_on_error,
        )
        return

    warning_text = TRACKER_MISSING_MESSAGE % ("log_data",)
    get_one_time_logger().log_once(warning_text, "log_data", logging.WARNING)
def log_artifact(key, artifact):
    """
    Log a local file or directory as an artifact of the currently active run.

    If ``_get_tracker()`` yields no tracker, the artifact is not forwarded:
    a warning is sent through the one-time logger and the key/artifact pair
    is written to the module logger instead.

    Args:
        key: The key by which to log the artifact
        artifact: The artifact to log

    Example::

        def prepare_data(data):
            lorem = "Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt"
            data.write(lorem)
            log_artifact("my_tmp_file", str(data))
    """
    active_tracker = _get_tracker()
    if not active_tracker:
        warning_text = TRACKER_MISSING_MESSAGE % ("log_artifact",)
        get_one_time_logger().log_once(warning_text, "log_artifact", logging.WARNING)
        logger.info("Artifact %s=%s", key, artifact)
        return

    active_tracker.log_artifact(key, artifact)
def handle_callable_call(self, *call_args, **call_kwargs):
    """
    Dispatch a call to the decorated function/class according to the current mode.

    The branches are checked in this order:

    1. dbnd is disabled - call the wrapped ``class_or_func`` directly.
    2. Tracking mode - call it inside ``self.tracking_context`` and pass the
       result through the callback that context yields.
    3. Airflow DAG build context - delegate to
       ``build_task_at_airflow_dag_context``.
    4. No current task - log a one-time warning and call ``class_or_func``
       directly.
    5. Orchestration, BUILD phase - instantiate the task class; return
       ``t.result`` for a single-result task, otherwise the task object.
    6. Orchestration, RUN phase - run as a dynamic task when the settings and
       the current task allow it, otherwise call ``class_or_func`` directly.

    Args:
        *call_args: Positional arguments of the user's call.
        **call_kwargs: Keyword arguments of the user's call.

    Returns:
        Whatever the selected branch produces (see the list above).

    Raises:
        Exception: If the current phase is neither BUILD nor RUN.
    """
    dbnd_project_config = get_dbnd_project_config()
    if dbnd_project_config.disabled:
        return self.class_or_func(*call_args, **call_kwargs)

    # we are at tracking mode
    if dbnd_project_config.is_tracking_mode():
        with self.tracking_context(call_args, call_kwargs) as track_result_callback:
            fp_result = self.class_or_func(*call_args, **call_kwargs)
            return track_result_callback(fp_result)

    #### DBND ORCHESTRATION MODE
    #
    #     -= Use "Step into My Code"" to get back from dbnd code! =-
    #
    # decorated object call/creation  ( my_func(), MyDecoratedTask()
    # we are at orchestration mode
    task_cls = self.get_task_cls()

    if is_in_airflow_dag_build_context():
        # we are in Airflow DAG building mode - AIP-31
        return build_task_at_airflow_dag_context(
            task_cls=task_cls, call_args=call_args, call_kwargs=call_kwargs
        )

    current = try_get_current_task()
    if not current:
        # no tracking/no orchestration,
        # falling back to "natural call" of the class_or_func
        message = (
            "Can't report tracking info. %s is decorated with @task, but no tracking context was found"
            % (self.class_or_func.__name__,)
        )
        get_one_time_logger().log_once(message, "task_decorator", logging.WARNING)
        return self.class_or_func(*call_args, **call_kwargs)

    ######
    # current is not None, and we are not in tracking/airflow/luigi
    # this is DBND Orchestration mode
    # we can be in the context of task.run() or in task.band()
    # called from user code using user_decorated_func() or UserDecoratedTask()

    if self.is_class:
        # drop the internal marker so it is not passed on as a task argument
        call_kwargs.pop("__call_original_cls", False)

    # we should not get here from _TaskFromTaskDecorator.invoke()
    # at that function we should call user code directly
    phase = current_phase()
    if phase is TaskContextPhase.BUILD:
        # we are in the @pipeline.band() context, we are building execution plan
        t = task_cls(*call_args, **call_kwargs)

        # we are in the band, and if user_code() is called we want to remove redundant
        # `user_code().result` usage
        if t.task_definition.single_result_output:
            return t.result

        # we have multiple outputs (more than one "output" parameter)
        # just return task object, user will use it as `user_code().output_1`
        return t
    elif phase is TaskContextPhase.RUN:
        # we are "running" inside some other task execution (orchestration!)
        #  (inside user_defined_function() or UserDefinedTask.run()

        # if possible we will run it as "orchestration" task
        # with parameters parsing
        if (
            current.settings.run.task_run_at_execution_time_enabled
            and current.task_supports_dynamic_tasks
        ):
            return self._run_task_from_another_task_execution(
                parent_task=current, call_args=call_args, call_kwargs=call_kwargs
            )
        # we can not call it in "dbnd" way, fallback to normal call
        if self.is_class:
            call_kwargs["__call_original_cls"] = False
        return self.class_or_func(*call_args, **call_kwargs)
    else:
        # Same exception type as before, now carrying the phase so the
        # failure can be diagnosed.
        raise Exception(
            "handle_callable_call: unsupported task context phase: %r" % (phase,)
        )