def task_with_log_dataset_wrapper():
    """Demonstrate dataset-write logging around a body that fails.

    Opens ``dataset_op_logger`` for a CSV write with preview enabled, then
    raises ``ZeroDivisionError`` inside the context — presumably an
    intentional fixture so the logger records the operation as failed.
    """
    write_target = target("/path/to/value.csv")
    with dataset_op_logger(
        op_path=write_target,
        data=pandas_data_frame,
        op_type="write",
        with_preview=True,
    ) as logger:
        answer = 42
        # Division by zero: always raises, exercising the failure path.
        answer / 0
def execute():
    """Emit a random number of metrics, then a logged read and write of a tiny DataFrame.

    Fix: the original loop variable was named ``int``, shadowing the builtin;
    renamed to ``i``. Behavior is otherwise unchanged.
    """
    for i in range(randint(0, 10)):
        sleep(randint(0, 10))
        # NOTE(review): "interation" looks like a typo for "iteration", but the
        # metric name is an external identifier — renaming it could break
        # existing consumers/dashboards, so it is left as-is.
        log_metric(f"interation_{i}", i)

    # Same 2x4 payload is used for both the read and the write examples.
    data = {"row_1": [3, 2, 1, 0], "row_2": ["a", "b", "c", "d"]}

    with dataset_op_logger("databricks://test/load/read", "read") as logger:
        read_df = pd.DataFrame.from_dict(data, orient="index")
        logger.set(data=read_df)

    with dataset_op_logger("databricks://test/load/write", "write") as logger:
        write_df = pd.DataFrame.from_dict(
            data, orient="index", columns=["A", "B", "C", "D"]
        )
        logger.set(data=write_df)
def write_data(dest_dir, df):
    """Write each hourly group of ``df`` to a date/time-partitioned parquet path.

    Groups rows by the "dates" column at hourly frequency; for every non-empty
    group, logs a dataset write (metrics disabled) around ``randomly_fail()``.
    Failures are deliberately swallowed — writing is best-effort per group.

    Fix: the original used a bare ``except:``, which also swallows
    ``KeyboardInterrupt``/``SystemExit``; narrowed to ``except Exception``.
    The path formatting is also skipped for empty groups.
    """
    for date_time, grouped in df.groupby(pd.Grouper(key="dates", freq="H")):
        if grouped.empty:
            continue
        file_path = os.path.join(
            dest_dir,
            "date={}/time={}".format(
                date_time.strftime("%Y%m%d"), date_time.strftime("%H%M%S")
            ),
            "data_data_x.parquet",
        )
        try:
            with dataset_op_logger(
                op_path=file_path, op_type="write", data=grouped, send_metrics=False
            ):
                randomly_fail()
        except Exception:
            # Best-effort: keep processing the remaining groups.
            pass
def task_with_log_dataset_wrapper():
    """Log a dataset read with preview enabled, supplying the data after entry.

    Rather than passing ``data=`` to ``dataset_op_logger`` up front, the frame
    and an explicit row count are attached via ``logger.set`` inside the
    context.
    """
    csv_target = target("/path/to/value.csv")
    with dataset_op_logger(
        op_path=csv_target, op_type="read", with_preview=True
    ) as logger:
        logger.set(data=pandas_data_frame, row_count=999)