def task_with_log_dataset_wrapper():
    """Log a pandas DataFrame write through ``dataset_op_logger``.

    Opens a "write" dataset operation on a CSV target with preview enabled,
    does some placeholder work, and defines a nested ``execute`` helper that
    logs randomized metrics followed by a logged read and a logged write of
    small demo DataFrames.
    """
    with dataset_op_logger(
            op_path=target("/path/to/value.csv"),
            data=pandas_data_frame,
            op_type="write",
            with_preview=True,
    ) as logger:
        ans = 42  # placeholder work inside the logged operation
        # NOTE(review): the original executed `ans / 0` here — an unconditional
        # ZeroDivisionError whose result was discarded. Removed; if the intent
        # was to demo error reporting, raise an explicit exception instead.

        # NOTE(review): `execute` is defined but never called in this snippet —
        # presumably invoked by surrounding example code; confirm before relying on it.
        def execute():
            """Log a few randomized metrics, then a logged read and write."""
            # `i`, not `int`: never shadow the builtin.
            for i in range(randint(0, 10)):
                sleep(randint(0, 10))
                log_metric(f"iteration_{i}", i)  # fixed "interation_" typo

            with dataset_op_logger("databricks://test/load/read",
                                   "read") as logger:
                data = {"row_1": [3, 2, 1, 0], "row_2": ["a", "b", "c", "d"]}
                read_df = pd.DataFrame.from_dict(data, orient="index")
                logger.set(data=read_df)

            with dataset_op_logger("databricks://test/load/write",
                                   "write") as logger:
                data = {"row_1": [3, 2, 1, 0], "row_2": ["a", "b", "c", "d"]}
                write_df = pd.DataFrame.from_dict(data,
                                                  orient="index",
                                                  columns=["A", "B", "C", "D"])
                logger.set(data=write_df)
# Example #3
def write_data(dest_dir, df):
    """Write *df* to hourly-partitioned parquet files under *dest_dir*.

    Groups ``df`` by hour of its "dates" column and writes each non-empty
    group to ``dest_dir/date=YYYYMMDD/time=HHMMSS/data_data_x.parquet``,
    wrapping each write in a ``dataset_op_logger`` (metrics disabled).
    Failures injected by ``randomly_fail`` are swallowed: this is a
    best-effort demo writer.
    """
    for date_time, grouped in df.groupby(pd.Grouper(key="dates", freq="H")):
        file_path = os.path.join(
            dest_dir,
            "date={}/time={}".format(date_time.strftime("%Y%m%d"),
                                     date_time.strftime("%H%M%S")),
            "data_data_x.parquet",
        )

        # Guard clause: skip empty hourly buckets.
        if grouped.empty:
            continue

        try:
            with dataset_op_logger(op_path=file_path,
                                   op_type="write",
                                   data=grouped,
                                   send_metrics=False):
                randomly_fail()
        except Exception:
            # Deliberate best-effort swallow, but no longer a bare `except:`
            # (which would also trap KeyboardInterrupt/SystemExit).
            pass
 def task_with_log_dataset_wrapper():
     with dataset_op_logger(op_path=target("/path/to/value.csv"),
                            op_type="read",
                            with_preview=True) as logger:
         logger.set(data=pandas_data_frame, row_count=999)