Example #1
def task_with_log_datasets():
    log_dataset_op(
        "location://path/to/value.csv",
        "read",  # the operation type can also be passed as a plain string
        success=False,
        with_schema=False,
    )
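
In practice the success flag is usually driven by the outcome of the real I/O. A minimal sketch of that pattern (assuming log_dataset_op is importable from dbnd's top level; the function name and path handling are illustrative):

import pandas as pd
from dbnd import log_dataset_op  # assumed top-level import

def read_csv_with_tracking(path):
    try:
        df = pd.read_csv(path)
        # report the successful read, attaching the data so a schema can be extracted
        log_dataset_op(path, "read", success=True, data=df)
        return df
    except Exception:
        # report the failed read; there is no data to attach
        log_dataset_op(path, "read", success=False, with_schema=False)
        raise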
Example #2
def task_with_log_datasets():
    log_dataset_op(
        "location://path/to/value.csv",
        DbndDatasetOperationType.read,
        row_count=987,
        column_count=4,
    )
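
No data object is attached in this call, so the row and column counts are supplied explicitly through row_count and column_count instead of being derived from a DataFrame.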
Example #3
    def flush_operations(self, connection: PostgresConnectionWrapper):
        if connection in self.connections:
            for op in self.connections.get_operations(connection):

                if self.conf.with_schema:
                    op.extract_schema(connection)
                    if self.conf.with_stats:
                        op.extract_stats(connection)
                if self.conf.with_preview:
                    op.extract_preview(connection)

                log_dataset_op(
                    op_path=op.render_connection_path(connection),
                    op_type=op.op_type,
                    success=op.success,
                    data=op,
                    error=op.error,
                    with_preview=self.conf.with_preview,
                    send_metrics=self.conf.send_metrics,
                    with_schema=self.conf.with_schema,
                    with_partition=self.conf.with_partition,
                    with_stats=self.conf.with_stats,
                    with_histograms=self.conf.with_histograms,
                )
            # clear the batch of operations we just reported so nothing is reported twice
            self.connections.clear_operations(connection)
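
The pattern here: each buffered operation is enriched according to the tracker's configuration (schema, then optionally stats, then preview) before being reported, and the batch is cleared afterwards so a later flush cannot report the same operations twice.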
Example #4
    def report_operations(self, connection: SnowflakeConnection,
                          operations: List[SqlOperation]):
        if connection.is_closed():
            # already closed, cannot proceed (and probably already tracked)
            return
        # update the table names
        operations = [op.evolve_table_name(connection) for op in operations]

        # look up the schemas of all non-file tables
        tables = chain.from_iterable(op.tables for op in operations
                                     if not op.is_file)

        tables_schemas: Dict[str, DTypes] = {}
        for table in tables:
            table_schema = get_snowflake_table_schema(connection, table)
            if table_schema:
                tables_schemas[table] = table_schema

        operations: List[SqlOperation] = [
            op.evolve_schema(tables_schemas) for op in operations
        ]

        for op in operations:
            log_dataset_op(
                op_path=render_connection_path(connection, op, "snowflake"),
                op_type=op.op_type,
                success=op.success,
                data=op,
                with_schema=True,
                send_metrics=True,
            )
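
The flow above: skip connections that are already closed, resolve the table names behind each operation, fetch a schema for every non-file table, fold those schemas back into the operations via evolve_schema, and only then report each operation with schema extraction and metric sending enabled.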
Example #5
def task_with_log_datasets():
    log_dataset_op(
        "/path/to/value.csv",
        DbndDatasetOperationType.read,
        data=pandas_data_frame,  # assumed to hold the DataFrame being logged
        with_preview=True,
        with_schema=True,
    )
Example #6
import os
from random import uniform

import numpy as np
import pandas as pd

from dbnd import log_dataset_op  # assumed top-level import
from dbnd._core.constants import DbndDatasetOperationType  # assumed import path


def random_date(start, end):
    # minimal stand-in for the helper the original example assumes:
    # a uniformly random timestamp between start and end
    return start + pd.Timedelta(seconds=uniform(0, (end - start).total_seconds()))


def get_data(source: str, days: int):
    today = pd.Timestamp.now()
    start_date = today - pd.Timedelta(days=days)

    name = (
        f"test_data_{today.strftime('%Y-%m-%d')}_{start_date.strftime('%Y-%m-%d')}.csv"
    )
    file_path = os.path.join(source, name)

    # roughly 10 records per hour, jittered by +/-50%
    records_amount = int((days * 24 * 10) * uniform(0.5, 1.5))
    df = pd.DataFrame(np.random.randint(0, 100, size=(records_amount, 4)),
                      columns=list("ABCD"))
    df["dates"] = [random_date(start_date, today) for _ in range(len(df))]

    log_dataset_op(
        op_path=file_path,
        op_type=DbndDatasetOperationType.read,
        data=df,
        with_schema=True,
        with_preview=True,
        with_histograms=True,
    )
    return df
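
A hypothetical invocation of the example above (the directory name is illustrative):

df = get_data("/tmp/datasets", days=7)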
Example #7
def task_with_log_datasets():
    log_dataset_op(
        "location://path/to/value.csv",
        DbndDatasetOperationType.read,
        with_schema=False,
    )
Example #8
from targets import target  # assumed import; dbnd's file-target abstraction

def task_with_log_datasets():
    a_target = target("/path/to/value.csv")
    log_dataset_op(a_target,
                   DbndDatasetOperationType.read,
                   with_schema=False)
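
As this example shows, log_dataset_op accepts a file target as well as a plain string path or URI (compare Examples #1 and #5).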