def test_metadata_does_not_require_file_read(self):
    """Cached metadata must come from DB columns, never from the parquet file.

    We cache a render result, then delete its parquet object from s3 entirely.
    If reading ``cached_render_result.table_metadata`` still succeeds, the
    metadata was served from the database -- proving no file read happened.
    """
    expected_columns = [
        Column("A", ColumnType.Number(format="{:,.2f}")),
        Column("B", ColumnType.Timestamp()),
        Column("C", ColumnType.Text()),
        Column("D", ColumnType.Date("month")),
    ]
    with arrow_table_context(
        make_column("A", [1], format="{:,.2f}"),
        make_column("B", [datetime.datetime(2021, 4, 13)]),
        make_column("C", ["c"]),
        make_column("D", [datetime.date(2021, 4, 1)], unit="month"),
    ) as (path, table):
        loaded_result = LoadedRenderResult(
            path=path,
            table=table,
            columns=expected_columns,
            errors=[],
            json={},
        )
        cache_render_result(self.workflow, self.step, 1, loaded_result)

    # Delete from disk entirely, to prove we did not read.
    s3.remove(BUCKET, crr_parquet_key(self.step.cached_render_result))

    # Load _new_ CachedRenderResult -- from DB columns, not memory
    reloaded_step = Step.objects.get(id=self.step.id)
    fresh_cached_result = reloaded_step.cached_render_result

    self.assertEqual(
        fresh_cached_result.table_metadata, TableMetadata(1, expected_columns)
    )
def test_date_unit_day_ok(self):
    """A date32 field tagged with unit "day" reads back as Date(unit="day")."""
    field = pa.field("A", pa.date32(), metadata={b"unit": b"day"})
    table = pa.table([pa.array([date(2021, 4, 4)])], pa.schema([field]))
    self.assertEqual(
        read_columns(table),
        [Column("A", ColumnType.Date(unit="day"))],
    )
def test_date_unit_month_ok(self):
    """A date32 field tagged with unit "month" reads back as Date(unit="month").

    Includes extreme in-range values and a null to exercise the validator.
    """
    field = pa.field("A", pa.date32(), metadata={b"unit": b"month"})
    values = pa.array([date(1200, 12, 1), date(3199, 2, 1), None])
    table = pa.table([values], pa.schema([field]))
    self.assertEqual(
        read_columns(table),
        [Column("A", ColumnType.Date(unit="month"))],
    )
def render(arrow_table, params, output_path, *, columns, **kwargs):
    # Test the "columns" kwarg
    #
    # TODO nix this! The only module that uses it is `converttotext`.
    self.assertEqual(
        columns,
        [
            Column("A", ColumnType.Number("{:,.3f}")),
            Column("B", ColumnType.Number("{:,.3f}")),
            Column("C", ColumnType.Number("{:,.3f}")),
            Column("D", ColumnType.Timestamp()),
            Column("E", ColumnType.Timestamp()),
            Column("F", ColumnType.Timestamp()),
            Column("G", ColumnType.Text()),
            Column("H", ColumnType.Text()),
            Column("I", ColumnType.Text()),
            Column("J", ColumnType.Date(unit="day")),
            Column("K", ColumnType.Date(unit="week")),
            Column("L", ColumnType.Text()),
        ],
    )
    # Build an output table with one column per declared type. Column "J"
    # below gets its schema metadata patched to unit=month; "K" and "L"
    # deliberately keep no metadata.
    out_table = pa.table(
        {
            "A": [1],
            "B": pa.array([datetime(2020, 3, 8)], pa.timestamp("ns")),
            "C": ["a"],
            "D": [1],
            "E": pa.array([datetime(2020, 3, 8)], pa.timestamp("ns")),
            "F": ["a"],
            "G": [1],
            "H": pa.array([datetime(2020, 3, 8)], pa.timestamp("ns")),
            "I": ["a"],
            "J": pa.array([date(2021, 4, 1)]),
            "K": pa.array([date(2021, 4, 12)]),
            "L": pa.array([date(2021, 4, 1)]),
        }
    )
    # Attach unit metadata to "J" only, then write the result as an
    # Arrow IPC file at output_path.
    j_index = out_table.schema.get_field_index("J")
    patched_field = pa.field("J", pa.date32(), metadata={"unit": "month"})
    patched_schema = out_table.schema.set(j_index, patched_field)
    with pa.ipc.RecordBatchFileWriter(output_path, patched_schema) as writer:
        writer.write_table(pa.table(out_table.columns, schema=patched_schema))
    return []
def test_date_unit_year_ok(self):
    """A date32 field tagged with unit "year" reads back as Date(unit="year").

    Covers the minimum and maximum representable years plus a null.
    """
    field = pa.field("A", pa.date32(), metadata={b"unit": b"year"})
    values = pa.array([date(1900, 1, 1), date(1, 1, 1), date(9999, 1, 1), None])
    table = pa.table([values], pa.schema([field]))
    self.assertEqual(
        read_columns(table),
        [Column("A", ColumnType.Date(unit="year"))],
    )
def Date(name: str, unit: str) -> Column:
    """Shorthand for building a Date-typed Column with the given unit."""
    column_type = ColumnType.Date(unit)
    return Column(name, column_type)