def test_render_with_input_columns(self):
    # render() must be handed an ``input_columns`` kwarg: a dict keyed by
    # column name whose values are the ptypes.RenderColumn objects below.
    expected_input_columns = {
        "A": ptypes.RenderColumn("A", "text", None),
        "B": ptypes.RenderColumn("B", "number", "{:,.3f}"),
        "C": ptypes.RenderColumn("C", "datetime", None),
    }

    def render(*args, input_columns):
        self.assertEqual(input_columns, expected_input_columns)

    # One row of input data: a text, a number and a timestamp column.
    table_data = {
        "A": ["x"],
        "B": [1],
        "C": pa.array([datetime.now()], pa.timestamp("ns")),
    }
    table_columns = [
        Column("A", ColumnType.Text()),
        Column("B", ColumnType.Number("{:,.3f}")),
        Column("C", ColumnType.Datetime()),
    ]

    with arrow_table_context(
        table_data, columns=table_columns, dir=self.basedir
    ) as arrow_table:
        self._test_render(render, arrow_table=arrow_table)
def test_metadata_comes_from_db_columns(self):
    # A freshly loaded WfModule must report the cached result's table
    # metadata even though the stored file has been removed.
    expected_columns = [
        Column("A", ColumnType.Number(format="{:,.2f}")),
        Column("B", ColumnType.Datetime()),
        Column("C", ColumnType.Text()),
    ]
    table = arrow_table(
        {"A": [1], "B": [datetime.datetime.now()], "C": ["x"]},
        columns=expected_columns,
    )
    cache_render_result(
        self.workflow, self.wf_module, self.delta.id, RenderResult(table)
    )

    # Delete from disk entirely, to prove we did not read.
    stored_key = crr_parquet_key(self.wf_module.cached_render_result)
    minio.remove(BUCKET, stored_key)

    # Load _new_ CachedRenderResult -- from DB columns, not memory
    reloaded_wf_module = WfModule.objects.get(id=self.wf_module.id)
    reloaded_result = reloaded_wf_module.cached_render_result
    self.assertEqual(
        reloaded_result.table_metadata, TableMetadata(1, expected_columns)
    )
def _arrow_column_to_column(name: str, column: pyarrow.ChunkedArray) -> Column:
    """Derive a ``Column`` named *name* from the Arrow type of *column*.

    Mapping, checked in this order:

    * floating-point or integer -> ``ColumnType.Number("{:,}")``
    * timestamp                 -> ``ColumnType.Datetime()``
    * string or dictionary      -> ``ColumnType.Text()``

    Raises:
        RuntimeError: if the Arrow type matches none of the above.
    """
    arrow_type = column.type
    type_checks = pyarrow.types

    if type_checks.is_floating(arrow_type) or type_checks.is_integer(arrow_type):
        return Column(name, ColumnType.Number("{:,}"))

    if type_checks.is_timestamp(arrow_type):
        return Column(name, ColumnType.Datetime())

    if type_checks.is_string(arrow_type) or type_checks.is_dictionary(arrow_type):
        return Column(name, ColumnType.Text())

    raise RuntimeError("Unknown column type %r" % arrow_type)
def test_read_cached_render_result_slice_as_text_datetime(self):
    # Cache a one-column datetime table, then read a slice of it back as
    # CSV text and compare with the expected ISO-8601 rendering.
    timestamps = pa.array([2134213412341232967, None], pa.timestamp("ns"))
    table = arrow_table(
        {"A": timestamps},
        columns=[Column("A", ColumnType.Datetime())],
    )
    cache_render_result(
        self.workflow, self.wf_module, self.delta.id, RenderResult(table)
    )

    cached = self.wf_module.cached_render_result
    csv_text = read_cached_render_result_slice_as_text(
        cached, "csv", range(2), range(3)
    )
    self.assertEqual(csv_text, "A\n2037-08-18T13:03:32.341232967Z\n")
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    # Selecting columns "A" (number) and "B" (datetime) for a multicolumn
    # param that only accepts "text" columns must raise a PromptingError
    # carrying a single WrongColumnType error for both columns.
    #
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    input_shape = TableMetadata(
        3,
        [
            Column("A", ColumnType.Number()),
            Column("B", ColumnType.Datetime()),
            Column("C", ColumnType.Text()),
        ],
    )

    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expected PromptingError.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))
    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A,B", input_shape)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_render_arrow_table_infer_output_column_formats_from_input(self):
    # Input has three number, three datetime and three text columns. The
    # module writes an output table in which each group of three holds one
    # number, one datetime and one text column; the resulting column
    # metadata is then compared with the expected list at the bottom.
    input_columns = (
        [Column(name, ColumnType.Number("{:,.3f}")) for name in ("A", "B", "C")]
        + [Column(name, ColumnType.Datetime()) for name in ("D", "E", "F")]
        + [Column(name, ColumnType.Text()) for name in ("G", "H", "I")]
    )

    def timestamp_array():
        # Single-value nanosecond timestamp array used for datetime data.
        return pa.array([datetime(2020, 3, 8)], pa.timestamp("ns"))

    # The param name "arrow_table" is a special case
    def render(arrow_table, params, output_path, *, columns, **kwargs):
        # Test the "columns" kwarg
        self.assertEqual(columns, input_columns)

        output_table = pa.table(
            {
                "A": [1],
                "B": timestamp_array(),
                "C": ["a"],
                "D": [1],
                "E": timestamp_array(),
                "F": ["a"],
                "G": [1],
                "H": timestamp_array(),
                "I": ["a"],
            }
        )
        with pa.ipc.RecordBatchFileWriter(
            output_path, output_table.schema
        ) as ipc_writer:
            ipc_writer.write_table(output_table)
        return []

    input_data = {
        "A": [1],
        "B": [1],
        "C": [1],
        "D": timestamp_array(),
        "E": timestamp_array(),
        "F": timestamp_array(),
        "G": ["a"],
        "H": ["a"],
        "I": ["a"],
    }

    with arrow_table_context(
        input_data, columns=input_columns, dir=self.basedir
    ) as arrow_table:
        render_result = self._test_render(render, arrow_table=arrow_table)
        self.assertEqual(
            render_result.table.metadata.columns,
            [
                Column("A", ColumnType.Number("{:,.3f}")),  # recalled
                Column("B", ColumnType.Datetime()),  # inferred
                Column("C", ColumnType.Text()),  # inferred
                Column("D", ColumnType.Number("{:,}")),  # inferred
                Column("E", ColumnType.Datetime()),  # recalled
                Column("F", ColumnType.Text()),  # inferred
                Column("G", ColumnType.Number("{:,}")),  # inferred
                Column("H", ColumnType.Datetime()),  # inferred
                Column("I", ColumnType.Text()),  # recalled
            ],
        )
def Datetime(name: str) -> Column:
    """Return a ``Column`` called *name* whose type is ``ColumnType.Datetime()``."""
    datetime_type = ColumnType.Datetime()
    return Column(name, datetime_type)