def test_metadata_comes_from_db_columns(self):
    # A freshly-loaded CachedRenderResult has no in-memory `_result`, yet
    # it must still report `nrows` and `columns` after its stored file has
    # been deleted.
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    table = pandas.DataFrame(
        {
            "A": [1],  # int64
            "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
            "C": ["foo"],  # str
            "D": pandas.Series(["cat"], dtype="category"),
        }
    )
    result = ProcessResult(dataframe=table, columns=columns)
    written = self.wf_module.cache_render_result(self.delta.id, result)
    # Delete the stored file so nothing asserted below can be read from it.
    minio.remove(minio.CachedRenderResultsBucket, written.parquet_key)

    # Re-fetch the module so its CachedRenderResult is a _new_ object --
    # built from DB columns, not from memory.
    reloaded_module = WfModule.objects.get(id=self.wf_module.id)
    reloaded = reloaded_module.cached_render_result
    self.assertFalse(hasattr(reloaded, "_result"))

    self.assertEqual(reloaded.nrows, 1)
    self.assertEqual(reloaded.columns, columns)
def test_dict_prompting_error_concatenate_same_type(self):
    # Both Dict entries point at text columns while wanting numbers; the
    # two failures are expected to be reported as ONE WrongColumnType that
    # names both columns.
    input_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.TEXT())],
    )
    context = RenderContext(None, None, input_shape, None, None)
    schema = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    value = {"x": "A", "y": "B"}

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, value, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], "text", frozenset({"number"}))
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_result_and_metadata_come_from_memory_when_available(self):
    # The CachedRenderResult returned by cache_render_result() must still
    # answer `result`, `nrows` and `columns` after its stored file has been
    # deleted.
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    result = ProcessResult(
        dataframe=pandas.DataFrame(
            {
                "A": [1],  # int64
                "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
                "C": ["foo"],  # str
                "D": pandas.Series(["cat"], dtype="category"),
            }
        ),
        columns=columns,
    )
    cached_result = self.wf_module.cache_render_result(self.delta.id, result)
    # cache_render_result() keeps its `result` parameter in memory, so we
    # can avoid disk entirely. Prove it by deleting from disk.
    minio.remove(minio.CachedRenderResultsBucket, cached_result.parquet_key)

    # assertIsNotNone gives a meaningful failure message, where
    # assertFalse(x is None) would only say "True is not false".
    self.assertIsNotNone(cached_result._result)
    self.assertEqual(cached_result.result, result)
    self.assertEqual(cached_result.nrows, 1)
    self.assertEqual(cached_result.columns, columns)
def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
    # The fallback columns contradict the data (A offered as text, B as
    # number); the expected columns are the ones matching the table.
    mismatched_fallback = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.NUMBER()),
    ]
    table = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})

    result = ProcessResult.coerce(
        table,
        try_fallback_columns=mismatched_fallback,
    )

    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
    ]
    self.assertEqual(result.columns, expected_columns)
def test_coerce_infer_columns_try_fallback_columns_ignore_wrong_type(self):
    # Fallback types that disagree with the table's data (text for the
    # integer column, number for the string column) are expected to be
    # ignored in favour of NUMBER / TEXT.
    data = {"A": [1, 2], "B": ["x", "y"]}
    wrong_type_a = Column("A", ColumnType.TEXT())
    wrong_type_b = Column("B", ColumnType.NUMBER())

    result = ProcessResult.coerce(
        pd.DataFrame(data),
        try_fallback_columns=[wrong_type_a, wrong_type_b],
    )

    want = [Column("A", ColumnType.NUMBER()), Column("B", ColumnType.TEXT())]
    self.assertEqual(result.columns, want)
def test_list_prompting_error_concatenate_same_type(self):
    # Two list items fail with the same found type ('text') and the same
    # wanted types; a single WrongColumnType covering both is expected.
    text_columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.TEXT()),
    ]
    context = RenderContext(None, None, TableShape(3, text_columns), None, None)
    number_column = ParamDType.Column(column_types=frozenset({'number'}))
    schema = ParamDType.List(inner_dtype=number_column)

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, ['A', 'B'], context)

    expected_errors = [
        PromptingError.WrongColumnType(['A', 'B'], 'text', frozenset({'number'})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_columns(self):
    # A ProcessResult built from a bare DataFrame is expected to expose
    # column names in table order and a Column per dtype; the categorical
    # column is expected to be reported as TEXT.
    table = pd.DataFrame({
        'A': [1],  # number
        'B': ['foo'],  # str
        'C': dt(2018, 8, 20),  # datetime64
    })
    # The categorical column is attached after construction.
    table['D'] = pd.Series(['cat'], dtype='category')

    result = ProcessResult(table)

    expected_names = ['A', 'B', 'C', 'D']
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
        Column('D', ColumnType.TEXT()),
    ]
    self.assertEqual(result.column_names, expected_names)
    self.assertEqual(result.columns, expected_columns)
def test_list_prompting_error_concatenate_different_type(self):
    # The two offending columns have different found types (text vs.
    # datetime), so one WrongColumnType per column is expected rather than
    # a merged one.
    input_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, input_shape, None, None)
    inner = ParamDType.Column(column_types=frozenset({"number"}))
    schema = ParamDType.List(inner_dtype=inner)

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, ["A", "B"], context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    # Chart series that reference a text column and a datetime column are
    # expected to raise one wanted-"number" error per column.
    input_shape = TableShape(
        3,
        [Column("A", ColumnType.TEXT()), Column("B", ColumnType.DATETIME())],
    )
    context = RenderContext(None, None, input_shape, None, None)
    series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(ParamDType.Multichartseries(), series, context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "datetime", frozenset({"number"})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    # Neither charted column is a number (A is text, B is datetime); each
    # is expected to produce its own WrongColumnType wanting 'number'.
    table_columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ]
    context = RenderContext(None, None, TableShape(3, table_columns), None, None)
    chart_series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'B', 'color': '#cccccc'},
    ]
    dtype = ParamDType.Multichartseries()

    with self.assertRaises(PromptingError) as caught:
        clean_value(dtype, chart_series, context)

    wanted = frozenset({'number'})
    self.assertEqual(caught.exception.errors, [
        PromptingError.WrongColumnType(['A'], 'text', wanted),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted),
    ])
def test_coerce_infer_columns(self):
    # Coercing a bare DataFrame is expected to yield NUMBER for the
    # integer column and TEXT for the string column.
    frame = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})

    coerced = ProcessResult.coerce(frame)

    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    # Selecting a number column and a datetime column for a text-only
    # Multicolumn is expected to raise a single WrongColumnType that lists
    # both columns with a found type of None.
    #
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    context = RenderContext(
        None,
        None,
        TableShape(
            3,
            [
                Column("A", ColumnType.NUMBER()),
                Column("B", ColumnType.DATETIME()),
                Column("C", ColumnType.TEXT()),
            ],
        ),
        None,
        None,
    )
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expectation; an error raised while constructing the
    # schema must fail the test instead of passing it.
    schema = ParamDType.Multicolumn(column_types=frozenset({"text"}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, "A,B", context)

    self.assertEqual(
        cm.exception.errors,
        [PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))],
    )
def test_coerce_infer_columns(self):
    # With no column hints at all, coerce() is expected to report the
    # integer column as NUMBER and the string column as TEXT.
    data = {"A": [1, 2], "B": ["x", "y"]}
    result = ProcessResult.coerce(pd.DataFrame(data))

    number_a = Column("A", ColumnType.NUMBER())
    text_b = Column("B", ColumnType.TEXT())
    self.assertEqual(result.columns, [number_a, text_b])
def test_columns(self):
    # Checks both views of a ProcessResult's schema: `column_names` (in
    # table order) and `columns` (name plus type). The categorical column
    # "D" is expected to come back as TEXT.
    frame = pd.DataFrame(
        {
            "A": [1],  # number
            "B": ["foo"],  # str
            "C": dt(2018, 8, 20),  # datetime64
        }
    )
    category_values = pd.Series(["cat"], dtype="category")
    frame["D"] = category_values

    result = ProcessResult(frame)

    self.assertEqual(result.column_names, ["A", "B", "C", "D"])
    expected_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.TEXT()),
        Column("C", ColumnType.DATETIME()),
        Column("D", ColumnType.TEXT()),
    ]
    self.assertEqual(result.columns, expected_columns)
def test_dict_prompting_error(self):
    # Each Dict entry wants a different type ('number' vs. 'datetime') but
    # both point at text columns, so two separate errors are expected.
    text_columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.TEXT()),
    ]
    context = RenderContext(None, None, TableShape(3, text_columns), None, None)
    wants_number = ParamDType.Column(column_types=frozenset({'number'}))
    wants_datetime = ParamDType.Column(column_types=frozenset({'datetime'}))
    schema = ParamDType.Dict({'col1': wants_number, 'col2': wants_datetime})

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, {'col1': 'A', 'col2': 'B'}, context)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
        PromptingError.WrongColumnType(['B'], 'text', frozenset({'datetime'})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_coerce_infer_columns_with_format(self):
    # A 'column_formats' entry passed alongside the dataframe is expected
    # to end up as the NUMBER column's format.
    frame = pd.DataFrame({'A': [1, 2], 'B': ['x', 'y']})
    payload = {
        'dataframe': frame,
        'column_formats': {'A': '{:,d}'},
    }

    coerced = ProcessResult.coerce(payload)

    expected_columns = [
        Column('A', ColumnType.NUMBER(format='{:,d}')),
        Column('B', ColumnType.TEXT()),
    ]
    self.assertEqual(coerced.columns, expected_columns)
def test_clean_column_prompting_error_convert_to_number(self):
    # Picking text column 'A' for a number-only Column parameter is
    # expected to raise a PromptingError carrying one WrongColumnType.
    input_shape = TableShape(3, [Column('A', ColumnType.TEXT())])
    number_only = ParamDType.Column(column_types=frozenset({'number'}))

    with self.assertRaises(PromptingError) as raised:
        clean_value(number_only, 'A', input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', frozenset({'number'})),
    ]
    self.assertEqual(raised.exception.errors, expected_errors)
def test_ctor_infer_columns(self):
    # Constructing a ProcessResult from only a DataFrame is expected to
    # infer column types; column 'C' mixes NaN with a datetime and is
    # expected to be DATETIME.
    frame = pd.DataFrame({
        'A': [1, 2],
        'B': ['x', 'y'],
        'C': [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    })

    result = ProcessResult(frame)

    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
    ]
    self.assertEqual(result.columns, expected_columns)
def test_dict_prompting_error(self):
    # "col1" wants a number and "col2" wants a datetime, yet both selected
    # columns are text: one WrongColumnType per entry is expected.
    column_a = Column("A", ColumnType.TEXT())
    column_b = Column("B", ColumnType.TEXT())
    input_shape = TableShape(3, [column_a, column_b])
    schema = ParamDType.Dict(
        {
            "col1": ParamDType.Column(column_types=frozenset({"number"})),
            "col2": ParamDType.Column(column_types=frozenset({"datetime"})),
        }
    )
    chosen = {"col1": "A", "col2": "B"}

    with self.assertRaises(PromptingError) as caught:
        clean_value(schema, chosen, input_shape)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"})),
        PromptingError.WrongColumnType(["B"], "text", frozenset({"datetime"})),
    ]
    self.assertEqual(caught.exception.errors, expected_errors)
def test_clean_column_prompting_error_convert_to_number(self):
    # Column "A" is text but the parameter accepts only numbers; a single
    # WrongColumnType for "A" is expected.
    input_shape = TableShape(3, [Column("A", ColumnType.TEXT())])
    context = RenderContext(None, None, input_shape, None, None)
    dtype = ParamDType.Column(column_types=frozenset({"number"}))

    with self.assertRaises(PromptingError) as caught:
        clean_value(dtype, "A", context)

    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", frozenset({"number"}))
    ]
    self.assertEqual(caught.exception.errors, expected_errors)
def test_ctor_infer_columns(self):
    # No columns are passed to the constructor, so the asserted types must
    # come from the DataFrame itself. "C" holds NaN followed by a datetime.
    data = {
        "A": [1, 2],
        "B": ["x", "y"],
        "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    }
    result = ProcessResult(pd.DataFrame(data))

    inferred = result.columns
    self.assertEqual(
        inferred,
        [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.TEXT()),
            Column("C", ColumnType.DATETIME()),
        ],
    )
def test_coerce_infer_columns_with_format(self):
    # coerce() is given a dict holding the table plus a number format for
    # "A"; that format is expected on the resulting NUMBER column.
    number_format = "{:,d}"
    table = pd.DataFrame({"A": [1, 2], "B": ["x", "y"]})

    result = ProcessResult.coerce(
        {"dataframe": table, "column_formats": {"A": number_format}}
    )

    expected_columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.TEXT()),
    ]
    self.assertEqual(result.columns, expected_columns)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    # Selecting a number column and a datetime column for a text-only
    # Multicolumn is expected to raise one WrongColumnType per column,
    # each naming the column's found type.
    #
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    input_shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.DATETIME()),
        Column('C', ColumnType.TEXT()),
    ])
    # Build the schema outside assertRaises so that only clean_value() can
    # satisfy the expectation; an error raised while constructing the
    # schema must fail the test instead of passing it.
    schema = ParamDType.Multicolumn(column_types=frozenset({'text'}))

    with self.assertRaises(PromptingError) as cm:
        clean_value(schema, 'A,B', input_shape)

    self.assertEqual(cm.exception.errors, [
        PromptingError.WrongColumnType(['A'], 'number', frozenset({'text'})),
        PromptingError.WrongColumnType(['B'], 'datetime', frozenset({'text'})),
    ])
def test_text_type(self):
    # Formatting a text series is expected to hand back equal values,
    # including the missing (NaN) entry.
    text_type = ColumnType.TEXT()
    original = pd.Series(['x', np.nan, 'z'])

    formatted = text_type.format_series(original)

    expected = pd.Series(['x', np.nan, 'z'])
    assert_series_equal(formatted, expected)