def test_clean_multichartseries_non_number_is_prompting_error(self):
    # Both chart series point at non-number columns (text 'A', datetime 'B'),
    # so cleaning must raise a PromptingError that lists one
    # WrongColumnType entry per offending column.
    input_columns = [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ]
    context = RenderContext(
        None, None, TableShape(3, input_columns), None, None
    )
    series = [
        {'column': 'A', 'color': '#aaaaaa'},
        {'column': 'B', 'color': '#cccccc'},
    ]
    wanted_types = frozenset({'number'})
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', wanted_types),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted_types),
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(ParamDType.Multichartseries(), series, context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multichartseries_non_number_is_prompting_error(self):
    # Multichartseries values that reference a text column and a datetime
    # column must be rejected: the raised PromptingError is expected to
    # hold a separate WrongColumnType for each column.
    table_shape = TableShape(
        3,
        [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.DATETIME()),
        ],
    )
    context = RenderContext(None, None, table_shape, None, None)
    chart_series = [
        {"column": "A", "color": "#aaaaaa"},
        {"column": "B", "color": "#cccccc"},
    ]
    number_only = frozenset({"number"})
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", number_only),
        PromptingError.WrongColumnType(["B"], "datetime", number_only),
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(ParamDType.Multichartseries(), chart_series, context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_dict_prompting_error_concatenate_different_types(self):
    # A Dict schema with two number-only Column params, fed a text column
    # and a datetime column: the errors of both entries must end up in a
    # single PromptingError, one WrongColumnType per source type.
    table_shape = TableShape(
        3,
        [
            Column("A", ColumnType.TEXT()),
            Column("B", ColumnType.DATETIME()),
        ],
    )
    context = RenderContext(None, None, table_shape, None, None)
    number_only = frozenset({"number"})
    schema = ParamDType.Dict(
        {
            "x": ParamDType.Column(column_types=frozenset({"number"})),
            "y": ParamDType.Column(column_types=frozenset({"number"})),
        }
    )
    params = {"x": "A", "y": "B"}
    expected_errors = [
        PromptingError.WrongColumnType(["A"], "text", number_only),
        PromptingError.WrongColumnType(["B"], "datetime", number_only),
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, params, context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    #
    # When the wanted type is text, the number column "A" and the datetime
    # column "B" are expected to be reported together in one
    # WrongColumnType whose found-type is None.
    input_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
    ]
    context = RenderContext(
        None, None, TableShape(3, input_columns), None, None
    )
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]

    with self.assertRaises(PromptingError) as raised:
        text_columns = ParamDType.Multicolumn(
            column_types=frozenset({"text"})
        )
        clean_value(text_columns, "A,B", context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_list_prompting_error_concatenate_different_type_to_text(self):
    # A List of text-only Column params given a number column and a
    # datetime column: the expected outcome is a single WrongColumnType
    # naming both columns, with None as the found type.
    table_shape = TableShape(
        3,
        [
            Column("A", ColumnType.NUMBER()),
            Column("B", ColumnType.DATETIME()),
        ],
    )
    context = RenderContext(None, None, table_shape, None, None)
    text_column = ParamDType.Column(column_types=frozenset({"text"}))
    schema = ParamDType.List(inner_dtype=text_column)
    expected_errors = [
        PromptingError.WrongColumnType(["A", "B"], None, frozenset({"text"}))
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, ["A", "B"], context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_metadata_comes_from_db_columns(self):
    # After caching a result and removing its stored object, a WfModule
    # re-fetched from the database must still report nrows and columns,
    # without holding an in-memory `_result`.
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    dataframe = pandas.DataFrame(
        {
            "A": [1],  # int64
            "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
            "C": ["foo"],  # str
            "D": pandas.Series(["cat"], dtype="category"),
        }
    )
    result = ProcessResult(dataframe=dataframe, columns=columns)
    stored = self.wf_module.cache_render_result(self.delta.id, result)

    # Remove the object at parquet_key so the assertions below cannot be
    # satisfied by reading the cached data back from storage.
    minio.remove(minio.CachedRenderResultsBucket, stored.parquet_key)

    # Load a _new_ CachedRenderResult -- from DB columns, not memory
    reloaded_module = WfModule.objects.get(id=self.wf_module.id)
    reloaded = reloaded_module.cached_render_result

    self.assertFalse(hasattr(reloaded, "_result"))
    self.assertEqual(reloaded.nrows, 1)
    self.assertEqual(reloaded.columns, columns)
def test_result_and_metadata_come_from_memory_when_available(self):
    # The object returned by cache_render_result() must still expose the
    # result, nrows and columns after its stored object has been removed.
    columns = [
        Column("A", ColumnType.NUMBER(format="{:,d}")),
        Column("B", ColumnType.DATETIME()),
        Column("C", ColumnType.TEXT()),
        Column("D", ColumnType.TEXT()),
    ]
    result = ProcessResult(
        dataframe=pandas.DataFrame(
            {
                "A": [1],  # int64
                "B": [datetime.datetime(2018, 8, 20)],  # datetime64[ns]
                "C": ["foo"],  # str
                "D": pandas.Series(["cat"], dtype="category"),
            }
        ),
        columns=columns,
    )
    cached_result = self.wf_module.cache_render_result(self.delta.id, result)

    # cache_render_result() keeps its `result` parameter in memory, so we
    # can avoid disk entirely. Prove it by deleting from disk.
    minio.remove(minio.CachedRenderResultsBucket, cached_result.parquet_key)

    # assertIsNotNone reports the offending value on failure, unlike
    # assertFalse(... is None), which only says "True is not false".
    self.assertIsNotNone(cached_result._result)
    self.assertEqual(cached_result.result, result)
    self.assertEqual(cached_result.nrows, 1)
    self.assertEqual(cached_result.columns, columns)
def test_format(self):
    # Formatting a datetime series must produce strings such as
    # '1999-02-03T04:05:06.000007Z', while the missing value stays NaN.
    timestamps = [
        dt(1999, 2, 3, 4, 5, 6, 7),
        np.nan,
        dt(2000, 3, 4, 5, 6, 7, 8),
    ]
    formatted = ColumnType.DATETIME().format_series(pd.Series(timestamps))
    expected = pd.Series(
        [
            '1999-02-03T04:05:06.000007Z',
            np.nan,
            '2000-03-04T05:06:07.000008Z',
        ]
    )
    assert_series_equal(formatted, expected)
def test_ctor_infer_columns(self):
    # ProcessResult built from a bare DataFrame (no explicit columns) is
    # expected to report NUMBER, TEXT and DATETIME for columns A, B and C.
    dataframe = pd.DataFrame({
        'A': [1, 2],
        'B': ['x', 'y'],
        'C': [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    })
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
    ]

    inferred = ProcessResult(dataframe)

    self.assertEqual(inferred.columns, expected_columns)
def test_columns(self):
    # column_names must list A-D in order, and the categorical column 'D'
    # is expected to be reported as TEXT alongside the number, text and
    # datetime columns.
    frame = pd.DataFrame({
        'A': [1],  # number
        'B': ['foo'],  # str
        'C': dt(2018, 8, 20),  # datetime64
    })
    frame['D'] = pd.Series(['cat'], dtype='category')
    expected_names = ['A', 'B', 'C', 'D']
    expected_columns = [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.TEXT()),
        Column('C', ColumnType.DATETIME()),
        Column('D', ColumnType.TEXT()),
    ]

    processed = ProcessResult(frame)

    self.assertEqual(processed.column_names, expected_names)
    self.assertEqual(processed.columns, expected_columns)
def test_ctor_infer_columns(self):
    # With only a DataFrame passed to the constructor, result.columns must
    # come back as NUMBER for "A", TEXT for "B" and DATETIME for "C".
    source_data = {
        "A": [1, 2],
        "B": ["x", "y"],
        "C": [np.nan, dt(2019, 3, 3, 4, 5, 6, 7)],
    }
    expected_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.TEXT()),
        Column("C", ColumnType.DATETIME()),
    ]

    inferred = ProcessResult(pd.DataFrame(source_data))

    self.assertEqual(inferred.columns, expected_columns)
def test_list_prompting_error_concatenate_different_type(self):
    # A List of number-only Column params given a text column and a
    # datetime column: each column is expected to get its own
    # WrongColumnType inside the one PromptingError.
    table_shape = TableShape(3, [
        Column('A', ColumnType.TEXT()),
        Column('B', ColumnType.DATETIME()),
    ])
    context = RenderContext(None, None, table_shape, None, None)
    number_column = ParamDType.Column(column_types=frozenset({'number'}))
    schema = ParamDType.List(inner_dtype=number_column)
    wanted_types = frozenset({'number'})
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'text', wanted_types),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted_types),
    ]

    with self.assertRaises(PromptingError) as raised:
        clean_value(schema, ['A', 'B'], context)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_clean_multicolumn_prompting_error_convert_to_text(self):
    # TODO make this _automatic_ instead of quick-fix?
    # ... but for now: prompt for a Quick Fix.
    #
    # Here the TableShape itself is handed to clean_value, and the number
    # and datetime columns are expected to be reported as two separate
    # WrongColumnType errors.
    input_shape = TableShape(3, [
        Column('A', ColumnType.NUMBER()),
        Column('B', ColumnType.DATETIME()),
        Column('C', ColumnType.TEXT()),
    ])
    wanted_types = frozenset({'text'})
    expected_errors = [
        PromptingError.WrongColumnType(['A'], 'number', wanted_types),
        PromptingError.WrongColumnType(['B'], 'datetime', wanted_types),
    ]

    with self.assertRaises(PromptingError) as raised:
        text_columns = ParamDType.Multicolumn(
            column_types=frozenset({'text'})
        )
        clean_value(text_columns, 'A,B', input_shape)

    self.assertEqual(raised.exception.errors, expected_errors)
def test_columns(self):
    # A frame with number, str, datetime and categorical columns must
    # yield column_names A-D in order, with the categorical "D" expected
    # to be typed as TEXT.
    frame = pd.DataFrame(
        {
            "A": [1],  # number
            "B": ["foo"],  # str
            "C": dt(2018, 8, 20),  # datetime64
        }
    )
    frame["D"] = pd.Series(["cat"], dtype="category")
    expected_columns = [
        Column("A", ColumnType.NUMBER()),
        Column("B", ColumnType.TEXT()),
        Column("C", ColumnType.DATETIME()),
        Column("D", ColumnType.TEXT()),
    ]

    processed = ProcessResult(frame)

    self.assertEqual(processed.column_names, ["A", "B", "C", "D"])
    self.assertEqual(processed.columns, expected_columns)