def test_to_arrow_two_symmetric(self):
    """Round-trip a view grouped by "a" and split by "b" through Arrow.

    The Arrow payload is loaded into a second table; its schema and its
    contents are compared with the source view, with each row path reduced
    to its first element (or None when empty) under "a (Group by 1)".
    """
    minutes = range(0, 40, 10)
    source = Table({
        "a": [1, 2, 3, 4],
        "b": ["hello", "world", "hello2", "world2"],
        "c": [datetime(2019, 7, 11, 12, minute) for minute in minutes],
    })
    pivoted = source.view(group_by=["a"], split_by=["b"])
    roundtrip = Table(pivoted.to_arrow())

    expected_schema = {
        "a (Group by 1)": int,
        "hello|a": int,
        "hello|b": int,
        "hello|c": int,
        "world|a": int,
        "world|b": int,
        "world|c": int,
        "hello2|a": int,
        "hello2|b": int,
        "hello2|c": int,
        "world2|a": int,
        "world2|b": int,
        "world2|c": int,
    }
    assert roundtrip.schema() == expected_schema

    expected = pivoted.to_dict()
    row_paths = expected.pop("__ROW_PATH__")
    # The total row has an empty path, which maps to None
    expected["a (Group by 1)"] = [
        path[0] if path else None for path in row_paths
    ]
    assert roundtrip.view().to_dict() == expected
def test_manager_set_queue_process_multiple(self, sentinel):
    """A loop callback set on one manager must not be used by another.

    Two managers each host their own table; only the second manager gets a
    counting loop callback. After updating both tables, the first counter
    must still be 0 and the second must be 2.
    """
    # manager2's queue process should not affect manager1,
    # provided they manage different tables
    first_counter = sentinel(0)
    second_counter = sentinel(0)

    first_manager = PerspectiveManager()
    second_manager = PerspectiveManager()

    first_table = Table({"a": [1, 2, 3]})
    second_table = Table({"a": [1, 2, 3]})

    first_manager.host_table("tbl", first_table)
    second_manager.host_table("tbl2", second_table)

    def counting_queue_process(f, *args, **kwargs):
        # Record the invocation, then run the queued call right away
        second_counter.set(second_counter.get() + 1)
        f(*args, **kwargs)

    second_manager.set_loop_callback(counting_queue_process)

    first_table.update({"a": [4, 5, 6]})
    assert first_table.view().to_dict() == {"a": [1, 2, 3, 4, 5, 6]}

    second_table.update({"a": [7, 8, 9]})
    first_table.update({"a": [7, 8, 9]})

    assert first_table.view().to_dict() == {
        "a": [1, 2, 3, 4, 5, 6, 7, 8, 9],
    }
    assert second_table.view().to_dict() == {"a": [1, 2, 3, 7, 8, 9]}

    assert first_counter.get() == 0
    assert second_counter.get() == 2
def test_to_arrow_nones_symmetric(self):
    """None entries in int and float columns survive an Arrow round trip."""
    columns = {
        "a": [None, 1, None, 2, 3],
        "b": [1.5, 2.5, None, 3.5, None],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    roundtrip = Table(source.view().to_arrow())
    assert roundtrip.view().to_dict() == columns
def test_to_arrow_boolean_symmetric(self):
    """A boolean column containing None survives an Arrow round trip."""
    columns = {"a": [True, False, None, False, True, None]}
    source = Table(columns)
    assert source.schema() == {"a": bool}

    roundtrip = Table(source.view().to_arrow())
    assert roundtrip.view().to_dict() == columns
def test_view_computed_multiple_views_should_not_conflate(self):
    """Two views on one table each expose only their own computed column."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    addition = {
        "column": "computed",
        "computed_function_name": "+",
        "inputs": ["a", "b"],
    }
    subtraction = {
        "column": "computed2",
        "computed_function_name": "-",
        "inputs": ["a", "b"],
    }
    sum_view = table.view(computed_columns=[addition])
    diff_view = table.view(computed_columns=[subtraction])

    assert sum_view.schema() == {"a": int, "b": int, "computed": float}
    assert diff_view.schema() == {"a": int, "b": int, "computed2": float}

    expected_sum = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    expected_diff = {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
    assert sum_view.to_columns() == expected_sum
    assert diff_view.to_columns() == expected_diff
def test_to_arrow_one_symmetric(self):
    """Round-trip a view grouped by "a" through Arrow.

    The re-imported table must report every aggregated column as int, and
    its contents must equal the view's dict with each row path reduced to
    its first element (or None when empty) under "a (Group by 1)".
    """
    source = Table({
        "a": [1, 2, 3, 4],
        "b": ["a", "b", "c", "d"],
        "c": [datetime(2019, 7, 11, 12, minute) for minute in (0, 10, 20, 30)],
    })
    grouped = source.view(group_by=["a"])
    roundtrip = Table(grouped.to_arrow())

    assert roundtrip.schema() == {
        "a (Group by 1)": int,
        "a": int,
        "b": int,
        "c": int,
    }

    expected = grouped.to_dict()
    row_paths = expected.pop("__ROW_PATH__")
    # The total row has an empty path, which maps to None
    expected["a (Group by 1)"] = [
        path[0] if path else None for path in row_paths
    ]
    assert roundtrip.view().to_dict() == expected
def test_view_expression_multiple_views_should_all_clear(self):
    """Clearing the table empties every expression view but keeps schemas."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = table.view(expressions=['// computed \n "a" + "b"'])
    diff_view = table.view(expressions=['// computed2 \n "a" - "b"'])

    sum_schema = {"a": int, "b": int, "computed": float}
    diff_schema = {"a": int, "b": int, "computed2": float}

    assert sum_view.schema() == sum_schema
    assert diff_view.schema() == diff_schema

    assert sum_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert diff_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }

    table.clear()

    # Schemas are unchanged by the clear; only the rows disappear
    assert sum_view.schema() == sum_schema
    assert diff_view.schema() == diff_schema
    assert sum_view.to_columns() == {}
    assert diff_view.to_columns() == {}
def test_view_expression_delete_and_create(self):
    """An expression alias can be reused by a new view after deletion."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    expected_schema = {"a": int, "b": int, "computed": float}

    first = table.view(expressions=['// computed \n "a" + "b"'])
    assert first.schema() == expected_schema
    assert first.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }

    first.delete()

    # Same alias, different expression: results must come from the new one
    second = table.view(expressions=['// computed \n "a" - "b"'])
    assert second.schema() == expected_schema
    assert second.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [-4, -4, -4, -4],
    }
def test_view_expression_multiple_views_with_the_same_alias_pivoted(
    self,
):
    """Two pivoted views may reuse one expression alias with different types.

    Both views pivot on "computed"; the first defines it as a numeric sum,
    the second as a string concatenation.
    """
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    numeric_view = table.view(
        row_pivots=["computed"],
        aggregates={"computed": ["weighted mean", "b"]},
        expressions=['// computed \n "a" + "b"'],
    )
    string_view = table.view(
        row_pivots=["computed"],
        aggregates={"computed": "last"},
        expressions=['// computed \nconcat(\'abc\', \' \', \'def\')'],
    )

    assert numeric_view.expression_schema() == {"computed": float}
    assert string_view.expression_schema() == {"computed": str}

    numeric_result = numeric_view.to_dict()
    string_result = string_view.to_dict()

    assert numeric_result["__ROW_PATH__"] == [[], [6], [8], [10], [12]]
    assert string_result["__ROW_PATH__"] == [[], ["abc def"]]

    assert numeric_result["computed"] == [9.384615384615385, 6, 8, 10, 12]
    assert string_result["computed"] == ["abc def", "abc def"]
def test_to_arrow_column_only_symmetric(self):
    """Round-trip a view with only column pivots through Arrow.

    The re-imported table must keep the original column types under the
    "<pivot value>|<column>" names and match the view's dict output.
    """
    minutes = range(0, 40, 10)
    source = Table({
        "a": [1, 2, 3, 4],
        "b": ["a", "b", "c", "d"],
        "c": [datetime(2019, 7, 11, 12, minute) for minute in minutes],
    })
    pivoted = source.view(column_pivots=["a"])
    roundtrip = Table(pivoted.to_arrow())

    expected_schema = {
        "1|a": int,
        "1|b": str,
        "1|c": datetime,
        "2|a": int,
        "2|b": str,
        "2|c": datetime,
        "3|a": int,
        "3|b": str,
        "3|c": datetime,
        "4|a": int,
        "4|b": str,
        "4|c": datetime,
    }
    assert roundtrip.schema() == expected_schema
    assert roundtrip.view().to_dict() == pivoted.to_dict()
def test_view_delete_with_scope(self):
    """Update a table after creating a view that is never bound to a name.

    Intended to exercise `View.__del__` being triggered by Python's
    reference counting on an empty, indexed `Table`. There are no explicit
    assertions: the test passes when the final update completes without
    raising.
    """
    schema = {"id": int, "msg": str, "val": float}
    table = Table(schema, index="id")

    inverted = {
        "column": "inverted",
        "computed_function_name": "invert",
        "inputs": ["val"],
    }
    # Deliberately discard the returned view so nothing references it
    table.view(computed_columns=[inverted], columns=["inverted"])

    row = {"id": 1, "msg": "test", "val": 1.0}
    table.update([row])
def test_view_computed_should_not_overwrite_real_dependencies(self):
    """A computed column named like a real column fails view creation.

    The offending definition sits in the middle of a dependency chain of
    otherwise valid computed columns.
    """
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    definitions = [
        {
            "column": "computed",
            "computed_function_name": "+",
            "inputs": ["a", "b"],
        },
        {
            "column": "computed2",
            "computed_function_name": "sqrt",
            "inputs": ["computed"],
        },
        {
            "column": "a",  # invalid
            "computed_function_name": "+",
            "inputs": ["computed", "computed2"],
        },
        {
            "column": "computed3",  # will be skipped
            "computed_function_name": "+",
            "inputs": ["a", "computed2"],
        },
        {
            "column": "computed4",  # will not be skipped
            "computed_function_name": "+",
            "inputs": ["computed", "computed2"],
        },
    ]

    with raises(PerspectiveCppError) as excinfo:
        table.view(computed_columns=definitions)

    message = str(excinfo.value)
    assert message == "View creation failed: cannot overwrite Table column 'a' with a computed column.\n"
def test_to_arrow_str_symmetric(self):
    """A string column containing None survives an Arrow round trip."""
    columns = {"a": ["a", "b", "c", "d", "e", None]}
    source = Table(columns)
    assert source.schema() == {"a": str}

    roundtrip = Table(source.view().to_arrow())
    assert roundtrip.view().to_dict() == columns
def test_view_computed_delete_and_create(self):
    """A new computed-column view can be created after deleting another."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    addition = {
        "column": "computed",
        "computed_function_name": "+",
        "inputs": ["a", "b"],
    }
    first = table.view(computed_columns=[addition])
    assert first.schema() == {"a": int, "b": int, "computed": float}
    assert first.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }

    first.delete()

    subtraction = {
        "column": "computed2",
        "computed_function_name": "-",
        "inputs": ["a", "b"],
    }
    second = table.view(computed_columns=[subtraction])
    assert second.schema() == {"a": int, "b": int, "computed2": float}
    assert second.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
def test_to_arrow_end_col_invalid(self):
    """An `end_col` past the last column still yields every column.

    The table has two columns and `end_col=6` is requested; the round trip
    is expected to return the full dataset.
    """
    columns = {
        "a": [None, 1, None, 2, 3],
        "b": [1.5, 2.5, None, 3.5, None],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    payload = source.view().to_arrow(end_col=6)
    roundtrip = Table(payload)
    assert roundtrip.view().to_dict() == columns
def test_to_arrow_two_symmetric(self):
    """Round-trip a view with row pivot "a" and column pivot "b" via Arrow.

    The re-imported table must report every pivoted column as int and
    match the view's dict output once "__ROW_PATH__" has been removed.
    """
    minutes = range(0, 40, 10)
    source = Table({
        "a": [1, 2, 3, 4],
        "b": ["hello", "world", "hello2", "world2"],
        "c": [datetime(2019, 7, 11, 12, minute) for minute in minutes],
    })
    pivoted = source.view(row_pivots=["a"], column_pivots=["b"])
    roundtrip = Table(pivoted.to_arrow())

    expected_schema = {
        "hello|a": int,
        "hello|b": int,
        "hello|c": int,
        "world|a": int,
        "world|b": int,
        "world|c": int,
        "hello2|a": int,
        "hello2|b": int,
        "hello2|c": int,
        "world2|a": int,
        "world2|b": int,
        "world2|c": int,
    }
    assert roundtrip.schema() == expected_schema

    expected = pivoted.to_dict()
    # The row path is dropped before comparing with the re-imported table
    expected.pop("__ROW_PATH__")
    assert roundtrip.view().to_dict() == expected
def test_to_arrow_start_end_col_equiv(self):
    """Equal `start_col` and `end_col` produce an Arrow with no columns."""
    columns = {
        "a": [None, 1, None, 2, 3],
        "b": [1.5, 2.5, None, 3.5, None],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    payload = source.view().to_arrow(start_col=1, end_col=1)
    roundtrip = Table(payload)
    assert roundtrip.view().to_dict() == {}
def test_exception_from_core_catch_generic(self):
    """An error for an unknown group_by column is catchable as `Exception`."""
    table = Table({"a": [1, 2, 3]})

    # `PerspectiveCppError` should inherit from `Exception`
    with raises(Exception) as excinfo:
        table.view(group_by=["b"])

    message = str(excinfo.value)
    assert message == "Invalid column 'b' found in View group_by.\n"
def test_view_expression_should_not_overwrite_real(self):
    """An expression aliased to an existing column name fails view creation."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    clashing_expression = '// a \n upper("a")'

    with raises(PerspectiveCppError) as excinfo:
        table.view(expressions=[clashing_expression])

    message = str(excinfo.value)
    assert message == "View creation failed: cannot create expression column 'a' that overwrites a column that already exists.\n"
def test_view_expression_multiple_views_with_the_same_alias_all_types(
    self,
):
    """Two views may reuse expression aliases with different result types.

    The first view defines "computed", "computed2", "computed3" and
    "computed4"; the second redefines "computed", "computed2" and
    "computed4" with other expressions and leaves "computed3" undefined.
    Each view must report its own schema and values.
    """
    # Read the clock exactly once and reuse it for both the table rows and
    # the expected buckets. Reading it again while building the table could
    # straddle a minute, day or month boundary and fail the test spuriously.
    now = datetime.now()
    today = now.date()
    month_bucketed = datetime(today.year, today.month, 1)
    minute_bucketed = datetime(
        now.year, now.month, now.day, now.hour, now.minute, 0, 0
    )

    table = Table({
        "a": [1, 2, 3, 4],
        "b": [5.5, 6.5, 7.5, 8.5],
        "c": [now for _ in range(4)],
        "d": [today for _ in range(4)],
        "e": [True, False, True, False],
        "f": ["a", "b", "c", "d"],
    })

    view = table.view(expressions=[
        '// computed \n "a" + "b"',
        '// computed2 \n bucket("c", \'M\')',
        '// computed3 \n concat(\'a\', \'b\', \'c\')',
        '// computed4 \n \'new string\'',
    ])
    view2 = table.view(expressions=[
        '// computed \n upper("f")',
        '// computed2 \n 20 + ("b" * "a")',
        '// computed4 \n bucket("c", \'m\')',
    ])

    assert view.expression_schema() == {
        "computed": float,
        "computed2": date,
        "computed3": str,
        "computed4": str,
    }
    assert view2.expression_schema() == {
        "computed": str,
        "computed2": float,
        "computed4": datetime,
    }

    result = view.to_dict()
    result2 = view2.to_dict()

    assert result["computed"] == [6.5, 8.5, 10.5, 12.5]
    assert result2["computed"] == ["A", "B", "C", "D"]

    assert result["computed2"] == [month_bucketed for _ in range(4)]
    assert result2["computed2"] == [25.5, 33, 42.5, 54]

    assert result["computed3"] == ["abc", "abc", "abc", "abc"]
    assert "computed3" not in result2

    assert result["computed4"] == ["new string" for _ in range(4)]
    assert result2["computed4"] == [minute_bucketed for _ in range(4)]
def test_exception_from_core(self):
    """Grouping by an unknown column raises `PerspectiveCppError`."""
    table = Table({"a": [1, 2, 3]})

    with raises(PerspectiveCppError) as excinfo:
        # creating view with unknown column should throw
        table.view(group_by=["b"])

    message = str(excinfo.value)
    assert message == "Invalid column 'b' found in View group_by.\n"
def test_to_arrow_start_end_row(self):
    """`start_row=2, end_row=3` exports the same rows as the slice `[2:3]`."""
    columns = {
        "a": [None, 1, None, 2, 3],
        "b": [1.5, 2.5, None, 3.5, None],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    payload = source.view().to_arrow(start_row=2, end_row=3)
    roundtrip = Table(payload)

    expected = {name: values[2:3] for name, values in columns.items()}
    assert roundtrip.view().to_dict() == expected
def test_to_arrow_start_end_col_equiv_row(self):
    """Equal `start_col`/`end_col` yields no columns even with a row window."""
    columns = {
        "a": [None, 1, None, 2, 3],
        "b": [1.5, 2.5, None, 3.5, None],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    window = {"start_col": 1, "end_col": 1, "start_row": 2, "end_row": 3}
    roundtrip = Table(source.view().to_arrow(**window))

    # start/end col is a range - thus start=end provides no columns
    assert roundtrip.view().to_dict() == {}
def test_view_computed_invalid_type_should_throw(self):
    """Applying "uppercase" to an int column fails view creation."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    bad_definition = {
        "column": "computed",
        "computed_function_name": "uppercase",
        "inputs": ["a"],
    }

    with raises(PerspectiveCppError) as excinfo:
        table.view(computed_columns=[bad_definition])

    message = str(excinfo.value)
    assert message == "View creation failed: could not build computed column 'computed' as the input column types are invalid.\n"
def test_view_computed_should_not_overwrite_real(self):
    """A computed column named like an existing column fails view creation."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    clashing_definition = {
        "column": "a",  # invalid - col already exists
        "computed_function_name": "+",
        "inputs": ["a", "b"],
    }

    with raises(PerspectiveCppError) as excinfo:
        table.view(computed_columns=[clashing_definition])

    message = str(excinfo.value)
    assert message == "View creation failed: cannot overwrite Table column 'a' with a computed column.\n"
def test_exception_from_core_correct_types(self):
    """Check which exception type each failure raises.

    Deleting a table that still has a view must raise `PerspectiveError`;
    creating a view pivoted on an unknown column must raise
    `PerspectiveCppError`.
    """
    tbl = Table({"a": [1, 2, 3]})

    # Keep a reference to the view so the table still has a live view when
    # `delete()` is called. Creating it outside the `raises` block also means
    # an error from view creation cannot satisfy the expectation by accident.
    view = tbl.view()

    # `PerspectiveError` should be raised from the Python layer
    with raises(PerspectiveError) as ex:
        tbl.delete()

    # Message checks come after each block: statements that follow the
    # raising call inside a `raises` block are never executed.
    assert str(ex.value) == "Cannot delete a Table with active views still linked to it - call delete() on each view, and try again."

    with raises(PerspectiveCppError) as ex:
        tbl.view(row_pivots=["b"])

    # NOTE(review): confirm this text against the message the core emits.
    assert str(ex.value) == "Column b does not exist in schema."
def test_view_computed_dependencies_do_not_cross_over_different_views(
        self):
    """Computed columns of one view are not usable as inputs in another.

    The first view builds a chain of four computed columns. A second view
    that takes "computed3" as an input without defining it must fail.
    """
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})

    chain = [
        {
            "column": "computed",
            "computed_function_name": "+",
            "inputs": ["a", "b"],
        },
        {
            "column": "computed2",
            "computed_function_name": "sqrt",
            "inputs": ["computed"],
        },
        {
            "column": "computed3",
            "computed_function_name": "exp",
            "inputs": ["computed"],
        },
        {
            "column": "computed4",
            "computed_function_name": "pow2",
            "inputs": ["computed3"],
        },
    ]
    first = table.view(computed_columns=chain)

    assert first.schema() == {
        "a": int,
        "b": int,
        "computed": float,
        "computed2": float,
        "computed3": float,
        "computed4": float,
    }
    assert first.computed_schema() == {
        "computed": float,
        "computed2": float,
        "computed3": float,
        "computed4": float,
    }

    dangling = {
        "column": "computed5",
        "computed_function_name": "pow2",
        "inputs": ["computed3"],
    }
    with raises(PerspectiveCppError) as excinfo:
        table.view(computed_columns=[dangling])

    message = str(excinfo.value)
    assert message == "Could not get dtype for column `computed3` as it does not exist in the schema.\n"
def test_to_arrow_date_symmetric_january(self):
    """January dates keep the `date` schema across an Arrow round trip.

    The re-imported table's dict output is expected to hold `datetime`
    values for the same calendar days.
    """
    days = [(2019, 1, 1), (2016, 1, 1), (2019, 1, 1)]
    source = Table({"a": [date(*day) for day in days]})
    assert source.schema() == {"a": date}

    roundtrip = Table(source.view().to_arrow())
    assert roundtrip.schema() == source.schema()
    assert roundtrip.view().to_dict() == {
        "a": [datetime(*day) for day in days],
    }
def test_to_arrow_big_numbers_symmetric(self):
    """Very large float values survive an Arrow round trip unchanged."""
    columns = {
        "a": [1, 2, 3, 4],
        "b": [1.7976931348623157e+308 for _ in range(4)],
    }
    source = Table(columns)
    assert source.schema() == {"a": int, "b": float}

    roundtrip = Table(source.view().to_arrow())
    assert roundtrip.view().to_dict() == columns
def test_manager_table_computed_schema(self):
    """Send a `computed_schema` table message through the manager.

    The reply handed to `self.validate_post` is expected to carry id 1 and
    report the computed column "abc" as "float".
    """
    expected_reply = {"id": 1, "data": {"abc": "float"}}
    post_callback = partial(self.validate_post, expected=expected_reply)

    computed_definition = {
        "column": "abc",
        "computed_function_name": "+",
        "inputs": ["a", "a"],
    }
    message = {
        "id": 1,
        "name": "table1",
        "cmd": "table_method",
        "method": "computed_schema",
        "args": [[computed_definition]],
    }

    manager = PerspectiveManager()
    table = Table(data)
    # Held in a local for the rest of the test, as in the original
    view = table.view()
    manager.host_table("table1", table)
    manager._process(message, post_callback)