def test_widget_eventual_table_indexed(self):
    """An indexed table loaded after widget creation carries its index in
    the load message options."""
    tbl = Table({"a": np.arange(0, 50)}, index="a")
    widget = PerspectiveWidget(None, plugin="X Bar")
    assert widget.plugin == "X Bar"
    widget.load(tbl)
    msg = widget._make_load_message()
    expected = {
        "id": -2,
        "type": "table",
        "data": {
            "table_name": widget.table_name,
            "options": {
                "index": "a"
            }
        }
    }
    assert msg.to_dict() == expected
def benchmark_view_zero_updates(self):
    """Benchmark how long it takes for each update to resolve fully,
    using the on update callback that forces resolution of updates
    across 25 views."""
    table = Table(self._schema)
    views = [table.view() for _ in range(25)]
    for view in views:
        view.on_update(empty_callback)
    update_data = self._get_update_data(1000)

    def resolve_update():
        table.update(update_data)
        # `size()` forces the pending update to be processed.
        table.size()

    bench = Benchmark(resolve_update, meta=make_meta("update", "zero"))
    setattr(self, "update_zero", bench)
def test_manager_clear_view_nonseq(self):
    """Clearing non-sequential client ids removes only those clients'
    views."""
    messages = [
        {"id": 1, "table_name": "table1", "view_name": "view1", "cmd": "view"},
        {"id": 2, "table_name": "table1", "view_name": "view2", "cmd": "view"},
        {"id": 3, "table_name": "table1", "view_name": "view3", "cmd": "view"},
    ]
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    # Each message is processed under its own client id.
    for client_id, msg in enumerate(messages, 1):
        manager._process(msg, self.post, client_id=client_id)
    manager.clear_views(1)
    manager.clear_views(3)
    assert "view1" not in manager._views
    assert "view3" not in manager._views
    assert "view2" in manager._views
def __init__(self):
    """Create a benchmark suite for `perspective-python`.

    Pre-computes every serialization format of SUPERSTORE once so the
    individual benchmarks only measure the operation under test.
    """
    tbl = Table(SUPERSTORE)
    self._schema = tbl.schema()
    # Copy before mutating so `self._schema` can never be aliased, even if
    # `schema()` were to return a shared dict.
    self._df_schema = dict(tbl.schema())
    # mutate schema to have some integer columns, so as to force numpy
    # float-to-int demotion
    self._df_schema["Sales"] = int
    self._df_schema["Profit"] = int
    self._df_schema["Quantity"] = int
    self._view = tbl.view()
    self.dict = self._view.to_dict()
    self.records = self._view.to_records()
    self.df = SUPERSTORE
    self.csv = self._view.to_csv()
    self.arrow = self._view.to_arrow()
    self._table = tbl
def test_manager_set_queue_process(self, sentinel):
    """A custom queue-process function installed via `_set_queue_process`
    is invoked exactly once for a subsequent update."""
    counter = sentinel(0)
    manager = PerspectiveManager()
    table = Table({"a": [1, 2, 3]})
    manager.host_table("tbl", table)
    table.update({"a": [4, 5, 6]})
    assert table.view().to_dict() == {"a": [1, 2, 3, 4, 5, 6]}

    def fake_queue_process(table_id, state_manager):
        # Count the call, then delegate to the real processing logic.
        counter.set(counter.get() + 1)
        state_manager.call_process(table_id)

    manager._set_queue_process(fake_queue_process)
    table.update({"a": [7, 8, 9]})
    assert counter.get() == 1
def test_async_queue_process_csv(self):
    """Make sure GIL release during CSV loading works"""
    csv = "x,y,z\n1,a,true\n2,b,false\n3,c,true\n4,d,false"
    tbl = Table(csv)
    manager = PerspectiveManager()
    manager.set_loop_callback(TestAsync.loop.add_callback)
    manager.host(tbl)

    @syncify
    def _task():
        assert tbl.size() == 4
        # Five more CSV updates of four rows each: 4 + 5 * 4 == 24.
        for _ in range(5):
            tbl.update(csv)
        return tbl.size()

    assert _task() == 24
    tbl.delete()
def test_widget_eventual_data_with_server(self):
    widget = PerspectiveWidget(None, server=True)
    # With no data loaded, building a load message must fail.
    with raises(PerspectiveError):
        widget._make_load_message()
    # After loading a table it succeeds.
    widget.load(Table({"a": np.arange(0, 50)}))
    msg = widget._make_load_message()
    expected = {
        "id": -2,
        "type": "table",
        "data": {
            "table_name": widget.table_name
        }
    }
    assert msg.to_dict() == expected
def test_manager_create_indexed_table_and_remove(self):
    """Rows can be removed from an indexed, manager-hosted table via a
    `table_method` message."""
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    create_msg = {
        "id": 1,
        "name": "table1",
        "cmd": "table",
        "args": [data],
        "options": {"index": "a"},
    }
    manager._process(create_msg, self.post)
    hosted = manager._tables["table1"]
    assert hosted.schema() == {"a": int, "b": str}
    assert hosted._index == "a"
    remove_msg = {
        "id": 2,
        "name": "table1",
        "cmd": "table_method",
        "method": "remove",
        "args": [[1, 2]],
    }
    manager._process(remove_msg, self.post)
    # Only the row with index value 3 should remain.
    assert hosted.view().to_dict() == {"a": [3], "b": ["c"]}
def test_widget_load_view(self):
    """Loading a `View` produces a load message that names both the table
    and the view."""
    tbl = Table({"a": np.arange(0, 50)})
    widget = PerspectiveWidget(tbl.view(), plugin="x_bar")
    assert widget.plugin == "x_bar"
    msg = widget._make_load_message()
    expected = {
        "id": -2,
        "type": "table",
        "data": {
            "table_name": widget.table_name,
            "view_name": widget._perspective_view_name,
            "options": {
                "index": ""
            }
        }
    }
    assert msg.to_dict() == expected
def test_view_month_bucket_datetime_with_null(self):
    """`month_bucket` propagates nulls instead of bucketing them."""
    source = [datetime(2020, 1, 1), None, None, datetime(2020, 3, 15)]
    table = Table({"a": source})
    view = table.view(computed_columns=[{
        "column": "bucket",
        "computed_function_name": "month_bucket",
        "inputs": ["a"],
    }])
    assert view.schema() == {"a": datetime, "bucket": date}
    assert view.to_columns() == {
        "a": source,
        "bucket": [datetime(2020, 1, 1), None, None, datetime(2020, 3, 1)],
    }
def test_manager_table_get_computation_input_types(self):
    """`get_computation_input_types` answers with the input types of the
    requested computation."""
    # The callback asserts on the response the manager posts back.
    post_callback = partial(
        self.validate_post,
        expected={"id": 1, "data": ["string"]},
    )
    message = {
        "id": 1,
        "name": "table1",
        "cmd": "table_method",
        "method": "get_computation_input_types",
        "args": ["concat_comma"],
    }
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    manager._process(message, post_callback)
def test_manager_create_indexed_table_and_update(self):
    """An indexed, manager-hosted table can be updated through a
    `table_method` message; matching index values overwrite rows."""
    create_msg = {
        "id": 1,
        "name": "table1",
        "cmd": "table",
        "args": [data],
        "options": {"index": "a"},
    }
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    manager._process(create_msg, self.post)
    hosted = manager._tables["table1"]
    assert hosted.schema() == {"a": int, "b": str}
    assert hosted.get_index() == "a"
    update_msg = {
        "id": 2,
        "name": "table1",
        "cmd": "table_method",
        "method": "update",
        "args": [{"a": [1, 2, 3], "b": ["str1", "str2", "str3"]}],
    }
    manager._process(update_msg, self.post)
    assert hosted.view().to_dict() == {
        "a": [1, 2, 3],
        "b": ["str1", "str2", "str3"],
    }
def test_manager_set_queue_process(self, sentinel):
    """A loop callback installed via `set_loop_callback` is dispatched for
    subsequent updates (twice for one update here)."""
    counter = sentinel(0)
    manager = PerspectiveManager()
    table = Table({"a": [1, 2, 3]})
    manager.host_table("tbl", table)
    table.update({"a": [4, 5, 6]})
    assert table.view().to_dict() == {"a": [1, 2, 3, 4, 5, 6]}

    def counting_loop_callback(func, *args, **kwargs):
        # Count each dispatch, then run the work synchronously.
        counter.set(counter.get() + 1)
        func(*args, **kwargs)

    manager.set_loop_callback(counting_loop_callback)
    table.update({"a": [7, 8, 9]})
    assert counter.get() == 2
def test_manager_create_indexed_table(self):
    """A `table` message with an `index` option yields an indexed hosted
    table."""
    message = {
        "id": 1,
        "name": "table1",
        "cmd": "table",
        "args": [data],
        "options": {
            "index": "a"
        }
    }
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    manager._process(message, self.post)
    hosted = manager._tables["table1"]
    assert hosted.schema() == {"a": int, "b": str}
    assert hosted._index == "a"
def test_view_date_expression(self):
    """`date(...)` expressions produce `date`-typed columns; values are
    serialized back as datetimes at midnight."""
    table = Table({"x": [1]})
    view = table.view(expressions=[
        '// computed\n date(2020, 5, 30)',
        '// computed2\n date(1997, 8, 31)',
    ])
    assert view.expression_schema() == {
        "computed": date,
        "computed2": date,
    }
    result = view.to_dict()
    assert result["computed"] == [datetime(2020, 5, 30)]
    assert result["computed2"] == [datetime(1997, 8, 31)]
def benchmark_view_two_column_only_updates(self):
    """Benchmark how long it takes for each update to resolve fully,
    using the on update callback that forces resolution of updates
    across 25 views."""
    table = Table(self._schema)
    views = [
        table.view(column_pivots=["Category", "Sub-Category"])
        for _ in range(25)
    ]
    for view in views:
        view.on_update(empty_callback)
    update_data = self._get_update_data(1000)

    def resolve_update():
        table.update(update_data)
        # `size()` forces the pending update to be processed.
        table.size()

    bench = Benchmark(resolve_update, meta=make_meta("update", "two_column_only"))
    setattr(self, "update_two_column_only", bench)
def test_view_day_of_week_date(self):
    """`day_of_week` yields ordered weekday-name strings for date input."""
    table = Table({"a": [date(2020, 3, i) for i in range(9, 14)]})
    view = table.view(computed_columns=[{
        "column": "bucket",
        "computed_function_name": "day_of_week",
        "inputs": ["a"],
    }])
    assert view.schema() == {"a": date, "bucket": str}
    # Date columns serialize back as datetimes at midnight.
    assert view.to_columns() == {
        "a": [datetime(2020, 3, i) for i in range(9, 14)],
        "bucket": [
            "2 Monday",
            "3 Tuesday",
            "4 Wednesday",
            "5 Thursday",
            "6 Friday",
        ],
    }
def test_exception_from_core_correct_types(self):
    """Python-layer failures raise `PerspectiveError`; C++-layer failures
    raise `PerspectiveCppError`."""
    tbl = Table({"a": [1, 2, 3]})
    # `PerspectiveError` should be raised from the Python layer
    with raises(PerspectiveError) as ex:
        tbl.view()
        tbl.delete()
    assert (
        str(ex.value)
        == "Cannot delete a Table with active views still linked to it - call delete() on each view, and try again."
    )
    # An invalid column name is rejected by the C++ engine.
    with raises(PerspectiveCppError) as ex:
        tbl.view(group_by=["b"])
    assert str(ex.value) == "Invalid column 'b' found in View group_by.\n"
def benchmark_view_two_column_only_df_updates(self):
    """Benchmark dataframe updates for two-sided column only views."""
    table = Table(self._df_schema)
    views = [
        table.view(column_pivots=["Category", "Sub-Category"])
        for _ in range(25)
    ]
    for view in views:
        view.on_update(empty_callback)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        # `size()` forces the pending update to be processed.
        table.size()

    bench = Benchmark(resolve_update, meta=make_meta("update", "two_column_only_df"))
    setattr(self, "update_two_column_only_df", bench)
def benchmark_view_zero_df_updates(self):
    """Benchmark how long it takes for each update to resolve fully,
    using the on update callback that forces resolution of updates
    across 25 views. This version updates using dataframes, and is
    designed to compare the overhead of dataframe loading vs. regular
    data structure loading."""
    table = Table(self._df_schema)
    views = [table.view() for _ in range(25)]
    for view in views:
        view.on_update(empty_callback)
    update_data = pd.DataFrame(self._get_update_data(1000))

    def resolve_update():
        table.update(update_data)
        # `size()` forces the pending update to be processed.
        table.size()

    bench = Benchmark(resolve_update, meta=make_meta("update", "zero_df"))
    setattr(self, "update_zero_df", bench)
def test_view_computed_multiple_views_should_all_replace(self):
    """`Table.replace` recomputes computed columns on every view."""
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    sum_view = table.view(computed_columns=[{
        "column": "computed",
        "computed_function_name": "+",
        "inputs": ["a", "b"],
    }])
    diff_view = table.view(computed_columns=[{
        "column": "computed2",
        "computed_function_name": "-",
        "inputs": ["a", "b"],
    }])
    assert sum_view.schema() == {"a": int, "b": int, "computed": float}
    assert diff_view.schema() == {"a": int, "b": int, "computed2": float}
    assert sum_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    assert diff_view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed2": [-4, -4, -4, -4],
    }
    # Replacing the underlying data must refresh both computed columns.
    table.replace({"a": [10, 20, 30, 40], "b": [50, 60, 70, 80]})
    assert sum_view.to_columns() == {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed": [60, 80, 100, 120],
    }
    assert diff_view.to_columns() == {
        "a": [10, 20, 30, 40],
        "b": [50, 60, 70, 80],
        "computed2": [-40, -40, -40, -40],
    }
def test_async_queue_process(self):
    """Updates through a manager with a wrapped queue-process trigger the
    process hook at least once."""
    tbl = Table({"a": int, "b": float, "c": str})
    manager = PerspectiveManager()
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager.host(tbl)
    assert tbl.size() == 0
    for row in data[:5]:
        tbl.update([row])
    # process should have been called at least once
    assert SENTINEL.get() > 0
    tbl.delete()
def test_widget_eventual_data(self):
    """A widget created without data raises until a table is loaded; an
    unindexed table produces empty load options."""
    tbl = Table({"a": np.arange(0, 50)})
    widget = PerspectiveWidget(None, plugin="x_bar")
    assert widget.plugin == "x_bar"
    with raises(PerspectiveError):
        widget._make_load_message()
    widget.load(tbl)
    msg = widget._make_load_message()
    expected = {
        "id": -2,
        "type": "table",
        "data": {
            "table_name": widget.table_name,
            "options": {}
        }
    }
    assert msg.to_dict() == expected
def make_app():
    '''Create and return a Tornado app.

    For `PerspectiveTornadoHandler` to work, it must be passed an instance
    of `PerspectiveManager`. The data is loaded into a new `Table`, which is
    passed to the manager through `host_table`. The front-end is able to
    look up the table using the name provided to `host_table`.
    '''
    manager = PerspectiveManager()
    table = Table(pd.read_csv("superstore.csv"))
    manager.host_table("data_source_one", table)
    handlers = [
        (r"/", MainHandler),
        # create a websocket endpoint that the client Javascript can access
        (r"/websocket", PerspectiveTornadoHandler, {
            "manager": manager,
            "check_origin": True
        }),
    ]
    return tornado.web.Application(handlers)
def test_async_queue_process(self): tbl = Table({"a": int, "b": float, "c": str}) manager = PerspectiveManager() manager._set_queue_process(TestAsync.wrapped_queue_process) manager.host(tbl) assert tbl.size() == 0 for i in range(5): tbl.update([data[i]]) table_id = tbl._table.get_id() pool = tbl._table.get_pool() assert _PerspectiveStateManager.TO_PROCESS == {table_id: pool} assert tbl.view().to_records() == data[:5] # should have flushed the process queue assert _PerspectiveStateManager.TO_PROCESS == {}
def test_manager_create_view_one(self):
    """A one-sided (row-pivoted) view can be created through a `view`
    message and aggregates as expected."""
    message = {
        "id": 1,
        "table_name": "table1",
        "view_name": "view1",
        "cmd": "view",
        "config": {
            "row_pivots": ["a"]
        }
    }
    manager = PerspectiveManager()
    manager.host_table("table1", Table(data))
    manager._process(message, self.post)
    assert manager._views["view1"].to_dict() == {
        "__ROW_PATH__": [[], ["1"], ["2"], ["3"]],
        "a": [6, 1, 2, 3],
        "b": [3, 1, 1, 1]
    }
def make_app():
    '''Create a Tornado app that streams updates into a hosted table.'''
    # Create an instance of `PerspectiveManager` and a table.
    manager = PerspectiveManager()
    schema = {
        "name": str,
        "client": str,
        "open": float,
        "high": float,
        "low": float,
        "close": float,
        "lastUpdate": datetime,
        "date": date
    }
    table = Table(schema, limit=2500)
    # Track the table with the name "data_source_one", which will be used in
    # the front-end to access the Table.
    manager.host_table("data_source_one", table)

    # update with new data every 50ms
    def updater():
        table.update(data_source())

    callback = tornado.ioloop.PeriodicCallback(callback=updater,
                                               callback_time=50)
    callback.start()
    handlers = [
        # create a websocket endpoint that the client Javascript can access
        (r"/websocket", PerspectiveTornadoHandler, {
            "manager": manager,
            "check_origin": True
        }),
        (r"/node_modules/(.*)", tornado.web.StaticFileHandler, {
            "path": "../../node_modules/@finos/"
        }),
        (r"/(.*)", tornado.web.StaticFileHandler, {
            "path": "./",
            "default_filename": "index.html"
        }),
    ]
    return tornado.web.Application(handlers)
def test_view_float_expression(self):
    """`float(...)` casts literals, ints, dates, floats, and datetimes to
    float columns; datetimes become epoch timestamps in milliseconds."""
    table = Table({"w": datetime, "x": int, "y": date, "z": float})
    view = table.view(expressions=[
        '// computed\n float(2147483648)',
        '// computed2\n float(-2147483649)',
        '// computed3 \n float(123.456789123)',
        '// computed4 \n float("x")',
        '// computed5 \n float("y")',
        '// computed6 \n float("z")',
        '// computed7 \n float("w")'
    ])
    dt = datetime(2018, 8, 12, 15, 32, 55)
    table.update({
        "w": [dt],
        "x": [12136582],
        "y": [date(2020, 6, 30)],
        "z": [1.23456]
    })
    expected_schema = {
        "computed": float,
        "computed2": float,
        "computed3": float,
        "computed4": float,
        "computed5": float,
        "computed6": float,
        "computed7": float,
    }
    assert view.expression_schema() == expected_schema
    result = view.to_dict()
    # Expected value for the datetime cast: local-epoch milliseconds.
    seconds_timestamp = mktime(dt.timetuple()) + dt.microsecond / 1000000.0
    ms_timestamp = int(seconds_timestamp * 1000)
    assert result["computed"] == [2147483648]
    assert result["computed2"] == [-2147483649]
    assert result["computed3"] == [123.456789123]
    assert result["computed4"] == [12136582]
    assert result["computed5"] == [132384030]
    assert result["computed6"] == [1.23456]
    assert result["computed7"] == [ms_timestamp]
def __init__(self, tb_url: str, stream_key: str, symbol: str, record_type: str, time_widget: widgets.Text, booksize=20):
    """Set up the stream configuration, the indexed Perspective table,
    and the order book, then initialize the book."""
    # Stream / connection configuration.
    self.tb_url = tb_url
    self.stream_key = stream_key
    self.symbol = symbol
    self.record_type = record_type
    self.time_widget = time_widget
    self.booksize = booksize
    # Table rows are keyed on 'key' so updates overwrite in place.
    self.schema = {
        'key': str,
        'symbol': str,
        'side': str,
        'size': float,
        'price': float,
        'numberOfOrders': int
    }
    # NOTE(review): limit is booksize * 3 — presumably headroom beyond
    # the displayed levels; confirm against the book update logic.
    self.table = Table(self.schema, limit=booksize * 3, index='key')
    self.book = Book(symbol)
    # Reader state.
    self.last_updated = 0
    self.stop_reading = False
    self.init_book()
def test_manager_to_dict_with_nan(self, util, sentinel):
    """Serializing a view containing NaN through the manager produces an
    error response rather than invalid JSON."""
    arrow_data = util.make_arrow(["a"], [[1.5, np.nan, 2.5, np.nan]], types=[pa.float64()])
    s = sentinel(False)

    def handle_to_dict(msg):
        s.set(True)
        response = json.loads(msg)
        assert response == {
            "id": 2,
            "error": "JSON serialization error: Cannot serialize `NaN`, `Infinity` or `-Infinity` to JSON."
        }

    view_msg = {"id": 1, "table_name": "table1", "view_name": "view1", "cmd": "view"}
    manager = PerspectiveManager()
    manager.host_table("table1", Table(arrow_data))
    manager._process(view_msg, self.post)
    to_dict_msg = {"id": 2, "name": "view1", "cmd": "view_method", "method": "to_dict"}
    manager._process(to_dict_msg, handle_to_dict)
    assert s.get() is True