def test_manager_to_dict_unix_timestamps_from_datetime(self, sentinel):
    """The conversion from `datetime` to a Unix timestamp should not alter
    the timestamp in any way if both are in local time."""
    s = sentinel(False)
    timestamp_data = {"a": [1580515140000]}
    schema = {"a": datetime}

    def handle_to_dict(msg):
        s.set(True)
        message = json.loads(msg)
        assert message["data"] == timestamp_data
        # convert back
        ts = datetime.fromtimestamp(message["data"]["a"][0] / 1000)
        assert ts == datetime(2020, 1, 31, 23, 59)

    message = {
        "id": 1,
        "table_name": "table1",
        "view_name": "view1",
        "cmd": "view"
    }
    manager = PerspectiveManager()
    table = Table(schema)
    table.update(timestamp_data)
    manager.host_table("table1", table)
    manager._process(message, self.post)
    to_dict_message = {
        "id": 2,
        "name": "view1",
        "cmd": "view_method",
        "method": "to_dict"
    }
    manager._process(to_dict_message, handle_to_dict)
    assert s.get() is True
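# For context: the `sentinel` fixture these tests receive is defined elsewhere
# in the suite's conftest. A minimal sketch of what it plausibly looks like,
# inferred from the `sentinel(value)` / `.get()` / `.set()` calls above (an
# assumption, not the suite's actual fixture):
import pytest


@pytest.fixture
def sentinel():
    class Sentinel(object):
        """A mutable cell the test can flip inside a callback and assert on later."""

        def __init__(self, value):
            self.value = value

        def set(self, new_value):
            self.value = new_value

        def get(self):
            return self.value

    return Sentinel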
def test_async_queue_process_multiple_ports(self):
    tbl = Table({
        "a": int,
        "b": float,
        "c": str
    })

    port_ids = [0]
    port_data = [{
        "a": 0,
        "b": 0,
        "c": "0"
    }]

    for i in range(10):
        port_id = tbl.make_port()
        port_ids.append(port_id)
        port_data.append({
            "a": port_id,
            "b": port_id * 1.5,
            "c": str(port_id)
        })

    assert port_ids == list(range(0, 11))

    manager = PerspectiveManager()
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager.host(tbl)

    assert tbl.size() == 0

    random.shuffle(port_ids)

    for port_id in port_ids:
        idx = port_id if port_id < len(port_ids) else len(port_ids) - 1
        tbl.update([port_data[idx]], port_id=port_id)

    # assert that process is being called asynchronously
    assert SENTINEL.get() > 0

    tbl.delete()
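# For context: `SENTINEL` and `TestAsync.wrapped_queue_process` are defined
# elsewhere in this suite and are not shown here. A plausible sketch, assuming
# a tornado IOLoop and the (table_id, state_manager) signature that
# `sync_queue_process` uses later in this section -- an assumption, not the
# suite's actual helper:
import tornado.ioloop

SENTINEL = AsyncSentinel(0)  # the AsyncSentinel class used further below


def wrapped_queue_process(table_id, state_manager):
    # record that the deferral hook fired, then run the real processing
    # step on the event loop instead of synchronously
    SENTINEL.set(SENTINEL.get() + 1)
    tornado.ioloop.IOLoop.current().add_callback(state_manager.call_process, table_id)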
def test_view_expression_append(self):
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = table.view(expressions=[
        '// computed \n "a" + "b"',
    ])
    assert view.schema() == {"a": int, "b": int, "computed": float}
    assert view.to_columns() == {
        "a": [1, 2, 3, 4],
        "b": [5, 6, 7, 8],
        "computed": [6, 8, 10, 12],
    }
    table.update({"a": [5, 6], "b": [9, 10]})
    assert view.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
    }
def test_manager_set_queue_process_multiple(self, sentinel):
    # manager2's queue process should not affect manager1,
    # provided they manage different tables
    s = sentinel(0)
    s2 = sentinel(0)
    manager = PerspectiveManager()
    manager2 = PerspectiveManager()
    table = Table({"a": [1, 2, 3]})
    table2 = Table({"a": [1, 2, 3]})
    manager.host_table("tbl", table)
    manager2.host_table("tbl2", table2)

    def fake_queue_process(f, *args, **kwargs):
        s2.set(s2.get() + 1)
        f(*args, **kwargs)

    manager2.set_loop_callback(fake_queue_process)
    table.update({"a": [4, 5, 6]})

    assert table.view().to_dict() == {
        "a": [1, 2, 3, 4, 5, 6]
    }

    table2.update({"a": [7, 8, 9]})
    table.update({"a": [7, 8, 9]})

    assert table.view().to_dict() == {
        "a": [1, 2, 3, 4, 5, 6, 7, 8, 9]
    }

    assert table2.view().to_dict() == {
        "a": [1, 2, 3, 7, 8, 9]
    }

    assert s.get() == 0
    assert s2.get() == 2
def test_async_multiple_managers_queue_process(self):
    tbl = Table({
        "a": int,
        "b": float,
        "c": str
    })
    tbl2 = Table({
        "a": int,
        "b": float,
        "c": str
    })

    manager = PerspectiveManager()
    manager2 = PerspectiveManager()

    manager.host_table("tbl", tbl)
    manager2.host_table("tbl2", tbl2)

    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager2._set_queue_process(TestAsync.wrapped_queue_process)

    for i in range(5):
        tbl.update([data[i]])
        tbl2.update([data[i]])

    assert SENTINEL.get() != 0

    # flush `TO_PROCESS`
    assert tbl.view().to_records() == data[:5]

    for i in range(5):
        tbl2.update([data[i]])
def test_view_delete_with_scope(self):
    """Tests that `View`'s `__del__` method, when called by the Python
    reference counter, leaves an empty `Table` in a clean state.
    """
    table = Table(
        {
            "id": int,
            "msg": str,
            "val": float
        },
        index="id",
    )

    table.view(
        expressions=[
            '// inverted \n 1 / "val"',
        ],
        columns=["inverted"],
    )

    table.update([{
        "id": 1,
        "msg": "test",
        "val": 1.0,
    }])
class TableHandler(EventHandler):
    onData = None  # type: ignore
    onHalt = None  # type: ignore
    onContinue = None  # type: ignore
    onError = None  # type: ignore
    onStart = None  # type: ignore
    onExit = None  # type: ignore

    def __init__(self) -> None:
        self._trades = Table(Trade.schema(), index="timestamp")
        self._orders = Table(Order.schema(), index="id")

    def installTables(self, manager: Any) -> None:
        manager.host_table("trades", self._trades)
        manager.host_table("orders", self._orders)

    def tables(self) -> Tuple[Table, Table]:
        return self._trades, self._orders

    async def onTrade(self, event: Event) -> None:
        """onTrade"""
        trade: Trade = event.target  # type: ignore
        self._trades.update([trade.json()])

    async def onOpen(self, event: Event) -> None:
        """onOpen"""
        order: Order = event.target  # type: ignore
        self._orders.update([order.json()])

    async def onCancel(self, event: Event) -> None:
        """onCancel"""
        order: Order = event.target  # type: ignore
        self._orders.remove([order.id])

    async def onChange(self, event: Event) -> None:
        """onChange"""
        order: Order = event.target  # type: ignore
        self._orders.update([order.json()])

    async def onFill(self, event: Event) -> None:
        """onFill"""
        order: Order = event.target  # type: ignore
        self._orders.remove([order.id])
def test_view_expression_delete_and_create_with_updates(self):
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = table.view(expressions=[
        '// computed \n "a" + "b"',
        "upper(concat('abc', 'def'))"
    ])
    assert view.schema() == {
        "a": int,
        "b": int,
        "computed": float,
        "upper(concat('abc', 'def'))": str
    }
    table.update({"a": [5, 6], "b": [9, 10]})
    assert view.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
        "upper(concat('abc', 'def'))": ["ABCDEF" for _ in range(6)]
    }
    view.delete()
    view2 = table.view(expressions=[
        '// computed2 \n "a" - "b"',
    ])
    assert view2.schema() == {"a": int, "b": int, "computed2": float}
    table.update({"a": [5, 6], "b": [9, 10]})
    table.update({"a": [5, 6], "b": [9, 10]})
    assert view2.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6, 5, 6, 5, 6],
        "b": [5, 6, 7, 8, 9, 10, 9, 10, 9, 10],
        "computed2": [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4],
    }
class TableHandler(EventHandler):
    onData = None  # type: ignore
    onHalt = None  # type: ignore
    onContinue = None  # type: ignore
    onError = None  # type: ignore
    onStart = None  # type: ignore
    onExit = None  # type: ignore

    def __init__(self):
        self._trades = Table(Trade.perspectiveSchema(), index="timestamp")
        self._orders = Table(Order.perspectiveSchema(), index="id")

    def installTables(self, manager) -> None:
        manager.host_table("trades", self._trades)
        manager.host_table("orders", self._orders)

    def tables(self) -> Tuple[Table, Table]:
        return self._trades, self._orders

    def onTrade(self, event: Event):
        '''onTrade'''
        self._trades.update([event.target.to_json()])

    def onOpen(self, event: Event):
        '''onOpen'''
        self._orders.update([event.target.to_json()])

    def onCancel(self, event: Event):
        '''onCancel'''
        self._orders.remove([event.target.id])

    def onChange(self, event: Event):
        '''onChange'''
        self._orders.update([event.target.to_json()])

    def onFill(self, event: Event):
        '''onFill'''
        self._orders.remove([event.target.id])
def test_view_computed_delete_and_create_with_updates(self):
    # Note: `computed_columns` is the older computed-column API; newer
    # Perspective releases express the same thing with `expressions`
    # (see the expression tests above).
    table = Table({"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]})
    view = table.view(computed_columns=[{
        "column": "computed",
        "computed_function_name": "+",
        "inputs": ["a", "b"],
    }])
    assert view.schema() == {"a": int, "b": int, "computed": float}
    table.update({"a": [5, 6], "b": [9, 10]})
    assert view.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6],
        "b": [5, 6, 7, 8, 9, 10],
        "computed": [6, 8, 10, 12, 14, 16],
    }
    view.delete()
    view2 = table.view(computed_columns=[{
        "column": "computed2",
        "computed_function_name": "-",
        "inputs": ["a", "b"],
    }])
    assert view2.schema() == {"a": int, "b": int, "computed2": float}
    table.update({"a": [5, 6], "b": [9, 10]})
    table.update({"a": [5, 6], "b": [9, 10]})
    assert view2.to_columns() == {
        "a": [1, 2, 3, 4, 5, 6, 5, 6, 5, 6],
        "b": [5, 6, 7, 8, 9, 10, 9, 10, 9, 10],
        "computed2": [-4, -4, -4, -4, -4, -4, -4, -4, -4, -4],
    }
class Demo:
    def __init__(self, tb_url: str, stream_key: str, symbol: str,
                 record_type: str, time_widget: widgets.Text, booksize=20):
        self.tb_url = tb_url
        self.stream_key = stream_key
        self.symbol = symbol
        self.record_type = record_type
        self.time_widget = time_widget
        self.booksize = booksize
        self.schema = {
            'key': str,
            'symbol': str,
            'side': str,
            'size': float,
            'price': float,
            'numberOfOrders': int
        }
        self.table = Table(self.schema, limit=booksize * 3, index='key')
        self.book = Book(symbol)
        self.last_updated = 0
        self.stop_reading = False
        self.init_book()

    def process_entry_update(self, entry: InstrumentMessage) -> None:
        if entry.action == 'DELETE':
            self.book.remove(entry.side, entry.price)
            t = time.time()
            if t - self.last_updated >= 0.5:
                self.last_updated = t
                self.table.update(self.book.get_bids(size=self.booksize))
                self.table.update(self.book.get_asks(size=self.booksize))
        elif entry.action == 'UPDATE':
            e = to_dict(self.symbol, entry)
            self.book.update(e)
            t = time.time()
            if t - self.last_updated >= 0.5:
                self.last_updated = t
                self.table.update(self.book.get_bids(size=self.booksize))
                self.table.update(self.book.get_asks(size=self.booksize))
        else:
            raise Exception(f'Unknown action type: {entry.action}')

    def process_entry_new(self, entry: InstrumentMessage) -> None:
        e = to_dict(self.symbol, entry)
        self.book.update(e)
        t = time.time()
        if t - self.last_updated >= 0.5:
            self.last_updated = t
            self.table.update(self.book.get_bids(size=self.booksize))
            self.table.update(self.book.get_asks(size=self.booksize))

    def process_snapshot(self, entries) -> None:
        self.book.clear()
        self.book.update(*map(lambda e: to_dict(self.symbol, e), entries))
        t = time.time()
        if t - self.last_updated >= 0.5:
            self.last_updated = t
            self.table.update(self.book.get_bids(size=self.booksize))
            self.table.update(self.book.get_asks(size=self.booksize))

    def init_book(self):
        db = tbapi.TickDb_createFromUrl(self.tb_url)
        try:
            db.open(True)
            stream = db.getStream(self.stream_key)
            options = tbapi.SelectionOptions()
            try:
                cursor = db.select(current_milli_time() - 10000, [stream],
                                   options, [self.record_type], [self.symbol])
                while cursor.next():
                    msg = cursor.getMessage()
                    if msg.packageType == 'PERIODICAL_SNAPSHOT':
                        self.process_snapshot(msg.entries)
                        break
            finally:
                cursor.close()
        finally:
            db.close()

    async def read_cursor(self):
        db = tbapi.TickDb_createFromUrl(self.tb_url)
        try:
            db.open(True)
            stream = db.getStream(self.stream_key)
            options = tbapi.SelectionOptions()
            options.live = True
            try:
                cursor = db.select(current_milli_time(), [stream], options,
                                   [self.record_type], [self.symbol])
                initialized = False
                while cursor.next() and not self.stop_reading and not initialized:
                    msg = cursor.getMessage()
                    if msg.packageType == 'PERIODICAL_SNAPSHOT' or msg.packageType == 'VENDOR_SNAPSHOT':
                        logging.info('received snapshot')
                        self.process_snapshot(msg.entries)
                        initialized = True
                        self.time_widget.value = str(datetime.fromtimestamp(msg.timestamp / 10 ** 9))
                while cursor.next() and not self.stop_reading:
                    msg = cursor.getMessage()
                    if msg.packageType == 'INCREMENTAL_UPDATE':
                        for entry in msg.entries:
                            if entry.typeName.endswith('L2EntryUpdate'):
                                self.process_entry_update(entry)
                            elif entry.typeName.endswith('L2EntryNew'):
                                self.process_entry_new(entry)
                    elif msg.packageType == 'PERIODICAL_SNAPSHOT' or msg.packageType == 'VENDOR_SNAPSHOT':
                        self.process_snapshot(msg.entries)
                    self.time_widget.value = str(datetime.fromtimestamp(msg.timestamp / 10 ** 9))
            finally:
                cursor.close()
        finally:
            db.close()

    def update_table(self):
        logging.info('Started streaming!')
        loop = asyncio.new_event_loop()
        task = loop.create_task(self.read_cursor())
        loop.call_later(60, task.cancel)
        try:
            loop.run_until_complete(task)
        except asyncio.CancelledError:
            logging.info("Stopped streaming!")

    def start(self):
        self.stop_reading = False
        self.thread = threading.Thread(target=self.update_table)
        self.thread.start()

    def stop(self):
        self.stop_reading = True
        self.thread.join()

    def clear(self):
        self.table.clear()
        self.book.clear()
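# For context: `current_milli_time` is referenced above but not defined in this
# snippet. The conventional definition below is an assumption, consistent with
# the millisecond timestamps `db.select` is given above:
def current_milli_time():
    # epoch time in milliseconds
    return int(round(time.time() * 1000))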
def test_manager_host_table_transitive(self):
    manager = PerspectiveManager()
    table = Table(data)
    manager.host_table("table1", table)
    table.update({"a": [4, 5, 6], "b": ["d", "e", "f"]})
    assert manager.get_table("table1").size() == 6
class Runner(object):
    ITERATIONS = 10

    def __init__(self, suite):
        """Initializes a benchmark runner for the `Suite`.

        Args:
            suite (Suite) : A class that inherits from `Suite`, with any
                number of instance methods decorated with `@benchmark`.
        """
        self._suite = suite
        self._benchmarks = []
        self._table = None
        self._WROTE_RESULTS = False
        self._HOSTING = False
        self._suite.register_benchmarks()
        class_attrs = self._suite.__class__.__dict__.items()
        instance_attrs = self._suite.__dict__.items()

        for (k, v) in class_attrs:
            if hasattr(v, "benchmark") and getattr(v, "benchmark") is True:
                logging.info("Registering {0}".format(k))
                self._benchmarks.append(v)

        for (k, v) in instance_attrs:
            if hasattr(v, "benchmark") and getattr(v, "benchmark") is True:
                logging.info("Registering {0}".format(k))
                self._benchmarks.append(v)

        # Write results on SIGINT
        signal.signal(signal.SIGINT, self.sigint_handler)

    def sigint_handler(self, signum, frame):
        """On SIGINT, host the results over a websocket."""
        if not self._WROTE_RESULTS:
            self.write_results()
        if not self._HOSTING:
            self.host_results()
        else:
            sys.exit(0)

    def host_results(self):
        """Create a tornado application that hosts the results table over a
        websocket."""
        if self._table is None:
            return
        MANAGER = PerspectiveManager()
        MANAGER.host_table("benchmark_results", self._table)
        application = tornado.web.Application([
            (r"/", BenchmarkTornadoHandler),
            # create a websocket endpoint that the client Javascript can access
            (
                r"/websocket",
                PerspectiveTornadoHandler,
                {
                    "manager": MANAGER,
                    "check_origin": True
                },
            ),
        ])
        self._HOSTING = True
        application.listen(8888)
        logging.critical("Displaying results at http://localhost:8888")
        loop = tornado.ioloop.IOLoop.current()
        loop.start()

    def write_results(self):
        if self._table is None:
            return
        name = "benchmark_{}_.arrow".format(datetime.now().isoformat())
        logging.info("Writing results to `{}`".format(name))
        arrow_path = os.path.join(os.path.dirname(__file__), name)
        with open(arrow_path, "wb") as file:
            arrow = self._table.view().to_arrow()
            file.write(arrow)
        self._WROTE_RESULTS = True

    def run_method(self, func, *args, **kwargs):
        """Wrap the benchmark `func` with timing code and run for n
        `ITERATIONS`, returning a result row that can be fed into
        Perspective.
        """
        overall_result = {
            k.replace("__BENCH__", ""): v
            for (k, v) in func.__dict__.items()
            if "__BENCH__" in k
        }
        result = timeit(func, number=Runner.ITERATIONS) / Runner.ITERATIONS
        overall_result["__TIME__"] = result
        return overall_result

    def print_result(self, result):
        print("{}::{} ({}):{:30}{:>30}".format(
            result["group"],
            result["name"],
            result["version"],
            "",
            result["__TIME__"],
        ))

    def run(self, version):
        """Runs each benchmark function from the suite for n `ITERATIONS`,
        timing each function and writing the results to a
        `perspective.Table`.
        """
        logging.info("Running benchmark suite...")
        for benchmark in self._benchmarks:
            result = self.run_method(benchmark)
            result["version"] = version
            self.print_result(result)
            if self._table is None:
                arrow_path = os.path.join(os.path.dirname(__file__), "benchmark.arrow")
                if os.path.exists(arrow_path):
                    # if arrow exists, append to it
                    with open(arrow_path, "rb") as arr:
                        print("Reading table from pre-existing benchmark.arrow")
                        self._table = Table(arr.read())
                else:
                    print("Creating new table")
                    self._table = Table([result])
            else:
                self._table.update([result])
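# A minimal sketch of how a suite might drive this Runner. `MySuite` and the
# stand-in `benchmark` decorator below are hypothetical, inferred from
# `run_method`'s scan for "__BENCH__"-prefixed function attributes and from
# `print_result`'s use of "group" and "name"; the repo's real `Suite` base
# class and decorator live elsewhere:
def benchmark(func):
    # hypothetical stand-in: mark the callable and stamp the metadata that
    # `run_method` strips back out via k.replace("__BENCH__", "")
    func.benchmark = True
    setattr(func, "__BENCH__group", "example")
    setattr(func, "__BENCH__name", func.__name__)
    return func


class MySuite(Suite):
    def register_benchmarks(self):
        # assumed hook: expose the benchmark as a zero-arg instance attribute,
        # so Runner's instance-attribute scan finds a directly callable func
        def bench_table_update():
            Table({"a": [1, 2, 3]}).update({"a": [4, 5, 6]})

        self.bench_table_update = benchmark(bench_table_update)


runner = Runner(MySuite())
runner.run(version="dev")  # times each registered benchmark over ITERATIONS runs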
def test_async_multiple_managers_mixed_queue_process_multiple_ports(self):
    # mutate when synchronously calling queue_process for each update
    SENTINEL_2 = AsyncSentinel(0)

    def sync_queue_process(table_id, state_manager):
        SENTINEL_2.set(SENTINEL_2.get() - 1)
        state_manager.call_process(table_id)

    tbl = Table({
        "a": int,
        "b": float,
        "c": str
    })
    tbl2 = Table({
        "a": int,
        "b": float,
        "c": str
    })

    port_ids = [0]
    port_data = [{
        "a": 0,
        "b": 0,
        "c": "0"
    }]

    for i in range(10):
        port_id = tbl.make_port()
        port_id2 = tbl2.make_port()

        assert port_id == port_id2

        port_ids.append(port_id)
        port_data.append({
            "a": port_id,
            "b": port_id * 1.5,
            "c": str(port_id)
        })

    manager = PerspectiveManager()
    manager2 = PerspectiveManager()

    manager.host_table("tbl", tbl)
    manager2.host_table("tbl2", tbl2)

    # manager uses tornado, manager2 is synchronous
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager2._set_queue_process(sync_queue_process)

    random.shuffle(port_ids)

    for port_id in port_ids:
        idx = port_id if port_id < len(port_ids) else len(port_ids) - 1
        tbl.update([port_data[idx]], port_id=port_id)
        tbl2.update([port_data[idx]], port_id=port_id)

    assert SENTINEL.get() != 0
    assert SENTINEL_2.get() == -11

    tbl2.delete()
    tbl.delete()
class DataHost(object):
    """Stores cleaned and transformed DataFrames in memory as
    `perspective.Table`s, and provides getters for the `Table`s to be used
    elsewhere."""

    def __init__(self):
        self.state_schema = {
            "Date": date,
            "Cumulative Deaths": int,
            "Cumulative Cases": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2019 Estimate)": int,
            "State": str,
            "State Name": str,
            "Governor": str,
            "State Senate": str,
            "State House": str,
        }

        self.county_schema = {
            "County FIPS": int,
            "County": str,
            "State": str,
            "State Name": str,
            "Date": date,
            "Cumulative Cases": int,
            "Cumulative Deaths": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2018 Estimate)": int,
            "Unemployment Rate % (2018 Estimate)": int,
            "Unemployed (2018 Estimate)": int,
            "Employed (2018 Estimate)": int,
            "Civilian Labor Force (2018 Estimate)": int,
            "Median Household Income (2018 Estimate)": float,
        }

        state_start = time.time()
        self._state_data = DataTransformer.state_data()
        logging.info("Cleaning state data took {}s".format(time.time() - state_start))

        county_start = time.time()
        self._county_data = DataTransformer.county_data()
        logging.info("Cleaning county data took {}s".format(time.time() - county_start))

        state_table_start = time.time()
        self.state_table = Table(self.state_schema)
        logging.info("Init state table took {}s".format(time.time() - state_table_start))

        county_table_start = time.time()
        self.county_table = Table(self.county_schema)
        logging.info("Init county table took {}s".format(time.time() - county_table_start))

        logging.info("Tables initialized with schema")

        # Call `update` on the `Table` with the dataset
        state_update_start = time.time()
        self.state_table.update(self._state_data)
        logging.info("Update state table took {}s".format(time.time() - state_update_start))
        logging.info("State table size: {}".format(self.state_table.size()))

        county_update_start = time.time()
        self.county_table.update(self._county_data)
        logging.info("Update county table took {}s".format(time.time() - county_update_start))
        logging.info("County table size: {}".format(self.county_table.size()))

        logging.info("Tables updated with latest dataset")
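# A minimal sketch of how these tables might be served to clients, following
# the same tornado pattern `Runner.host_results` uses above. The table names
# and route are illustrative assumptions, not this project's actual server:
import tornado.ioloop
import tornado.web

from perspective import PerspectiveManager, PerspectiveTornadoHandler


def make_app(data_host):
    # host both tables on one manager so a single websocket serves them
    manager = PerspectiveManager()
    manager.host_table("state", data_host.state_table)
    manager.host_table("county", data_host.county_table)
    return tornado.web.Application([
        (
            r"/websocket",
            PerspectiveTornadoHandler,
            {"manager": manager, "check_origin": True},
        ),
    ])


if __name__ == "__main__":
    app = make_app(DataHost())
    app.listen(8888)
    tornado.ioloop.IOLoop.current().start()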