def test_async_queue_process(self):
    """Updates accumulate in TO_PROCESS and are flushed by reading a view."""
    tbl = Table({"a": int, "b": float, "c": str})
    manager = PerspectiveManager()
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager.host(tbl)
    assert tbl.size() == 0
    for row in data[:5]:
        tbl.update([row])
    table_id = tbl._table.get_id()
    pool = tbl._table.get_pool()
    assert _PerspectiveStateManager.TO_PROCESS == {table_id: pool}
    assert tbl.view().to_records() == data[:5]
    # should have flushed the process queue
    assert _PerspectiveStateManager.TO_PROCESS == {}
def test_async_queue_process_multiple_ports(self):
    """Updates routed through shuffled ports all land; size is read and the
    table deleted inside a single synchronized task."""
    tbl = Table({"a": int, "b": float, "c": str})
    port_ids = [0]
    port_data = [{"a": 0, "b": 0, "c": "0"}]
    for _ in range(10):
        new_port = tbl.make_port()
        port_ids.append(new_port)
        port_data.append({"a": new_port, "b": new_port * 1.5, "c": str(new_port)})
    assert port_ids == list(range(0, 11))
    manager = PerspectiveManager()
    manager.host(tbl)
    manager.set_loop_callback(TestAsync.loop.add_callback)
    assert syncify(lambda: tbl.size())() == 0
    random.shuffle(port_ids)

    @syncify
    def _tbl_task():
        for pid in port_ids:
            idx = pid if pid < len(port_ids) else len(port_ids) - 1
            tbl.update([port_data[idx]], port_id=pid)
        size = tbl.size()
        tbl.delete()
        return size

    assert len(port_ids) == 11
    assert _tbl_task() == 11
def test_viewer_update_df(self):
    """Updating a viewer-hosted table with a DataFrame appends the rows."""
    table = Table({"a": [1, 2, 3]})
    viewer = PerspectiveViewer()
    viewer.load(table)
    viewer.update(pd.DataFrame({"a": [4, 5, 6]}))
    expected = {"a": [1, 2, 3, 4, 5, 6]}
    assert table.size() == 6
    assert viewer.table.size() == 6
    assert viewer.table.view().to_dict() == expected
def test_viewer_update_list(self):
    """Updating a viewer-hosted table with a list of rows appends them."""
    table = Table({"a": [1, 2, 3]})
    viewer = PerspectiveViewer()
    viewer.load(table)
    viewer.update([{"a": value} for value in (4, 5, 6)])
    expected = {"a": [1, 2, 3, 4, 5, 6]}
    assert table.size() == 6
    assert viewer.table.size() == 6
    assert viewer.table.view().to_dict() == expected
def test_async_multiple_managers_mixed_queue_process_multiple_ports(self):
    """A tornado-driven manager and a synchronous manager can host separate
    tables with matching port layouts; the sentinel counts how many times
    each loop callback fired."""
    sentinel = {"async": 0, "sync": 0}

    def _counter(key, f, *args, **kwargs):
        sentinel[key] += 1
        return f(*args, **kwargs)

    sync_process = partial(_counter, "sync")
    async_process = partial(TestAsync.loop.add_timeout, 1, _counter, "async")
    tbl = Table({"a": int, "b": float, "c": str})
    tbl2 = Table({"a": int, "b": float, "c": str})
    port_ids = [0]
    port_data = [{"a": 0, "b": 0, "c": "0"}]
    for _ in range(10):
        pid = tbl.make_port()
        pid2 = tbl2.make_port()
        # both tables must hand out identical port ids
        assert pid == pid2
        port_ids.append(pid)
        port_data.append({"a": pid, "b": pid * 1.5, "c": str(pid)})
    manager = PerspectiveManager()
    manager2 = PerspectiveManager()
    manager.host_table("tbl", tbl)
    manager2.host_table("tbl2", tbl2)
    # manager uses tornado, manager2 is synchronous
    manager.set_loop_callback(async_process)
    manager2.set_loop_callback(sync_process)
    random.shuffle(port_ids)

    @syncify
    def _task():
        for pid in port_ids:
            idx = pid if pid < len(port_ids) else len(port_ids) - 1
            tbl.update([port_data[idx]], port_id=pid)

    _task()
    for pid in port_ids:
        idx = pid if pid < len(port_ids) else len(port_ids) - 1
        tbl2.update([port_data[idx]], port_id=pid)

    @syncify
    def _get_size():
        size = tbl.size()
        tbl.delete()
        return size

    assert _get_size() == 11
    assert tbl2.size() == 11
    assert sentinel["async"] == 2
    assert sentinel["sync"] == 12
    tbl2.delete()
def callback(delta):
    """Verify the delta round-trips into a one-row Table, then bump the
    shared sentinel by 100."""
    tbl = Table(delta)
    assert tbl.size() == 1
    assert tbl.schema() == {"a": int, "b": str}
    tbl.delete()
    s.set(s.get() + 100)
def update_callback(port_id, delta):
    """Verify the per-port delta round-trips into a one-row Table, then
    increment the shared sentinel."""
    tbl = Table(delta)
    assert tbl.size() == 1
    assert tbl.schema() == {"a": int, "b": str}
    tbl.delete()
    s.set(s.get() + 1)
def test_viewer_update_dict_partial(self):
    """A column-dict update keyed on the index overwrites rows in place
    instead of appending."""
    table = Table({"a": [1, 2, 3], "b": [5, 6, 7]}, index="a")
    viewer = PerspectiveViewer()
    viewer.load(table)
    viewer.update({"a": [1, 2, 3], "b": [8, 9, 10]})
    expected = {"a": [1, 2, 3], "b": [8, 9, 10]}
    assert table.size() == 3
    assert viewer.table.size() == 3
    assert viewer.table.view().to_dict() == expected
def test_async_multiple_managers_delayed_process(self):
    """Two managers with differently-delayed loop callbacks process their
    own tables independently; the sentinel counts callback invocations."""
    sentinel = {"async": 0, "sync": 0}

    def _counter(key, f, *args, **kwargs):
        sentinel[key] += 1
        return f(*args, **kwargs)

    short_delay_queue_process = partial(_counter, "sync")
    long_delay_queue_process = partial(
        TestAsync.loop.add_timeout, 1, _counter, "async"
    )
    tbl = Table({"a": int, "b": float, "c": str})
    tbl2 = Table({"a": int, "b": float, "c": str})
    manager = PerspectiveManager()
    manager2 = PerspectiveManager()
    manager.host_table("tbl", tbl)
    manager2.host_table("tbl2", tbl2)
    manager.set_loop_callback(short_delay_queue_process)
    manager2.set_loop_callback(long_delay_queue_process)

    @syncify
    def _tbl_task():
        for row in data[:10]:
            tbl2.update([row])

    _tbl_task()
    for row in data[:10]:
        tbl.update([row])

    @syncify
    def _tbl_task2():
        size = tbl2.size()
        tbl2.delete()
        return size

    assert _tbl_task2() == 10
    assert tbl.size() == 10
    assert sentinel["async"] == 2
    assert sentinel["sync"] == 11
    tbl.delete()
def test_async_queue_process(self):
    """Queued updates trigger the wrapped process callback at least once."""
    tbl = Table({"a": int, "b": float, "c": str})
    manager = PerspectiveManager()
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager.host(tbl)
    assert tbl.size() == 0
    for row in data[:5]:
        tbl.update([row])
    # process should have been called at least once
    assert SENTINEL.get() > 0
    tbl.delete()
def test_async_queue_process_multiple_ports(self):
    """Updates over shuffled ports still drive the async process callback."""
    tbl = Table({"a": int, "b": float, "c": str})
    port_ids = [0]
    port_data = [{"a": 0, "b": 0, "c": "0"}]
    for _ in range(10):
        pid = tbl.make_port()
        port_ids.append(pid)
        port_data.append({"a": pid, "b": pid * 1.5, "c": str(pid)})
    assert port_ids == list(range(0, 11))
    manager = PerspectiveManager()
    manager._set_queue_process(TestAsync.wrapped_queue_process)
    manager.host(tbl)
    assert tbl.size() == 0
    random.shuffle(port_ids)
    for pid in port_ids:
        idx = pid if pid < len(port_ids) else len(port_ids) - 1
        tbl.update([port_data[idx]], port_id=pid)
    # assert that process is being called asynchronously
    assert SENTINEL.get() > 0
    tbl.delete()
def update_callback(rows):
    """Verify the callback payload builds a one-row Table, then increment
    the shared sentinel."""
    tbl = Table(rows)
    assert tbl.size() == 1
    assert tbl.schema() == {"a": int, "b": str}
    tbl.delete()
    s.set(s.get() + 1)
class DataHost(object):
    """Stores cleaned and transformed DataFrames in memory as
    `perspective.Table`s, and provides getters for the `Table`s to be used
    elsewhere.

    `__init__` repeated the same start-timer / run / log-elapsed boilerplate
    for every expensive step; that pattern is factored into `_timed`, which
    produces byte-identical log output.
    """

    @staticmethod
    def _timed(label, fn):
        """Run ``fn()`` and log "<label> took <elapsed>s"; return its result.

        Args:
            label (str): human-readable name of the step being timed.
            fn (callable): zero-argument callable performing the work.

        Returns:
            Whatever ``fn()`` returns.
        """
        start = time.time()
        result = fn()
        logging.info("{} took {}s".format(label, time.time() - start))
        return result

    def __init__(self):
        """Clean the source datasets, build the two tables, and load them."""
        # Expected column types for the per-state dataset.
        self.state_schema = {
            "Date": date,
            "Cumulative Deaths": int,
            "Cumulative Cases": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2019 Estimate)": int,
            "State": str,
            "State Name": str,
            "Governor": str,
            "State Senate": str,
            "State House": str,
        }
        # Expected column types for the per-county dataset.
        self.county_schema = {
            "County FIPS": int,
            "County": str,
            "State": str,
            "State Name": str,
            "Date": date,
            "Cumulative Cases": int,
            "Cumulative Deaths": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2018 Estimate)": int,
            "Unemployment Rate % (2018 Estimate)": int,
            "Unemployed (2018 Estimate)": int,
            "Employed (2018 Estimate)": int,
            "Civilian Labor Force (2018 Estimate)": int,
            "Median Household Income (2018 Estimate)": float,
        }

        # Clean/transform the raw data (timed, as cleaning can be slow).
        self._state_data = self._timed(
            "Cleaning state data", DataTransformer.state_data
        )
        self._county_data = self._timed(
            "Cleaning county data", DataTransformer.county_data
        )

        # Create empty tables from the schemas so column types are fixed
        # before any data is loaded.
        self.state_table = self._timed(
            "Init state table", lambda: Table(self.state_schema)
        )
        self.county_table = self._timed(
            "Init county table", lambda: Table(self.county_schema)
        )
        logging.info("Tables initialized with schema")

        # Call `update` on the `Table` with the dataset
        self._timed(
            "Update state table",
            lambda: self.state_table.update(self._state_data),
        )
        logging.info("State table size: {}".format(self.state_table.size()))
        self._timed(
            "Update county table",
            lambda: self.county_table.update(self._county_data),
        )
        logging.info("County table size: {}".format(self.county_table.size()))
        logging.info("Tables updated with latest dataset")