Example no. 1
0
    def test_async_queue_process(self):
        """Updates queue in TO_PROCESS until a view read forces a flush."""
        tbl = Table({"a": int, "b": float, "c": str})
        manager = PerspectiveManager()
        manager._set_queue_process(TestAsync.wrapped_queue_process)
        manager.host(tbl)

        # Nothing has been applied yet.
        assert tbl.size() == 0

        for row in data[:5]:
            tbl.update([row])

        pool = tbl._table.get_pool()
        table_id = tbl._table.get_id()

        # The pending work is keyed by table id until something reads it.
        assert _PerspectiveStateManager.TO_PROCESS == {table_id: pool}
        assert tbl.view().to_records() == data[:5]

        # should have flushed the process queue
        assert _PerspectiveStateManager.TO_PROCESS == {}
Example no. 2
0
    def test_async_queue_process_multiple_ports(self):
        """Updates routed through shuffled port ids all land in the table."""
        tbl = Table({"a": int, "b": float, "c": str})
        port_ids = [0]
        port_data = [{"a": 0, "b": 0, "c": "0"}]

        for _ in range(10):
            new_port = tbl.make_port()
            port_ids.append(new_port)
            port_data.append(
                {"a": new_port, "b": new_port * 1.5, "c": str(new_port)}
            )

        # Ports are allocated sequentially after the default port 0.
        assert port_ids == list(range(0, 11))
        manager = PerspectiveManager()
        manager.host(tbl)
        manager.set_loop_callback(TestAsync.loop.add_callback)

        assert syncify(lambda: tbl.size())() == 0

        random.shuffle(port_ids)

        @syncify
        def _tbl_task():
            last = len(port_ids) - 1
            for pid in port_ids:
                # pid is always within range here; the fallback is defensive.
                row = port_data[pid] if pid < len(port_ids) else port_data[last]
                tbl.update([row], port_id=pid)
            size = tbl.size()
            tbl.delete()
            return size

        assert len(port_ids) == 11
        assert _tbl_task() == 11
Example no. 3
0
 def test_viewer_update_df(self):
     """A DataFrame passed to the viewer's update reaches the hosted table."""
     table = Table({"a": [1, 2, 3]})
     viewer = PerspectiveViewer()
     viewer.load(table)
     update_frame = pd.DataFrame({"a": [4, 5, 6]})
     viewer.update(update_frame)
     # Both handles refer to the same underlying table.
     assert viewer.table.size() == 6
     assert table.size() == 6
     assert viewer.table.view().to_dict() == {"a": [1, 2, 3, 4, 5, 6]}
Example no. 4
0
 def test_viewer_update_list(self):
     """A list of row dicts passed to the viewer's update reaches the table."""
     table = Table({"a": [1, 2, 3]})
     viewer = PerspectiveViewer()
     viewer.load(table)
     rows = [{"a": value} for value in (4, 5, 6)]
     viewer.update(rows)
     # Both handles refer to the same underlying table.
     assert viewer.table.size() == 6
     assert table.size() == 6
     assert viewer.table.view().to_dict() == {"a": [1, 2, 3, 4, 5, 6]}
Example no. 5
0
    def test_async_multiple_managers_mixed_queue_process_multiple_ports(self):
        """One manager flushes via the tornado loop, the other synchronously;
        both tables must receive every update across shuffled port ids."""
        # Counts how many times each manager's loop callback actually fires.
        sentinel = {"async": 0, "sync": 0}

        def _counter(key, f, *args, **kwargs):
            # Bump the named counter, then invoke the wrapped callable.
            sentinel[key] += 1
            return f(*args, **kwargs)

        # sync_process runs _counter inline; async_process schedules it on
        # the tornado loop via add_timeout with a 1-unit delay.
        sync_process = partial(_counter, "sync")
        async_process = partial(TestAsync.loop.add_timeout, 1, _counter,
                                "async")
        tbl = Table({"a": int, "b": float, "c": str})
        tbl2 = Table({"a": int, "b": float, "c": str})
        port_ids = [0]
        port_data = [{"a": 0, "b": 0, "c": "0"}]

        # Create 10 extra ports on each table; both tables are expected to
        # hand out identical ids in the same order.
        for i in range(10):
            port_id = tbl.make_port()
            port_id2 = tbl2.make_port()
            assert port_id == port_id2
            port_ids.append(port_id)
            port_data.append({
                "a": port_id,
                "b": port_id * 1.5,
                "c": str(port_id)
            })

        manager = PerspectiveManager()
        manager2 = PerspectiveManager()
        manager.host_table("tbl", tbl)
        manager2.host_table("tbl2", tbl2)

        # manager uses tornado, manager2 is synchronous
        manager.set_loop_callback(async_process)
        manager2.set_loop_callback(sync_process)
        random.shuffle(port_ids)

        @syncify
        def _task():
            # One row per port. Ids are 0..10 and len(port_ids) == 11, so the
            # else-branch never triggers here; idx == port_id in practice.
            for port_id in port_ids:
                idx = port_id if port_id < len(port_ids) else len(port_ids) - 1
                tbl.update([port_data[idx]], port_id=port_id)

        _task()
        # The synchronous manager's table is updated directly on this thread.
        for port_id in port_ids:
            idx = port_id if port_id < len(port_ids) else len(port_ids) - 1
            tbl2.update([port_data[idx]], port_id=port_id)

        @syncify
        def _get_size():
            size = tbl.size()
            tbl.delete()
            return size

        # Every port's row arrived on both tables regardless of callback style.
        assert _get_size() == 11
        assert tbl2.size() == 11
        # NOTE(review): the exact counts depend on how each manager batches
        # flushes — presumably the async loop coalesces the 11 updates into 2
        # callbacks while the sync path fires once per update plus one extra;
        # confirm against the manager implementation before relying on this.
        assert sentinel["async"] == 2
        assert sentinel["sync"] == 12
        tbl2.delete()
Example no. 6
0
 def callback(delta):
     """Materialize the on-update delta, validate it, and bump the sentinel."""
     delta_table = Table(delta)
     assert delta_table.size() == 1
     assert delta_table.schema() == {"a": int, "b": str}
     delta_table.delete()
     s.set(s.get() + 100)
Example no. 7
0
 def update_callback(port_id, delta):
     """Materialize the per-port delta, validate it, and bump the sentinel."""
     delta_table = Table(delta)
     assert delta_table.size() == 1
     assert delta_table.schema() == {"a": int, "b": str}
     delta_table.delete()
     s.set(s.get() + 1)
Example no. 8
0
 def test_viewer_update_dict_partial(self):
     """A partial update keyed on the "a" index overwrites column "b" in place."""
     table = Table({"a": [1, 2, 3], "b": [5, 6, 7]}, index="a")
     viewer = PerspectiveViewer()
     viewer.load(table)
     viewer.update({"a": [1, 2, 3], "b": [8, 9, 10]})
     # Indexed update replaces rows rather than appending them.
     assert viewer.table.size() == 3
     assert table.size() == 3
     expected = {"a": [1, 2, 3], "b": [8, 9, 10]}
     assert viewer.table.view().to_dict() == expected
Example no. 9
0
    def test_async_multiple_managers_delayed_process(self):
        """Two managers with different flush delays must both apply all rows."""
        # Counts how many times each manager's loop callback actually fires.
        sentinel = {"async": 0, "sync": 0}

        def _counter(key, f, *args, **kwargs):
            # Bump the named counter, then invoke the wrapped callable.
            sentinel[key] += 1
            return f(*args, **kwargs)

        # The "short delay" path runs _counter inline; the "long delay" path
        # schedules it on the tornado loop via add_timeout.
        short_delay_queue_process = partial(_counter, "sync")
        long_delay_queue_process = partial(TestAsync.loop.add_timeout, 1,
                                           _counter, "async")

        tbl = Table({"a": int, "b": float, "c": str})
        tbl2 = Table({"a": int, "b": float, "c": str})

        manager = PerspectiveManager()
        manager2 = PerspectiveManager()
        manager.host_table("tbl", tbl)
        manager2.host_table("tbl2", tbl2)

        manager.set_loop_callback(short_delay_queue_process)
        manager2.set_loop_callback(long_delay_queue_process)

        @syncify
        def _tbl_task():
            # tbl2 (delayed manager) is updated from the loop thread.
            for i in range(10):
                tbl2.update([data[i]])

        _tbl_task()
        # tbl (inline manager) is updated directly on this thread.
        for i in range(10):
            tbl.update([data[i]])

        @syncify
        def _tbl_task2():
            size = tbl2.size()
            tbl2.delete()
            return size

        # All 10 rows arrived on both tables despite the differing delays.
        assert _tbl_task2() == 10
        assert tbl.size() == 10
        # NOTE(review): exact counts depend on flush batching — presumably
        # the delayed loop coalesces the 10 updates into 2 callbacks while
        # the inline path fires once per update plus one extra; confirm
        # against the manager implementation before relying on this.
        assert sentinel["async"] == 2
        assert sentinel["sync"] == 11

        tbl.delete()
Example no. 10
0
    def test_async_queue_process(self):
        """The wrapped queue_process fires when updates are applied."""
        tbl = Table({"a": int, "b": float, "c": str})
        manager = PerspectiveManager()
        manager._set_queue_process(TestAsync.wrapped_queue_process)
        manager.host(tbl)

        # No rows before any update is applied.
        assert tbl.size() == 0

        for row in data[:5]:
            tbl.update([row])

        # process should have been called at least once
        assert SENTINEL.get() > 0

        tbl.delete()
Example no. 11
0
    def test_async_queue_process_multiple_ports(self):
        """queue_process fires for updates routed through shuffled port ids."""
        tbl = Table({"a": int, "b": float, "c": str})

        port_ids = [0]
        port_data = [{"a": 0, "b": 0, "c": "0"}]

        for _ in range(10):
            new_port = tbl.make_port()
            port_ids.append(new_port)
            port_data.append(
                {"a": new_port, "b": new_port * 1.5, "c": str(new_port)}
            )

        # Ports are allocated sequentially after the default port 0.
        assert port_ids == list(range(0, 11))

        manager = PerspectiveManager()
        manager._set_queue_process(TestAsync.wrapped_queue_process)
        manager.host(tbl)

        assert tbl.size() == 0

        random.shuffle(port_ids)

        last = len(port_ids) - 1
        for pid in port_ids:
            # pid is always within range here; the fallback is defensive.
            row = port_data[pid] if pid < len(port_ids) else port_data[last]
            tbl.update([row], port_id=pid)

        # assert that process is being called asynchronously
        assert SENTINEL.get() > 0

        tbl.delete()
Example no. 12
0
 def update_callback(rows):
     """Materialize the update payload, validate it, and bump the sentinel."""
     rows_table = Table(rows)
     assert rows_table.size() == 1
     assert rows_table.schema() == {"a": int, "b": str}
     rows_table.delete()
     s.set(s.get() + 1)
Example no. 13
0
class DataHost(object):
    """Stores cleaned and transformed DataFrames in memory as `perspective.Table`s,
    and provides getters for the `Table`s to be used elsewhere."""

    @staticmethod
    def _timed(description, thunk):
        """Run ``thunk()`` and log "<description> took <elapsed>s".

        Args:
            description (str): human-readable label for the log line.
            thunk (callable): zero-argument callable to execute and time.

        Returns:
            Whatever ``thunk()`` returns.
        """
        start = time.time()
        result = thunk()
        # Lazy %-style args: the message is only rendered when INFO is enabled.
        logging.info("%s took %ss", description, time.time() - start)
        return result

    def __init__(self):
        """Clean the source datasets and load them into two schema-typed Tables."""
        # Explicit column -> type schema for the per-state table, so the
        # Table is created with fixed types before any data arrives.
        self.state_schema = {
            "Date": date,
            "Cumulative Deaths": int,
            "Cumulative Cases": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2019 Estimate)": int,
            "State": str,
            "State Name": str,
            "Governor": str,
            "State Senate": str,
            "State House": str,
        }

        # Explicit column -> type schema for the per-county table.
        self.county_schema = {
            "County FIPS": int,
            "County": str,
            "State": str,
            "State Name": str,
            "Date": date,
            "Cumulative Cases": int,
            "Cumulative Deaths": int,
            "New Deaths": int,
            "New Cases": int,
            "Population (2018 Estimate)": int,
            "Unemployment Rate % (2018 Estimate)": int,
            "Unemployed (2018 Estimate)": int,
            "Employed (2018 Estimate)": int,
            "Civilian Labor Force (2018 Estimate)": int,
            "Median Household Income (2018 Estimate)": float,
        }

        # Clean/transform the raw datasets (the slow part), with timing logs.
        self._state_data = self._timed(
            "Cleaning state data", DataTransformer.state_data)
        self._county_data = self._timed(
            "Cleaning county data", DataTransformer.county_data)

        # Create empty Tables from the schemas above.
        self.state_table = self._timed(
            "Init state table", lambda: Table(self.state_schema))
        self.county_table = self._timed(
            "Init county table", lambda: Table(self.county_schema))

        logging.info("Tables initialized with schema")

        # Call `update` on the `Table` with the dataset
        self._timed("Update state table",
                    lambda: self.state_table.update(self._state_data))
        logging.info("State table size: %s", self.state_table.size())

        self._timed("Update county table",
                    lambda: self.county_table.update(self._county_data))
        logging.info("County table size: %s", self.county_table.size())

        logging.info("Tables updated with latest dataset")