def test_get_samples(test_client, login_as_admin, virtual_dataset):
    """
    Dataset API: Test get dataset samples.

    Covers three aspects of the ``/datasource/samples`` endpoint:
    1. a repeated request is served from the cache (``is_cached`` is true),
    2. ``force=true`` bypasses the cache (``is_cached`` is false),
    3. the returned rows match the data read directly from the database.
    """
    # 1. should cache data
    uri = (
        f"/datasource/samples?datasource_id={virtual_dataset.id}&datasource_type=table"
    )
    # feeds data
    test_client.post(uri)
    # get from cache
    rv = test_client.post(uri)
    # use the test client's `json` property instead of json.loads(rv.data),
    # consistent with test_get_samples_on_physical_dataset
    rv_data = rv.json
    assert rv.status_code == 200
    assert len(rv_data["result"]["data"]) == 10
    assert QueryCacheManager.has(
        rv_data["result"]["cache_key"],
        region=CacheRegion.DATA,
    )
    assert rv_data["result"]["is_cached"]

    # 2. should read through cache data
    uri2 = (
        f"/datasource/samples?datasource_id={virtual_dataset.id}"
        "&datasource_type=table&force=true"
    )
    # feeds data
    test_client.post(uri2)
    # force query
    rv2 = test_client.post(uri2)
    rv_data2 = rv2.json
    assert rv2.status_code == 200
    assert len(rv_data2["result"]["data"]) == 10
    assert QueryCacheManager.has(
        rv_data2["result"]["cache_key"],
        region=CacheRegion.DATA,
    )
    assert not rv_data2["result"]["is_cached"]

    # 3. data precision
    assert "colnames" in rv_data2["result"]
    assert "coltypes" in rv_data2["result"]
    assert "data" in rv_data2["result"]

    eager_samples = virtual_dataset.database.get_df(
        f"select * from ({virtual_dataset.sql}) as tbl"
        f' limit {app.config["SAMPLES_ROW_LIMIT"]}'
    )
    # col3 comes back as Decimal from the database; normalize to float so the
    # comparison against the JSON payload (plain floats) is meaningful
    eager_samples["col3"] = eager_samples["col3"].apply(float)
    eager_samples = eager_samples.to_dict(orient="records")
    assert eager_samples == rv_data2["result"]["data"]
def test_get_samples_on_physical_dataset(test_client, login_as_admin, physical_dataset):
    """
    Dataset API: fetching samples for a physical dataset returns rows
    and stores the result in the data cache.
    """
    samples_url = (
        f"/datasource/samples?datasource_id={physical_dataset.id}&datasource_type=table"
    )
    response = test_client.post(samples_url)
    assert response.status_code == 200
    result = response.json["result"]
    # the cache key returned in the payload should now be present in the cache
    assert QueryCacheManager.has(result["cache_key"], region=CacheRegion.DATA)
    assert len(result["data"]) == 10