def test_transpose(custom_data, unittest):
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        reshape_cfg = dict(index=["security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="transpose", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        new_key = int(c.port) + 1
        assert "error" in response_data

        min_date = custom_data["date"].min().strftime("%Y-%m-%d")
        global_state.set_settings(c.port, dict(query="date == '{}'".format(min_date)))
        reshape_cfg = dict(index=["date", "security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="transpose", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            [
                "index",
                "{} 00:00:00 100000".format(min_date),
                "{} 00:00:00 100001".format(min_date),
            ],
        )
        assert len(global_state.get_data(new_key)) == 1
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        reshape_cfg = dict(index=["date", "security_id"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(
                output="override", type="transpose", cfg=json.dumps(reshape_cfg)
            ),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == c.port

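# Stacks two registered datasets through the /dtale/merge endpoint (action="stack");
# the original integer index is kept as an "index" column unless ignoreIndex is set.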
def test_stack(unittest):
    from dtale.views import build_dtypes_state
    import dtale.global_state as global_state

    global_state.clear_store()
    df1 = pd.DataFrame(
        {
            "A": ["A0", "A1"],
            "B": ["B0", "B1"],
            "C": ["C0", "C1"],
            "D": ["D0", "D1"],
        }
    )
    df2 = pd.DataFrame(
        {
            "A": ["A2", "A3"],
            "B": ["B3", "B3"],
            "C": ["C3", "C3"],
            "D": ["D3", "D3"],
        }
    )
    with app.test_client() as c:
        data = {"1": df1, "2": df2}
        dtypes = {k: build_dtypes_state(v) for k, v in data.items()}
        settings = {k: {} for k in data.keys()}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        datasets = [dict(dataId="1", columns=[]), dict(dataId="2", columns=[])]
        config = dict(ignoreIndex=False)
        resp = c.post(
            "/dtale/merge",
            data=dict(
                action="stack",
                config=json.dumps(config),
                datasets=json.dumps(datasets),
            ),
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(list(final_df["A"].values), ["A0", "A1", "A2", "A3"])
        unittest.assertEqual(list(final_df["index"].values), [0, 1, 0, 1])

        config["ignoreIndex"] = True
        resp = c.post(
            "/dtale/merge",
            data=dict(
                action="stack",
                config=json.dumps(config),
                datasets=json.dumps(datasets),
            ),
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        assert "index" not in final_df.columns
        unittest.assertEqual(list(final_df["A"].values), ["A0", "A1", "A2", "A3"])

def test_get_column_analysis_geolocation(unittest):
    df = pd.DataFrame(dict(a=[1, 2, 3], b=[3, 4, 5]))
    with app.test_client() as c:
        build_data_inst({c.port: df})
        settings = {c.port: {}}
        build_settings(settings)
        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="a", type="geolocation", latCol="a", lonCol="b"),
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(response_data["lat"], [1, 2, 3])
        unittest.assertEqual(response_data["lon"], [3, 4, 5])

def test_get_column_analysis_word_value_count(unittest):
    df = pd.DataFrame(dict(a=["a b c", "d e f", "g h i"], b=[3, 4, 5]))
    with app.test_client() as c:
        build_data_inst({c.port: df})
        settings = {c.port: {}}
        build_settings(settings)
        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="a", type="word_value_counts"),
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["labels"], ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
        )

        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(
                col="a",
                type="word_value_counts",
                ordinalCol="b",
                ordinalAgg="mean",
                cleaner="underscore_to_space",
            ),
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(response_data["ordinal"], [3, 3, 3, 4, 4, 4, 5, 5, 5])

        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(
                col="a",
                type="word_value_counts",
                ordinalCol="b",
                ordinalAgg="pctsum",
            ),
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["ordinal"],
            [
                0.083333,
                0.083333,
                0.083333,
                0.111111,
                0.111111,
                0.111111,
                0.138889,
                0.138889,
                0.138889,
            ],
        )

def test_get_column_analysis_qq():
    import dtale.views as views

    df = pd.DataFrame(dict(a=np.random.normal(loc=20, scale=5, size=100)))
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        settings = {c.port: {}}
        build_settings(settings)
        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="a", type="qq"),
        )
        response_data = json.loads(response.data)
        assert len(response_data["data"]) == 100

def test_get_column_analysis_kde():
    import dtale.views as views

    df = pd.DataFrame(dict(a=np.random.randn(100)))
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        settings = {c.port: {}}
        build_settings(settings)
        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="a", type="histogram", bins=50),
        )
        response_data = json.loads(response.data)
        assert len(response_data["kde"]) == 51

def test_probability_histogram(unittest, test_data):
    import dtale.views as views

    with app.test_client() as c:
        with ExitStack():
            build_data_inst({c.port: test_data})
            build_dtypes({c.port: views.build_dtypes_state(test_data)})
            build_settings({c.port: {}})
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", density="true"),
            )
            response_data = json.loads(response.data)
            assert response.status_code == 200
            assert "np.histogram(s['foo'], density=True)" in response_data["code"]

def test_edit_float():
    from dtale.views import build_dtypes_state, format_data

    df = edit_data()
    df, _ = format_data(df)
    with app.test_client() as c:
        data = {c.port: df}
        build_data_inst(data)
        settings = {c.port: {"locked": ["a"]}}
        build_settings(settings)
        dtypes = {c.port: build_dtypes_state(df)}
        build_dtypes(dtypes)
        resp = c.get(
            "/dtale/edit-cell/{}/b".format(c.port),
            query_string=dict(rowIndex=0, updated=2.5),
        )
        assert "error" not in resp.json
        assert data[c.port]["b"].values[0] == 2.5

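# Resamples a 7-minute time series to 17-minute buckets via /dtale/reshape
# (type="resample") and verifies the new dataset's columns, row count and startup code.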
def test_resample(unittest):
    from dtale.views import build_dtypes_state, format_data

    start, end = "2000-10-01 23:30:00", "2000-10-03 00:30:00"
    rng = pd.date_range(start, end, freq="7min")
    ts = pd.Series(np.arange(len(rng)) * 3, index=rng)
    ts2 = pd.Series(np.arange(len(rng)) * 0.32, index=rng)
    df = pd.DataFrame(data={"col1": ts, "col2": ts2})
    df, _ = format_data(df)
    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: df}
        dtypes = {c.port: build_dtypes_state(df)}
        settings = {c.port: {}}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        reshape_cfg = dict(index="index", columns=["col1"], freq="17min", agg="mean")
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="resample", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        new_key = int(c.port) + 1
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["index_17min", "col1"],
        )
        assert len(global_state.get_data(new_key)) == 90
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

def test_edit_timedelta():
    from dtale.views import build_dtypes_state, format_data

    df = edit_data()
    df, _ = format_data(df)
    with app.test_client() as c:
        data = {c.port: df}
        build_data_inst(data)
        settings = {c.port: {"locked": ["a"]}}
        build_settings(settings)
        dtypes = {c.port: build_dtypes_state(df)}
        build_dtypes(dtypes)
        resp = c.get(
            "/dtale/edit-cell/{}".format(c.port),
            query_string=dict(col="g", rowIndex=0, updated="0 days 00:09:20"),
        )
        assert "error" not in resp.json
        assert pd.Timedelta(data[c.port]["g"].values[0]) == pd.Timedelta(
            "0 days 00:09:20"
        )

def test_edit_timestamp():
    from dtale.views import build_dtypes_state, format_data

    df = edit_data()
    df, _ = format_data(df)
    with app.test_client() as c:
        data = {c.port: df}
        build_data_inst(data)
        settings = {c.port: {"locked": ["a"]}}
        build_settings(settings)
        dtypes = {c.port: build_dtypes_state(df)}
        build_dtypes(dtypes)
        resp = c.get(
            "/dtale/edit-cell/{}/e".format(c.port),
            query_string=dict(rowIndex=0, updated="20000101 11:58:59.999999999"),
        )
        assert "error" not in resp.json
        assert pd.Timestamp(data[c.port]["e"].values[0]) == pd.Timestamp(
            "2000-01-01 11:58:59.999999999"
        )

def test_edit_to_nan():
    from dtale.views import build_dtypes_state, format_data

    df = edit_data()
    df, _ = format_data(df)
    with app.test_client() as c:
        data = {c.port: df}
        build_data_inst(data)
        settings = {c.port: {"locked": ["a"]}}
        build_settings(settings)
        dtypes = {c.port: build_dtypes_state(df)}
        build_dtypes(dtypes)
        c.get(
            "/dtale/edit-cell/{}/a".format(c.port),
            query_string=dict(rowIndex=0, updated="nan"),
        )
        assert pd.isnull(data[c.port].a.values[0])
        c.get(
            "/dtale/edit-cell/{}/b".format(c.port),
            query_string=dict(rowIndex=0, updated="inf"),
        )
        assert np.isinf(data[c.port].b.values[0])

def test_get_column_analysis(unittest, test_data):
    import dtale.views as views

    with app.test_client() as c:
        with ExitStack() as stack:
            build_data_inst({c.port: test_data})
            build_dtypes({c.port: views.build_dtypes_state(test_data)})
            build_settings({c.port: {}})
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port), query_string=dict(col="foo")
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=[
                    "0.6", "0.6", "0.7", "0.7", "0.8", "0.8", "0.9", "0.9", "0.9", "1.0",
                    "1.1", "1.1", "1.1", "1.2", "1.2", "1.3", "1.4", "1.4", "1.5", "1.5",
                ],
                data=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                desc={
                    "count": "50",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "50",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return 20-bin histogram for foo",
            )
            unittest.assertEqual(response_data["code"], HISTOGRAM_CODE)

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", bins=5),
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=["0.7", "0.9", "1.1", "1.3", "1.5"],
                data=[0, 0, 50, 0, 0],
                desc={
                    "count": "50",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "50",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return 5-bin histogram for foo",
            )

            global_state.set_settings(c.port, dict(query="security_id > 10"))
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", bins=5),
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=["0.7", "0.9", "1.1", "1.3", "1.5"],
                data=[0, 0, 39, 0, 0],
                desc={
                    "count": "39",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "39",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="security_id > 10",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return a filtered 5-bin histogram for foo",
            )

            global_state.set_settings(c.port, {})
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", type="value_counts", top=2),
            )
            response_data = json.loads(response.data)
            assert response_data["chart_type"] == "value_counts"

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="foo", type="value_counts", ordinalCol="bar", ordinalAgg="mean"
                ),
            )
            response_data = json.loads(response.data)
            assert "ordinal" in response_data

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="foo",
                    type="value_counts",
                    ordinalCol="bar",
                    ordinalAgg="pctsum",
                ),
            )
            response_data = json.loads(response.data)
            assert "ordinal" in response_data

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="bar", type="categories", categoryCol="foo", categoryAgg="mean"
                ),
            )
            response_data = json.loads(response.data)
            assert "count" in response_data

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="bar",
                    type="categories",
                    categoryCol="foo",
                    categoryAgg="pctsum",
                ),
            )
            response_data = json.loads(response.data)
            assert "count" in response_data

    with app.test_client() as c:
        with ExitStack() as stack:
            build_data_inst({c.port: test_data})
            stack.enter_context(
                mock.patch(
                    "numpy.histogram",
                    mock.Mock(side_effect=Exception("histogram failure")),
                )
            )
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port), query_string=dict(col="foo")
            )
            response_data = json.loads(response.data)
            unittest.assertEqual(
                response_data["error"],
                "histogram failure",
                "should handle histogram exception",
            )

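# Pivots custom_data by date/security_id through /dtale/reshape (type="pivot"),
# exercising the aggfunc, columnNameHeaders and multi-value configurations.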
def test_pivot(custom_data, unittest):
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        reshape_cfg = dict(
            index=["date"], columns=["security_id"], values=["Col0"], aggfunc="mean"
        )
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="pivot", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        new_key = int(c.port) + 1
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "100000", "100001"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        resp = c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))
        assert json.loads(resp.data)["success"]
        assert len(global_state.keys()) == 1

        reshape_cfg["columnNameHeaders"] = True
        reshape_cfg["aggfunc"] = "sum"
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="pivot", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "security_id-100000", "security_id-100001"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        reshape_cfg["columnNameHeaders"] = False
        reshape_cfg["values"] = ["Col0", "Col1"]
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="pivot", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "Col0 100000", "Col0 100001", "Col1 100000", "Col1 100001"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

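# Aggregates custom_data by date through /dtale/reshape (type="aggregate"), covering
# per-column aggregations, a single function on selected columns, and a frame-wide mean.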
def test_aggregate(custom_data, unittest):
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        reshape_cfg = dict(
            index="date",
            agg=dict(type="col", cols={"Col0": ["sum", "mean"], "Col1": ["count"]}),
        )
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="aggregate", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        new_key = int(c.port) + 1
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "Col0 sum", "Col0 mean", "Col1 count"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        reshape_cfg = dict(
            index="date", agg=dict(type="func", func="mean", cols=["Col0", "Col1"])
        )
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="aggregate", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "Col0", "Col1"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        reshape_cfg = dict(index="date", agg=dict(type="func", func="mean"))
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new", type="aggregate", cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "security_id", "int_val", "Col0", "Col1", "Col2", "bool_val"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get("startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

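# Exercises /dtale/correlations: plain correlations, string encoding via dummy columns,
# a failing query, a date column gaining multiple values, and rolling-date detection.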
def test_get_correlations(unittest, test_data, rolling_data):
    import dtale.views as views

    with app.test_client() as c:
        test_data, _ = views.format_data(test_data)
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        expected = dict(
            data=[
                dict(column="security_id", security_id=1.0, foo=None, bar=None),
                dict(column="foo", security_id=None, foo=None, bar=None),
                dict(column="bar", security_id=None, foo=None, bar=None),
            ],
            dates=[],
            pps=None,
            dummyColMappings={},
            strings=["baz"],
        )
        unittest.assertEqual(
            {k: v for k, v in response_data.items() if k != "code"},
            expected,
            "should return correlations",
        )
        unittest.assertEqual(response_data["code"], CORRELATIONS_CODE)

        response = c.get(
            "/dtale/correlations/{}".format(c.port),
            query_string={"encodeStrings": True},
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(response_data["dummyColMappings"], {"baz": ["baz_baz"]})

    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        settings = {c.port: {"query": "missing_col == 'blah'"}}
        build_settings(settings)
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )

    with app.test_client() as c:
        test_data.loc[test_data.security_id == 1, "bar"] = np.nan
        test_data2 = test_data.copy()
        test_data2.loc[:, "date"] = pd.Timestamp("20000102")
        test_data = pd.concat([test_data, test_data2], ignore_index=True)
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        expected = dict(
            data=[
                dict(column="security_id", security_id=1.0, foo=None, bar=None),
                dict(column="foo", security_id=None, foo=None, bar=None),
                dict(column="bar", security_id=None, foo=None, bar=None),
            ],
            dates=[dict(name="date", rolling=False)],
            pps=None,
            dummyColMappings={},
            strings=["baz"],
        )
        unittest.assertEqual(
            {k: v for k, v in response_data.items() if k != "code"},
            expected,
            "should return correlations",
        )

    df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["dates"],
            [dict(name="date", rolling=True)],
            "should return correlation date columns",
        )

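# Exercises /dtale/scatter: per-date scatter stats (including predictive power score),
# dummy-encoded string columns, rolling windows, the 15,000-point limit and query errors.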
def test_get_scatter(unittest, rolling_data):
    import dtale.views as views

    no_pps = parse_version(platform.python_version()) < parse_version("3.6.0")
    test_data = pd.DataFrame(
        build_ts_data(), columns=["date", "security_id", "foo", "bar"]
    )
    test_data.loc[:, "baz"] = "baz"
    test_data, _ = views.format_data(test_data)
    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        params = dict(dateCol="date", cols=json.dumps(["foo", "bar"]), index=0)
        response = c.get("/dtale/scatter/{}".format(c.port), query_string=params)
        response_data = json.loads(response.data)
        expected = dict(
            y="bar",
            stats={
                "pearson": 0.9999999999999999,
                "correlated": 5,
                "only_in_s0": 0,
                "only_in_s1": 0,
                "spearman": 0.9999999999999999,
                "pps": None
                if no_pps
                else {
                    "baseline_score": 1.2,
                    "case": "regression",
                    "is_valid_score": True,
                    "metric": "mean absolute error",
                    "model": "DecisionTreeRegressor()",
                    "model_score": 1.0,
                    "ppscore": 0.16666666666666663,
                    "x": "foo",
                    "y": "bar",
                },
            },
            data={
                "all": {
                    "bar": [0, 1, 2, 3, 4],
                    "_corr_index": [0, 1, 2, 3, 4],
                    "x": [0, 1, 2, 3, 4],
                }
            },
            max={"bar": 4, "_corr_index": 4, "x": 4},
            min={"bar": 0, "_corr_index": 0, "x": 0},
            x="foo",
            date=" for 2000-01-01",
        )
        unittest.assertEqual(
            {k: v for k, v in response_data.items() if k != "code"},
            expected,
            "should return scatter",
        )
        unittest.assertEqual(response_data["code"], SCATTER_CODE)

        params["cols"] = json.dumps(["foo", "baz_baz"])
        params["dummyCols"] = json.dumps(["baz"])
        response = c.get("/dtale/scatter/{}".format(c.port), query_string=params)
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["data"]["all"]["baz_baz"], ["1", "1", "1", "1", "1"]
        )

    df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        params = dict(
            dateCol="date",
            cols=json.dumps(["0", "1"]),
            index=699,
            rolling=True,
            window="4",
        )
        response = c.get("/dtale/scatter/{}".format(c.port), query_string=params)
        response_data = json.loads(response.data)
        assert len(response_data["data"]["all"]["1"]) == 4
        assert sorted(response_data["data"]["all"]) == ["1", "_corr_index", "date", "x"]
        unittest.assertEqual(
            sorted(response_data["data"]["all"]["date"]),
            ["2019-11-28", "2019-11-29", "2019-11-30", "2019-12-01"],
            "should return scatter",
        )

    test_data = pd.DataFrame(
        build_ts_data(size=15001, days=1),
        columns=["date", "security_id", "foo", "bar"],
    )
    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        params = dict(dateCol="date", cols=json.dumps(["foo", "bar"]), date="20000101")
        response = c.get("/dtale/scatter/{}".format(c.port), query_string=params)
        response_data = json.loads(response.data)
        expected = dict(
            stats={
                "correlated": 15001,
                "only_in_s0": 0,
                "only_in_s1": 0,
                "pearson": 1.0,
                "pps": None
                if no_pps
                else {
                    "baseline_score": 3736.0678,
                    "case": "regression",
                    "is_valid_score": True,
                    "metric": "mean absolute error",
                    "model": "DecisionTreeRegressor()",
                    "model_score": 2.2682,
                    "ppscore": 0.9993928911033145,
                    "x": "foo",
                    "y": "bar",
                },
                "spearman": 1.0,
            },
            error="Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...",
            traceback=CHART_POINTS_LIMIT,
        )
        unittest.assertEqual(
            {k: v for k, v in response_data.items() if k != "code"},
            expected,
            "should return scatter",
        )

    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        settings = {c.port: {"query": "missing_col == 'blah'"}}
        build_settings(settings)
        params = dict(dateCol="date", cols=json.dumps(["foo", "bar"]), date="20000101")
        response = c.get("/dtale/scatter/{}".format(c.port), query_string=params)
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )

def test_get_correlations_ts(unittest, rolling_data):
    import dtale.views as views

    test_data = pd.DataFrame(
        build_ts_data(size=50), columns=["date", "security_id", "foo", "bar"]
    )
    test_data.loc[:, "baz"] = "baz"
    no_pps = parse_version(platform.python_version()) < parse_version("3.6.0")
    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        params = dict(dateCol="date", cols=json.dumps(["foo", "bar"]))
        response = c.get(
            "/dtale/correlations-ts/{}".format(c.port), query_string=params
        )
        response_data = json.loads(response.data)
        expected = {
            "data": {
                "all": {
                    "x": [
                        "2000-01-01",
                        "2000-01-02",
                        "2000-01-03",
                        "2000-01-04",
                        "2000-01-05",
                    ],
                    "corr": [1.0, 1.0, 1.0, 1.0, 1.0],
                }
            },
            "max": {"corr": 1.0, "x": "2000-01-05"},
            "min": {"corr": 1.0, "x": "2000-01-01"},
            "pps": None
            if no_pps
            else {
                "baseline_score": 12.5,
                "case": "regression",
                "is_valid_score": True,
                "metric": "mean absolute error",
                "model": "DecisionTreeRegressor()",
                "model_score": 0.0,
                "ppscore": 1.0,
                "x": "foo",
                "y": "bar",
            },
            "success": True,
        }
        unittest.assertEqual(
            {k: v for k, v in response_data.items() if k != "code"},
            expected,
            "should return timeseries correlation",
        )
        unittest.assertEqual(response_data["code"], CORRELATIONS_TS_CODE)

        params["cols"] = json.dumps(["foo", "baz_baz"])
        params["dummyCols"] = json.dumps(["baz"])
        response = c.get(
            "/dtale/correlations-ts/{}".format(c.port), query_string=params
        )
        response_data = json.loads(response.data)
        assert response_data["success"]

        params["cols"] = json.dumps(["foo", "bar"])
        del params["dummyCols"]
        params["rolling"] = False
        params["rollingWindow"] = 4
        params["minPeriods"] = 4
        response = c.get(
            "/dtale/correlations-ts/{}".format(c.port), query_string=params
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["data"]["all"]["x"], ["2000-01-04", "2000-01-05"]
        )

    df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        params = dict(
            dateCol="date",
            cols=json.dumps(["0", "1"]),
            rolling=True,
            rollingWindow="4",
        )
        response = c.get(
            "/dtale/correlations-ts/{}".format(c.port), query_string=params
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["success"], True, "should return rolling correlation"
        )

    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        settings = {c.port: {"query": "missing_col == 'blah'"}}
        build_settings(settings)
        response = c.get("/dtale/correlations-ts/{}".format(c.port))
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )

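# Merges three registered datasets through /dtale/merge (action="merge"), checking
# inner/left joins, column sub-selection and the generated merge indicator columns.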
def test_merge(unittest):
    from dtale.views import build_dtypes_state
    import dtale.global_state as global_state

    global_state.clear_store()
    left = pd.DataFrame(
        {
            "key1": ["K0", "K0", "K1", "K2"],
            "key2": ["K0", "K1", "K0", "K1"],
            "A": ["A0", "A1", "A2", "A3"],
            "B": ["B0", "B1", "B2", "B3"],
        }
    )
    right = pd.DataFrame(
        {
            "key1": ["K0", "K1", "K1", "K2"],
            "key2": ["K0", "K0", "K0", "K0"],
            "C": ["C0", "C1", "C2", "C3"],
            "D": ["D0", "D1", "D2", "D3"],
        }
    )
    right2 = pd.DataFrame(
        {
            "key1": ["K0", "K1"],
            "key2": ["K0", "K0"],
            "E": ["E0", "E1"],
            "F": ["F0", "F1"],
        }
    )
    with app.test_client() as c:
        data = {"1": left, "2": right, "3": right2}
        dtypes = {k: build_dtypes_state(v) for k, v in data.items()}
        settings = {k: {} for k in data.keys()}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)

        datasets = [
            dict(dataId="1", columns=[], index=["key1", "key2"], suffix=""),
            dict(dataId="2", columns=[], index=["key1", "key2"], suffix=""),
        ]
        config = dict(how="inner", sort=False, indicator=False)
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(
            list(final_df.columns), ["key1", "key2", "A", "B", "C", "D"]
        )
        assert len(final_df) == 3

        datasets[0]["columns"] = ["A"]
        datasets[1]["columns"] = ["C"]
        config["how"] = "left"
        config["indicator"] = True
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(
            list(final_df.columns), ["key1", "key2", "A", "C", "merge_1"]
        )
        unittest.assertEqual(
            list(final_df["merge_1"].values),
            ["both", "left_only", "both", "both", "left_only"],
        )

        datasets.append(dict(dataId="3", index=["key1", "key2"], suffix="3"))
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(
            list(final_df.columns),
            ["key1", "key2", "A", "C", "merge_1", "E", "F", "merge_2"],
        )
        unittest.assertEqual(
            list(final_df["merge_2"].values),
            ["both", "left_only", "both", "both", "left_only"],
        )