Example #1
0
def test_transpose(custom_data, unittest):
    """Exercise the /dtale/reshape transpose endpoint.

    Verifies that transposing on a non-unique index returns an error payload,
    that a filtered transpose creates a new dataset with the expected column
    names, and that output="override" keeps the original data id.
    """
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}

        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        # Transposing on "security_id" alone (non-unique index) should fail.
        reshape_cfg = dict(index=["security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        # New datasets are assigned the next sequential integer id.
        new_key = int(c.port) + 1
        assert "error" in response_data

        # Filter down to a single date so the (date, security_id) index is
        # unique, then retry the transpose as a new dataset.
        min_date = custom_data["date"].min().strftime("%Y-%m-%d")
        global_state.set_settings(c.port,
                                  dict(query="date == '{}'".format(min_date)))
        reshape_cfg = dict(index=["date", "security_id"], columns=["Col0"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            [
                "index",
                "{} 00:00:00 100000".format(min_date),
                "{} 00:00:00 100001".format(min_date),
            ],
        )
        assert len(global_state.get_data(new_key)) == 1
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        # Remove the derived dataset so only the original remains.
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        # output="override" replaces the data in place: same data id.
        reshape_cfg = dict(index=["date", "security_id"])
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="override",
                              type="transpose",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == c.port
Example #2
0
def test_stack(unittest):
    """Stack two datasets via /dtale/merge and verify index handling."""
    from dtale.views import build_dtypes_state
    import dtale.global_state as global_state

    global_state.clear_store()
    frames = {
        "1": pd.DataFrame({
            "A": ["A0", "A1"],
            "B": ["B0", "B1"],
            "C": ["C0", "C1"],
            "D": ["D0", "D1"],
        }),
        "2": pd.DataFrame({
            "A": ["A2", "A3"],
            "B": ["B3", "B3"],
            "C": ["C3", "C3"],
            "D": ["D3", "D3"],
        }),
    }

    with app.test_client() as c:
        build_data_inst(frames)
        build_dtypes(
            {data_id: build_dtypes_state(frame) for data_id, frame in frames.items()}
        )
        build_settings({data_id: {} for data_id in frames})

        stack_datasets = json.dumps(
            [dict(dataId="1", columns=[]), dict(dataId="2", columns=[])]
        )

        def _post_stack(cfg):
            # Submit a stack merge request for both datasets.
            return c.post(
                "/dtale/merge",
                data=dict(
                    action="stack",
                    config=json.dumps(cfg),
                    datasets=stack_datasets,
                ),
            )

        merge_cfg = dict(ignore_index=False)
        resp = _post_stack(merge_cfg)
        assert resp.status_code == 200
        stacked = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(list(stacked["A"].values), ["A0", "A1", "A2", "A3"])
        unittest.assertEqual(list(stacked["index"].values), [0, 1, 0, 1])

        # When the original index is ignored, the "index" column is dropped.
        merge_cfg["ignoreIndex"] = True
        resp = _post_stack(merge_cfg)
        assert resp.status_code == 200
        stacked = global_state.get_data(resp.json["data_id"])
        assert "index" not in stacked.columns
        unittest.assertEqual(list(stacked["A"].values), ["A0", "A1", "A2", "A3"])
def test_get_column_analysis_geolocation(unittest):
    """A geolocation analysis should echo the lat/lon columns verbatim."""
    frame = pd.DataFrame(dict(a=[1, 2, 3], b=[3, 4, 5]))
    with app.test_client() as c:
        build_data_inst({c.port: frame})
        build_settings({c.port: {}})
        resp = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="a", type="geolocation", latCol="a", lonCol="b"),
        )
        payload = json.loads(resp.data)
        unittest.assertEqual(payload["lat"], [1, 2, 3])
        unittest.assertEqual(payload["lon"], [3, 4, 5])
def test_get_column_analysis_word_value_count(unittest):
    """Word value counts should tokenize "a" and aggregate ordinals from "b"."""
    frame = pd.DataFrame(dict(a=["a b c", "d e f", "g h i"], b=[3, 4, 5]))
    with app.test_client() as c:
        build_data_inst({c.port: frame})
        build_settings({c.port: {}})
        url = "/dtale/column-analysis/{}".format(c.port)

        payload = json.loads(
            c.get(url, query_string=dict(col="a", type="word_value_counts")).data
        )
        unittest.assertEqual(
            payload["labels"], ["a", "b", "c", "d", "e", "f", "g", "h", "i"]
        )

        # Each word inherits the mean of its source row's "b" value.
        payload = json.loads(
            c.get(
                url,
                query_string=dict(
                    col="a",
                    type="word_value_counts",
                    ordinalCol="b",
                    ordinalAgg="mean",
                    cleaner="underscore_to_space",
                ),
            ).data
        )
        unittest.assertEqual(payload["ordinal"], [3, 3, 3, 4, 4, 4, 5, 5, 5])

        # pctsum normalizes each word's contribution against the total of "b".
        payload = json.loads(
            c.get(
                url,
                query_string=dict(
                    col="a",
                    type="word_value_counts",
                    ordinalCol="b",
                    ordinalAgg="pctsum",
                ),
            ).data
        )
        unittest.assertEqual(
            payload["ordinal"],
            [
                0.083333,
                0.083333,
                0.083333,
                0.111111,
                0.111111,
                0.111111,
                0.138889,
                0.138889,
                0.138889,
            ],
        )
def test_get_column_analysis_qq():
    """A qq analysis should return one point per row of data."""
    import dtale.views as views

    frame = pd.DataFrame(dict(a=np.random.normal(loc=20, scale=5, size=100)))
    with app.test_client() as c:
        build_data_inst({c.port: frame})
        build_dtypes({c.port: views.build_dtypes_state(frame)})
        build_settings({c.port: {}})
        payload = json.loads(
            c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="a", type="qq"),
            ).data
        )
        assert len(payload["data"]) == 100
def test_get_column_analysis_kde():
    """A 50-bin histogram response should include a 51-point kde curve."""
    import dtale.views as views

    frame = pd.DataFrame(dict(a=np.random.randn(100)))
    with app.test_client() as c:
        build_data_inst({c.port: frame})
        build_dtypes({c.port: views.build_dtypes_state(frame)})
        build_settings({c.port: {}})
        payload = json.loads(
            c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="a", type="histogram", bins=50),
            ).data
        )
        assert len(payload["kde"]) == 51
Example #7
0
def test_probability_histogram(unittest, test_data):
    """Density histograms should be built with numpy's density=True option.

    Note: the previous version wrapped the body in a bare ``with ExitStack():``
    that managed no resources; the no-op context manager was removed.
    """
    import dtale.views as views

    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        build_settings({c.port: {}})
        response = c.get(
            "/dtale/column-analysis/{}".format(c.port),
            query_string=dict(col="foo", density="true"),
        )
        response_data = json.loads(response.data)
        assert response.status_code == 200
        # The generated code snippet must show the density flag being used.
        assert "np.histogram(s['foo'], density=True)" in response_data[
            "code"]
Example #8
0
def test_edit_float():
    """Editing a float cell through /dtale/edit-cell should update the store."""
    from dtale.views import build_dtypes_state, format_data

    df, _ = format_data(edit_data())
    with app.test_client() as c:
        store = {c.port: df}
        build_data_inst(store)
        build_settings({c.port: {"locked": ["a"]}})
        build_dtypes({c.port: build_dtypes_state(df)})
        resp = c.get(
            "/dtale/edit-cell/{}/b".format(c.port),
            query_string=dict(rowIndex=0, updated=2.5),
        )
        assert "error" not in resp.json
        # The in-memory frame must reflect the edited cell value.
        assert store[c.port]["b"].values[0] == 2.5
Example #9
0
def test_resample(unittest):
    """Resampling through /dtale/reshape should create a new dataset."""
    from dtale.views import build_dtypes_state, format_data

    start, end = "2000-10-01 23:30:00", "2000-10-03 00:30:00"
    index = pd.date_range(start, end, freq="7min")
    frame = pd.DataFrame(
        data={
            "col1": pd.Series(np.arange(len(index)) * 3, index=index),
            "col2": pd.Series(np.arange(len(index)) * 0.32, index=index),
        }
    )
    frame, _ = format_data(frame)

    global_state.clear_store()
    with app.test_client() as c:
        build_data_inst({c.port: frame})
        build_dtypes({c.port: build_dtypes_state(frame)})
        build_settings({c.port: {}})

        resample_cfg = dict(
            index="index", columns=["col1"], freq="17min", agg="mean"
        )
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(
                output="new", type="resample", cfg=json.dumps(resample_cfg)
            ),
        )

        result = json.loads(resp.data)
        # New datasets receive the next sequential integer id.
        created_key = int(c.port) + 1
        assert result["data_id"] == created_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(created_key)],
            ["index_17min", "col1"],
        )
        assert len(global_state.get_data(created_key)) == 90
        assert global_state.get_settings(created_key).get(
            "startup_code") is not None
        # Drop the derived dataset so later tests start clean.
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=created_key))
Example #10
0
def test_edit_timedelta():
    """Editing a timedelta cell through /dtale/edit-cell should persist."""
    from dtale.views import build_dtypes_state, format_data

    df, _ = format_data(edit_data())
    with app.test_client() as c:
        store = {c.port: df}
        build_data_inst(store)
        build_settings({c.port: {"locked": ["a"]}})
        build_dtypes({c.port: build_dtypes_state(df)})
        resp = c.get(
            "/dtale/edit-cell/{}".format(c.port),
            query_string=dict(col="g", rowIndex=0, updated="0 days 00:09:20"),
        )
        assert "error" not in resp.json
        # The stored value should parse back to the submitted timedelta.
        stored = pd.Timedelta(store[c.port]["g"].values[0])
        assert stored == pd.Timedelta("0 days 00:09:20")
Example #11
0
def test_edit_timestamp():
    """Editing a timestamp cell through /dtale/edit-cell should persist."""
    from dtale.views import build_dtypes_state, format_data

    df, _ = format_data(edit_data())
    with app.test_client() as c:
        store = {c.port: df}
        build_data_inst(store)
        build_settings({c.port: {"locked": ["a"]}})
        build_dtypes({c.port: build_dtypes_state(df)})
        resp = c.get(
            "/dtale/edit-cell/{}/e".format(c.port),
            query_string=dict(rowIndex=0, updated="20000101 11:58:59.999999999"),
        )
        assert "error" not in resp.json
        # Nanosecond precision must survive the round trip.
        stored = pd.Timestamp(store[c.port]["e"].values[0])
        assert stored == pd.Timestamp("2000-01-01 11:58:59.999999999")
Example #12
0
def test_edit_to_nan():
    """Editing cells to "nan"/"inf" should store NaN and infinity values."""
    from dtale.views import build_dtypes_state, format_data

    df, _ = format_data(edit_data())
    with app.test_client() as c:
        store = {c.port: df}
        build_data_inst(store)
        build_settings({c.port: {"locked": ["a"]}})
        build_dtypes({c.port: build_dtypes_state(df)})
        c.get(
            "/dtale/edit-cell/{}/a".format(c.port),
            query_string=dict(rowIndex=0, updated="nan"),
        )
        assert pd.isnull(store[c.port].a.values[0])
        c.get(
            "/dtale/edit-cell/{}/b".format(c.port),
            query_string=dict(rowIndex=0, updated="inf"),
        )
        assert np.isinf(store[c.port].b.values[0])
def test_get_column_analysis(unittest, test_data):
    """Exercise /dtale/column-analysis across chart types.

    Covers the default 20-bin histogram, an explicit 5-bin histogram, a
    query-filtered histogram, value_counts (with ordinal aggregations),
    categories, and error handling when numpy.histogram raises.
    """
    import dtale.views as views

    with app.test_client() as c:
        with ExitStack() as stack:
            build_data_inst({c.port: test_data})
            build_dtypes({c.port: views.build_dtypes_state(test_data)})
            build_settings({c.port: {}})
            # Default analysis: a 20-bin histogram of "foo".
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port), query_string=dict(col="foo")
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=[
                    "0.6",
                    "0.6",
                    "0.7",
                    "0.7",
                    "0.8",
                    "0.8",
                    "0.9",
                    "0.9",
                    "0.9",
                    "1.0",
                    "1.1",
                    "1.1",
                    "1.1",
                    "1.2",
                    "1.2",
                    "1.3",
                    "1.4",
                    "1.4",
                    "1.5",
                    "1.5",
                ],
                data=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                desc={
                    "count": "50",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "50",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return 20-bin histogram for foo",
            )
            unittest.assertEqual(response_data["code"], HISTOGRAM_CODE)

            # Explicit bin count: a 5-bin histogram.
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", bins=5),
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=["0.7", "0.9", "1.1", "1.3", "1.5"],
                data=[0, 0, 50, 0, 0],
                desc={
                    "count": "50",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "50",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return 5-bin histogram for foo",
            )
            # A stored query should filter the rows feeding the histogram.
            global_state.set_settings(c.port, dict(query="security_id > 10"))
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", bins=5),
            )
            response_data = json.loads(response.data)
            expected = dict(
                labels=["0.7", "0.9", "1.1", "1.3", "1.5"],
                data=[0, 0, 39, 0, 0],
                desc={
                    "count": "39",
                    "std": "0",
                    "min": "1",
                    "max": "1",
                    "50%": "1",
                    "25%": "1",
                    "75%": "1",
                    "mean": "1",
                    "missing_ct": "0",
                    "missing_pct": 0.0,
                    "total_count": "39",
                    "kurt": 0.0,
                    "skew": 0.0,
                },
                chart_type="histogram",
                dtype="int64",
                query="security_id > 10",
            )
            unittest.assertEqual(
                {k: v for k, v in response_data.items() if k not in ["code", "cols"]},
                expected,
                "should return a filtered 5-bin histogram for foo",
            )
            # Reset the stored query before the value_counts checks.
            global_state.set_settings(c.port, {})
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(col="foo", type="value_counts", top=2),
            )
            response_data = json.loads(response.data)
            assert response_data["chart_type"] == "value_counts"

            # value_counts with an ordinal column aggregated by mean.
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="foo", type="value_counts", ordinalCol="bar", ordinalAgg="mean"
                ),
            )
            response_data = json.loads(response.data)
            assert "ordinal" in response_data

            # value_counts with an ordinal column aggregated by pctsum.
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="foo",
                    type="value_counts",
                    ordinalCol="bar",
                    ordinalAgg="pctsum",
                ),
            )
            response_data = json.loads(response.data)
            assert "ordinal" in response_data

            # categories analysis with mean and pctsum aggregations.
            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="bar", type="categories", categoryCol="foo", categoryAgg="mean"
                ),
            )
            response_data = json.loads(response.data)
            assert "count" in response_data

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port),
                query_string=dict(
                    col="bar",
                    type="categories",
                    categoryCol="foo",
                    categoryAgg="pctsum",
                ),
            )
            response_data = json.loads(response.data)
            assert "count" in response_data

    # Second client: force numpy.histogram to raise and verify the error
    # surfaces in the JSON response.
    with app.test_client() as c:
        with ExitStack() as stack:
            build_data_inst({c.port: test_data})
            stack.enter_context(
                mock.patch(
                    "numpy.histogram",
                    mock.Mock(side_effect=Exception("histogram failure")),
                )
            )

            response = c.get(
                "/dtale/column-analysis/{}".format(c.port), query_string=dict(col="foo")
            )
            response_data = json.loads(response.data)
            unittest.assertEqual(
                response_data["error"],
                "histogram failure",
                "should handle histogram exception",
            )
Example #14
0
def test_pivot(custom_data, unittest):
    """Exercise the /dtale/reshape pivot endpoint.

    Covers a mean-aggregated pivot into a new dataset, column-name headers
    with a sum aggregation, and pivoting multiple value columns at once.
    """
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}

        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        reshape_cfg = dict(index=["date"],
                           columns=["security_id"],
                           values=["Col0"],
                           aggfunc="mean")
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="pivot",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        # New datasets are assigned the next sequential integer id.
        new_key = int(c.port) + 1
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "100000", "100001"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None

        # Cleaning up the derived dataset leaves only the original.
        resp = c.get("/dtale/cleanup-datasets",
                     query_string=dict(dataIds=new_key))
        assert json.loads(resp.data)["success"]
        assert len(global_state.keys()) == 1

        # columnNameHeaders prefixes pivoted columns with the source column.
        reshape_cfg["columnNameHeaders"] = True
        reshape_cfg["aggfunc"] = "sum"
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="pivot",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "security_id-100000", "security_id-100001"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        # Multiple value columns yield one output column per (value, id) pair.
        reshape_cfg["columnNameHeaders"] = False
        reshape_cfg["values"] = ["Col0", "Col1"]
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="pivot",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            [
                "date", "Col0 100000", "Col0 100001", "Col1 100000",
                "Col1 100001"
            ],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))
Example #15
0
def test_aggregate(custom_data, unittest):
    """Exercise the /dtale/reshape aggregate endpoint.

    Covers per-column aggregations, a single function applied to selected
    columns, and a single function applied to every column.
    """
    from dtale.views import build_dtypes_state

    global_state.clear_store()
    with app.test_client() as c:
        data = {c.port: custom_data}
        dtypes = {c.port: build_dtypes_state(custom_data)}
        settings = {c.port: {}}

        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        # type="col": a distinct list of aggregations per column.
        reshape_cfg = dict(
            index="date",
            agg=dict(type="col",
                     cols={
                         "Col0": ["sum", "mean"],
                         "Col1": ["count"]
                     }),
        )
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="aggregate",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        # New datasets are assigned the next sequential integer id.
        new_key = int(c.port) + 1
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "Col0 sum", "Col0 mean", "Col1 count"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        # type="func" with explicit cols: one function over selected columns.
        reshape_cfg = dict(index="date",
                           agg=dict(type="func",
                                    func="mean",
                                    cols=["Col0", "Col1"]))
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="aggregate",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            ["date", "Col0", "Col1"],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))

        # type="func" without cols: the function applies to every column.
        reshape_cfg = dict(index="date", agg=dict(type="func", func="mean"))
        resp = c.get(
            "/dtale/reshape/{}".format(c.port),
            query_string=dict(output="new",
                              type="aggregate",
                              cfg=json.dumps(reshape_cfg)),
        )
        response_data = json.loads(resp.data)
        assert response_data["data_id"] == new_key
        assert len(global_state.keys()) == 2
        unittest.assertEqual(
            [d["name"] for d in global_state.get_dtypes(new_key)],
            [
                "date", "security_id", "int_val", "Col0", "Col1", "Col2",
                "bool_val"
            ],
        )
        assert len(global_state.get_data(new_key)) == 365
        assert global_state.get_settings(new_key).get(
            "startup_code") is not None
        c.get("/dtale/cleanup-datasets", query_string=dict(dataIds=new_key))
Example #16
0
def test_get_correlations(unittest, test_data, rolling_data):
    """Exercise the /dtale/correlations endpoint.

    Covers the base correlation matrix, string encoding, error handling for
    a bad stored query, date-column detection, and rolling-date detection.
    NOTE: test_data is mutated between clients, so ordering matters here.
    """
    import dtale.views as views

    with app.test_client() as c:
        test_data, _ = views.format_data(test_data)
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        expected = dict(
            data=[
                dict(column="security_id", security_id=1.0, foo=None,
                     bar=None),
                dict(column="foo", security_id=None, foo=None, bar=None),
                dict(column="bar", security_id=None, foo=None, bar=None),
            ],
            dates=[],
            pps=None,
            dummyColMappings={},
            strings=["baz"],
        )
        unittest.assertEqual(
            {k: v
             for k, v in response_data.items() if k != "code"},
            expected,
            "should return correlations",
        )
        unittest.assertEqual(response_data["code"], CORRELATIONS_CODE)

        # encodeStrings should expose dummy-column mappings for "baz".
        response = c.get(
            "/dtale/correlations/{}".format(c.port),
            query_string={"encodeStrings": True},
        )
        response_data = json.loads(response.data)
        unittest.assertEqual(response_data["dummyColMappings"],
                             {"baz": ["baz_baz"]})

    # A stored query referencing a missing column should surface an error.
    with app.test_client() as c:
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        settings = {c.port: {"query": "missing_col == 'blah'"}}
        build_settings(settings)
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )

    # Duplicate the data across two dates so "date" is detected (non-rolling).
    with app.test_client() as c:
        test_data.loc[test_data.security_id == 1, "bar"] = np.nan
        test_data2 = test_data.copy()
        test_data2.loc[:, "date"] = pd.Timestamp("20000102")
        test_data = pd.concat([test_data, test_data2], ignore_index=True)
        build_data_inst({c.port: test_data})
        build_dtypes({c.port: views.build_dtypes_state(test_data)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        expected = dict(
            data=[
                dict(column="security_id", security_id=1.0, foo=None,
                     bar=None),
                dict(column="foo", security_id=None, foo=None, bar=None),
                dict(column="bar", security_id=None, foo=None, bar=None),
            ],
            dates=[dict(name="date", rolling=False)],
            pps=None,
            dummyColMappings={},
            strings=["baz"],
        )
        unittest.assertEqual(
            {k: v
             for k, v in response_data.items() if k != "code"},
            expected,
            "should return correlations",
        )

    # rolling_data should mark its date column as rolling.
    df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: df})
        build_dtypes({c.port: views.build_dtypes_state(df)})
        response = c.get("/dtale/correlations/{}".format(c.port))
        response_data = json.loads(response.data)
        unittest.assertEqual(
            response_data["dates"],
            [dict(name="date", rolling=True)],
            "should return correlation date columns",
        )
Example #17
0
def test_get_scatter(unittest, rolling_data):
    """Exercise /dtale/scatter: base output, dummy-encoded columns, rolling
    windows, the 15,000-point render limit and bad-query error handling."""
    import dtale.views as views

    # predictive-power-score stats are only computed on python >= 3.6
    skip_pps = parse_version(platform.python_version()) < parse_version("3.6.0")
    base_df = pd.DataFrame(
        build_ts_data(), columns=["date", "security_id", "foo", "bar"]
    )
    base_df.loc[:, "baz"] = "baz"
    base_df, _ = views.format_data(base_df)
    with app.test_client() as c:
        build_data_inst({c.port: base_df})
        build_dtypes({c.port: views.build_dtypes_state(base_df)})
        query_params = {
            "dateCol": "date",
            "cols": json.dumps(["foo", "bar"]),
            "index": 0,
        }
        resp = c.get("/dtale/scatter/" + str(c.port), query_string=query_params)
        resp_json = json.loads(resp.data)
        expected_payload = {
            "y": "bar",
            "stats": {
                "pearson": 0.9999999999999999,
                "correlated": 5,
                "only_in_s0": 0,
                "only_in_s1": 0,
                "spearman": 0.9999999999999999,
                "pps": None
                if skip_pps
                else {
                    "baseline_score": 1.2,
                    "case": "regression",
                    "is_valid_score": True,
                    "metric": "mean absolute error",
                    "model": "DecisionTreeRegressor()",
                    "model_score": 1.0,
                    "ppscore": 0.16666666666666663,
                    "x": "foo",
                    "y": "bar",
                },
            },
            "data": {
                "all": {
                    "bar": [0, 1, 2, 3, 4],
                    "_corr_index": [0, 1, 2, 3, 4],
                    "x": [0, 1, 2, 3, 4],
                }
            },
            "max": {"bar": 4, "_corr_index": 4, "x": 4},
            "min": {"bar": 0, "_corr_index": 0, "x": 0},
            "x": "foo",
            "date": " for 2000-01-01",
        }
        # the generated "code" snippet is checked separately below
        without_code = {key: resp_json[key] for key in resp_json if key != "code"}
        unittest.assertEqual(without_code, expected_payload, "should return scatter")
        unittest.assertEqual(resp_json["code"], SCATTER_CODE)

        # a dummy-encoded string column comes back as string values
        query_params["cols"] = json.dumps(["foo", "baz_baz"])
        query_params["dummyCols"] = json.dumps(["baz"])
        resp = c.get("/dtale/scatter/" + str(c.port), query_string=query_params)
        resp_json = json.loads(resp.data)
        unittest.assertEqual(resp_json["data"]["all"]["baz_baz"], ["1"] * 5)

    rolling_df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: rolling_df})
        build_dtypes({c.port: views.build_dtypes_state(rolling_df)})
        query_params = {
            "dateCol": "date",
            "cols": json.dumps(["0", "1"]),
            "index": 699,
            "rolling": True,
            "window": "4",
        }
        resp = c.get("/dtale/scatter/" + str(c.port), query_string=query_params)
        resp_json = json.loads(resp.data)
        # a rolling window of 4 around index 699 yields exactly 4 points
        assert len(resp_json["data"]["all"]["1"]) == 4
        assert sorted(resp_json["data"]["all"]) == ["1", "_corr_index", "date", "x"]
        unittest.assertEqual(
            sorted(resp_json["data"]["all"]["date"]),
            ["2019-11-28", "2019-11-29", "2019-11-30", "2019-12-01"],
            "should return scatter",
        )

    # above 15,000 rows the stats are still computed but the chart is refused
    big_df = pd.DataFrame(
        build_ts_data(size=15001, days=1),
        columns=["date", "security_id", "foo", "bar"],
    )

    with app.test_client() as c:
        build_data_inst({c.port: big_df})
        build_dtypes({c.port: views.build_dtypes_state(big_df)})
        query_params = {
            "dateCol": "date",
            "cols": json.dumps(["foo", "bar"]),
            "date": "20000101",
        }
        resp = c.get("/dtale/scatter/" + str(c.port), query_string=query_params)
        resp_json = json.loads(resp.data)
        expected_payload = {
            "stats": {
                "correlated": 15001,
                "only_in_s0": 0,
                "only_in_s1": 0,
                "pearson": 1.0,
                "pps": None
                if skip_pps
                else {
                    "baseline_score": 3736.0678,
                    "case": "regression",
                    "is_valid_score": True,
                    "metric": "mean absolute error",
                    "model": "DecisionTreeRegressor()",
                    "model_score": 2.2682,
                    "ppscore": 0.9993928911033145,
                    "x": "foo",
                    "y": "bar",
                },
                "spearman": 1.0,
            },
            "error": "Dataset exceeds 15,000 records, cannot render scatter. Please apply filter...",
            "traceback": CHART_POINTS_LIMIT,
        }
        without_code = {key: resp_json[key] for key in resp_json if key != "code"}
        unittest.assertEqual(without_code, expected_payload, "should return scatter")

    # an unsatisfiable settings query surfaces as an error payload
    with app.test_client() as c:
        build_data_inst({c.port: big_df})
        build_dtypes({c.port: views.build_dtypes_state(big_df)})
        build_settings({c.port: {"query": "missing_col == 'blah'"}})
        query_params = {
            "dateCol": "date",
            "cols": json.dumps(["foo", "bar"]),
            "date": "20000101",
        }
        resp = c.get("/dtale/scatter/" + str(c.port), query_string=query_params)
        resp_json = json.loads(resp.data)
        unittest.assertEqual(
            resp_json["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )
# Example #18
def test_get_correlations_ts(unittest, rolling_data):
    """Exercise /dtale/correlations-ts: base timeseries output, dummy-encoded
    columns, min-period windows, rolling correlations and bad-query errors."""
    import dtale.views as views

    ts_df = pd.DataFrame(
        build_ts_data(size=50), columns=["date", "security_id", "foo", "bar"]
    )
    ts_df.loc[:, "baz"] = "baz"

    # predictive-power-score stats are only computed on python >= 3.6
    skip_pps = parse_version(platform.python_version()) < parse_version("3.6.0")

    with app.test_client() as c:
        build_data_inst({c.port: ts_df})
        query_params = {"dateCol": "date", "cols": json.dumps(["foo", "bar"])}
        resp = c.get(
            "/dtale/correlations-ts/" + str(c.port), query_string=query_params
        )
        resp_json = json.loads(resp.data)
        expected_payload = {
            "data": {
                "all": {
                    "x": [
                        "2000-01-01",
                        "2000-01-02",
                        "2000-01-03",
                        "2000-01-04",
                        "2000-01-05",
                    ],
                    "corr": [1.0] * 5,
                }
            },
            "max": {"corr": 1.0, "x": "2000-01-05"},
            "min": {"corr": 1.0, "x": "2000-01-01"},
            "pps": None
            if skip_pps
            else {
                "baseline_score": 12.5,
                "case": "regression",
                "is_valid_score": True,
                "metric": "mean absolute error",
                "model": "DecisionTreeRegressor()",
                "model_score": 0.0,
                "ppscore": 1.0,
                "x": "foo",
                "y": "bar",
            },
            "success": True,
        }
        # the generated "code" snippet is checked separately below
        without_code = {key: resp_json[key] for key in resp_json if key != "code"}
        unittest.assertEqual(
            without_code, expected_payload, "should return timeseries correlation"
        )
        unittest.assertEqual(resp_json["code"], CORRELATIONS_TS_CODE)

        # a dummy-encoded string column still correlates successfully
        query_params["cols"] = json.dumps(["foo", "baz_baz"])
        query_params["dummyCols"] = json.dumps(["baz"])
        resp = c.get(
            "/dtale/correlations-ts/" + str(c.port), query_string=query_params
        )
        assert json.loads(resp.data)["success"]

        # a 4-period minimum drops the leading dates from the series
        query_params["cols"] = json.dumps(["foo", "bar"])
        query_params.pop("dummyCols")
        query_params["rolling"] = False
        query_params["rollingWindow"] = 4
        query_params["minPeriods"] = 4
        resp = c.get(
            "/dtale/correlations-ts/" + str(c.port), query_string=query_params
        )
        resp_json = json.loads(resp.data)
        unittest.assertEqual(
            resp_json["data"]["all"]["x"], ["2000-01-04", "2000-01-05"]
        )

    rolling_df, _ = views.format_data(rolling_data)
    with app.test_client() as c:
        build_data_inst({c.port: rolling_df})
        build_dtypes({c.port: views.build_dtypes_state(rolling_df)})
        query_params = {
            "dateCol": "date",
            "cols": json.dumps(["0", "1"]),
            "rolling": True,
            "rollingWindow": "4",
        }
        resp = c.get(
            "/dtale/correlations-ts/" + str(c.port), query_string=query_params
        )
        resp_json = json.loads(resp.data)
        unittest.assertEqual(
            resp_json["success"], True, "should return rolling correlation"
        )

    # an unsatisfiable settings query surfaces as an error payload
    with app.test_client() as c:
        build_data_inst({c.port: ts_df})
        build_settings({c.port: {"query": "missing_col == 'blah'"}})
        resp = c.get("/dtale/correlations-ts/" + str(c.port))
        resp_json = json.loads(resp.data)
        unittest.assertEqual(
            resp_json["error"],
            "name 'missing_col' is not defined",
            "should handle correlations exception",
        )
def test_merge(unittest):
    """End-to-end test of the /dtale/merge endpoint.

    Registers three frames sharing the key columns ``key1``/``key2`` and
    posts merge configs, checking the merged frame stored in
    ``global_state``:

    1. inner join of datasets "1" and "2" on both keys
    2. left join restricted to columns A/C with the merge indicator enabled
    3. the same left join chained with a third dataset ("3")
    """
    from dtale.views import build_dtypes_state
    import dtale.global_state as global_state

    # start from an empty store so the resulting data ids are predictable
    global_state.clear_store()

    left = pd.DataFrame(
        {
            "key1": ["K0", "K0", "K1", "K2"],
            "key2": ["K0", "K1", "K0", "K1"],
            "A": ["A0", "A1", "A2", "A3"],
            "B": ["B0", "B1", "B2", "B3"],
        }
    )
    right = pd.DataFrame(
        {
            "key1": ["K0", "K1", "K1", "K2"],
            "key2": ["K0", "K0", "K0", "K0"],
            "C": ["C0", "C1", "C2", "C3"],
            "D": ["D0", "D1", "D2", "D3"],
        }
    )
    right2 = pd.DataFrame(
        {
            "key1": ["K0", "K1"],
            "key2": ["K0", "K0"],
            "E": ["E0", "E1"],
            "F": ["F0", "F1"],
        }
    )
    with app.test_client() as c:
        # register all three frames (data, dtypes, settings) with the store
        data = {"1": left, "2": right, "3": right2}
        dtypes = {k: build_dtypes_state(v) for k, v in data.items()}
        settings = {k: {} for k in data.keys()}
        build_data_inst(data)
        build_dtypes(dtypes)
        build_settings(settings)
        # empty "columns" means all columns of the dataset are kept
        datasets = [
            dict(dataId="1", columns=[], index=["key1", "key2"], suffix=""),
            dict(dataId="2", columns=[], index=["key1", "key2"], suffix=""),
        ]
        config = dict(how="inner", sort=False, indicator=False)
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        # the merged frame is stored under the data_id returned in the response
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(
            list(final_df.columns), ["key1", "key2", "A", "B", "C", "D"]
        )
        assert len(final_df) == 3

        # left join on a column subset with pandas' merge indicator enabled
        datasets[0]["columns"] = ["A"]
        datasets[1]["columns"] = ["C"]
        config["how"] = "left"
        config["indicator"] = True
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        # "merge_1" is the indicator column for the first (only) merge step
        unittest.assertEqual(
            list(final_df.columns), ["key1", "key2", "A", "C", "merge_1"]
        )
        unittest.assertEqual(
            list(final_df["merge_1"].values),
            ["both", "left_only", "both", "both", "left_only"],
        )

        # chaining a third dataset adds its columns plus a second indicator
        datasets.append(dict(dataId="3", index=["key1", "key2"], suffix="3"))
        resp = c.post(
            "/dtale/merge",
            data=json.dumps(
                dict(
                    action="merge",
                    config=json.dumps(config),
                    datasets=json.dumps(datasets),
                )
            ),
            content_type="application/json",
        )
        assert resp.status_code == 200
        final_df = global_state.get_data(resp.json["data_id"])
        unittest.assertEqual(
            list(final_df.columns),
            ["key1", "key2", "A", "C", "merge_1", "E", "F", "merge_2"],
        )
        unittest.assertEqual(
            list(final_df["merge_2"].values),
            ["both", "left_only", "both", "both", "left_only"],
        )