Beispiel #1
0
    def test_index_groupby(self, simple_index):
        idx = simple_index[:5]
        to_groupby = np.array([1, 2, np.nan, 2, 1])
        tm.assert_dict_equal(idx.groupby(to_groupby), {
            1.0: idx[[0, 4]],
            2.0: idx[[1, 3]]
        })

        to_groupby = DatetimeIndex(
            [
                datetime(2011, 11, 1),
                datetime(2011, 12, 1),
                pd.NaT,
                datetime(2011, 12, 1),
                datetime(2011, 11, 1),
            ],
            tz="UTC",
        ).values

        ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")]
        expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]}
        tm.assert_dict_equal(idx.groupby(to_groupby), expected)
Beispiel #2
0
def test_observed_groups(observed):
    # gh-20583
    # test that we have the appropriate groups

    cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"])
    df = DataFrame({"cat": cat, "vals": [1, 2, 3]})
    g = df.groupby("cat", observed=observed)

    result = g.groups
    if observed:
        expected = {
            "a": Index([0, 2], dtype="int64"),
            "c": Index([1], dtype="int64")
        }
    else:
        expected = {
            "a": Index([0, 2], dtype="int64"),
            "b": Index([], dtype="int64"),
            "c": Index([1], dtype="int64"),
        }

    tm.assert_dict_equal(result, expected)
Beispiel #3
0
    def test_multiindex_columns_empty_level(self):
        lst = [["count", "values"], ["to filter", ""]]
        midx = MultiIndex.from_tuples(lst)

        df = DataFrame([[1, "A"]], columns=midx)

        grouped = df.groupby("to filter").groups
        assert grouped["A"] == [0]

        grouped = df.groupby([("to filter", "")]).groups
        assert grouped["A"] == [0]

        df = DataFrame([[1, "A"], [2, "B"]], columns=midx)

        expected = df.groupby("to filter").groups
        result = df.groupby([("to filter", "")]).groups
        assert result == expected

        df = DataFrame([[1, "A"], [2, "A"]], columns=midx)

        expected = df.groupby("to filter").groups
        result = df.groupby([("to filter", "")]).groups
        tm.assert_dict_equal(result, expected)
Beispiel #4
0
    def test_concatenating_accepts_pandas_dataframes(self, _tfs_file_x_pathlib,
                                                     _tfs_file_y_pathlib,
                                                     how_headers, axis, join):
        """Concatenate a mix of TfsDataFrames and plain pandas DataFrames.

        The result must be a TfsDataFrame with OrderedDict headers, its
        headers must equal the pairwise merge of all input headers, and its
        data must match what ``pd.concat`` gives for the same inputs.
        """
        tfs_frame = tfs.read(_tfs_file_x_pathlib)
        # Wrapping in pd.DataFrame is done for the test: headers are lost here.
        plain_frame = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))
        # Four of each, so the input mixes TfsDataFrames and pandas.DataFrames.
        objs = [tfs_frame] * 4 + [plain_frame] * 4

        result = concat(objs, how_headers=how_headers, axis=axis, join=join)

        assert isinstance(result, TfsDataFrame)
        assert isinstance(result.headers, OrderedDict)

        # Plain DataFrames are represented by an empty OrderedDict, as that is
        # what they get when converted inside the call.
        all_headers = []
        for dframe in objs:
            if isinstance(dframe, TfsDataFrame):
                all_headers.append(dframe.headers)
            else:
                all_headers.append(OrderedDict())

        merger = partial(merge_headers, how=how_headers)
        expected_headers = reduce(merger, all_headers)
        assert_dict_equal(result.headers, expected_headers)

        expected_frame = pd.concat(objs, axis=axis, join=join)
        assert_frame_equal(result, expected_frame)
Beispiel #5
0
    def test_merging_accepts_pandas_dataframe(self, _tfs_file_x_pathlib,
                                              _tfs_file_y_pathlib, how_headers,
                                              how, on):
        """Merge a TfsDataFrame with a plain pandas DataFrame.

        The result must be a TfsDataFrame with OrderedDict headers, its
        headers must equal the left headers merged with an empty OrderedDict,
        and its data must match a plain pandas merge of the same frames.
        """
        left = tfs.read(_tfs_file_x_pathlib)
        # Wrapping in pd.DataFrame is done for the test: headers are lost here.
        right = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))

        result = left.merge(right, how_headers=how_headers, how=how, on=on)

        assert isinstance(result, TfsDataFrame)
        assert isinstance(result.headers, OrderedDict)

        # The right-hand side is represented by an empty OrderedDict, as that
        # is what it gets when converted inside the call.
        expected_headers = merge_headers(left.headers,
                                         OrderedDict(),
                                         how=how_headers)
        assert_dict_equal(result.headers, expected_headers)

        expected_frame = pd.DataFrame(left).merge(pd.DataFrame(right),
                                                  how=how,
                                                  on=on)
        assert_frame_equal(result, expected_frame)
Beispiel #6
0
    def test_to_dict_timestamp(self):

        # GH#11247
        # split/records producing np.datetime64 rather than Timestamps
        # on datetime64[ns] dtypes only

        tsmp = Timestamp("20130101")
        test_data = DataFrame({"A": [tsmp, tsmp], "B": [tsmp, tsmp]})
        test_data_mixed = DataFrame({"A": [tsmp, tsmp], "B": [1, 2]})

        expected_records = [{"A": tsmp, "B": tsmp}, {"A": tsmp, "B": tsmp}]
        expected_records_mixed = [{"A": tsmp, "B": 1}, {"A": tsmp, "B": 2}]

        assert test_data.to_dict(orient="records") == expected_records
        assert test_data_mixed.to_dict(
            orient="records") == expected_records_mixed

        expected_series = {
            "A": Series([tsmp, tsmp], name="A"),
            "B": Series([tsmp, tsmp], name="B"),
        }
        expected_series_mixed = {
            "A": Series([tsmp, tsmp], name="A"),
            "B": Series([1, 2], name="B"),
        }

        tm.assert_dict_equal(test_data.to_dict(orient="series"),
                             expected_series)
        tm.assert_dict_equal(test_data_mixed.to_dict(orient="series"),
                             expected_series_mixed)

        expected_split = {
            "index": [0, 1],
            "data": [[tsmp, tsmp], [tsmp, tsmp]],
            "columns": ["A", "B"],
        }
        expected_split_mixed = {
            "index": [0, 1],
            "data": [[tsmp, 1], [tsmp, 2]],
            "columns": ["A", "B"],
        }

        tm.assert_dict_equal(test_data.to_dict(orient="split"), expected_split)
        tm.assert_dict_equal(test_data_mixed.to_dict(orient="split"),
                             expected_split_mixed)
Beispiel #7
0
    def test_groupby_multiindex_tuple(self):
        # GH 17979
        df = DataFrame(
            [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]],
            columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]),
        )
        expected = df.groupby([("b", 1)]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)

        df2 = DataFrame(
            df.values,
            columns=MultiIndex.from_arrays(
                [["a", "b", "b", "c"], ["d", "d", "e", "e"]]
            ),
        )
        expected = df2.groupby([("b", "d")]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)

        df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"])
        expected = df3.groupby([("b", "d")]).groups
        result = df.groupby(("b", 1)).groups
        tm.assert_dict_equal(expected, result)
Beispiel #8
0
    def test_to_dict(self, mapping):
        test_data = {
            "A": {
                "1": 1,
                "2": 2
            },
            "B": {
                "1": "1",
                "2": "2",
                "3": "3"
            }
        }

        # GH#16122
        recons_data = DataFrame(test_data).to_dict(into=mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert v2 == recons_data[k][k2]

        recons_data = DataFrame(test_data).to_dict("l", mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert v2 == recons_data[k][int(k2) - 1]

        recons_data = DataFrame(test_data).to_dict("s", mapping)

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert v2 == recons_data[k][k2]

        recons_data = DataFrame(test_data).to_dict("sp", mapping)
        expected_split = {
            "columns": ["A", "B"],
            "index": ["1", "2", "3"],
            "data": [[1.0, "1"], [2.0, "2"], [np.nan, "3"]],
        }
        tm.assert_dict_equal(recons_data, expected_split)

        recons_data = DataFrame(test_data).to_dict("r", mapping)
        expected_records = [
            {
                "A": 1.0,
                "B": "1"
            },
            {
                "A": 2.0,
                "B": "2"
            },
            {
                "A": np.nan,
                "B": "3"
            },
        ]
        assert isinstance(recons_data, list)
        assert len(recons_data) == 3
        for l, r in zip(recons_data, expected_records):
            tm.assert_dict_equal(l, r)

        # GH#10844
        recons_data = DataFrame(test_data).to_dict("i")

        for k, v in test_data.items():
            for k2, v2 in v.items():
                assert v2 == recons_data[k2][k]

        df = DataFrame(test_data)
        df["duped"] = df[df.columns[0]]
        recons_data = df.to_dict("i")
        comp_data = test_data.copy()
        comp_data["duped"] = comp_data[df.columns[0]]
        for k, v in comp_data.items():
            for k2, v2 in v.items():
                assert v2 == recons_data[k2][k]
Beispiel #9
0
    def test_mi_sparse(self):
        """Check the cell dicts from ``Styler._translate`` for a two-level row index.

        The frame has two rows whose first index level is "a" in both.  Three
        body cells are checked: the two index cells of the first row and the
        first index cell of the second row.  The first header row is then
        checked as a whole.
        """
        df = DataFrame({"A": [1, 2]},
                       index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]))

        result = df.style._translate()

        # ((body row, cell position), expected cell), checked in this order.
        body_cases = [
            (
                (0, 0),
                {
                    "value": "a",
                    "display_value": "a",
                    "is_visible": True,
                    "type": "th",
                    "attributes": 'rowspan="2"',
                    "class": "row_heading level0 row0",
                    "id": "level0_row0",
                },
            ),
            (
                (0, 1),
                {
                    "value": 0,
                    "display_value": 0,
                    "is_visible": True,
                    "type": "th",
                    "class": "row_heading level1 row0",
                    "id": "level1_row0",
                },
            ),
            (
                (1, 0),
                {
                    "value": "a",
                    "display_value": "a",
                    "is_visible": False,
                    "type": "th",
                    "class": "row_heading level0 row1",
                    "id": "level0_row1",
                },
            ),
        ]
        for (row, col), expected_cell in body_cases:
            tm.assert_dict_equal(result["body"][row][col], expected_cell)

        def header_cell(css_class, value):
            # Every expected header cell is a visible "th" whose display
            # value equals its value.
            return {
                "type": "th",
                "class": css_class,
                "value": value,
                "is_visible": True,
                "display_value": value,
            }

        expected_head = [
            header_cell("blank", self.blank_value),
            header_cell("blank level0", self.blank_value),
            header_cell("col_heading level0 col0", "A"),
        ]
        assert result["head"][0] == expected_head
Beispiel #10
0
def assert_tfs_frame_equal(df1, df2):
    """Assert that two frames agree in their data and in their ``headers``.

    The frames themselves are compared first; their ``headers`` attributes
    are then compared as dicts with key checking enabled.
    """
    assert_frame_equal(df1, df2)

    left_headers, right_headers = df1.headers, df2.headers
    assert_dict_equal(left_headers, right_headers, compare_keys=True)