def test_index_groupby(self, simple_index): idx = simple_index[:5] to_groupby = np.array([1, 2, np.nan, 2, 1]) tm.assert_dict_equal(idx.groupby(to_groupby), { 1.0: idx[[0, 4]], 2.0: idx[[1, 3]] }) to_groupby = DatetimeIndex( [ datetime(2011, 11, 1), datetime(2011, 12, 1), pd.NaT, datetime(2011, 12, 1), datetime(2011, 11, 1), ], tz="UTC", ).values ex_keys = [Timestamp("2011-11-01"), Timestamp("2011-12-01")] expected = {ex_keys[0]: idx[[0, 4]], ex_keys[1]: idx[[1, 3]]} tm.assert_dict_equal(idx.groupby(to_groupby), expected)
def test_observed_groups(observed): # gh-20583 # test that we have the appropriate groups cat = Categorical(["a", "c", "a"], categories=["a", "b", "c"]) df = DataFrame({"cat": cat, "vals": [1, 2, 3]}) g = df.groupby("cat", observed=observed) result = g.groups if observed: expected = { "a": Index([0, 2], dtype="int64"), "c": Index([1], dtype="int64") } else: expected = { "a": Index([0, 2], dtype="int64"), "b": Index([], dtype="int64"), "c": Index([1], dtype="int64"), } tm.assert_dict_equal(result, expected)
def test_multiindex_columns_empty_level(self): lst = [["count", "values"], ["to filter", ""]] midx = MultiIndex.from_tuples(lst) df = DataFrame([[1, "A"]], columns=midx) grouped = df.groupby("to filter").groups assert grouped["A"] == [0] grouped = df.groupby([("to filter", "")]).groups assert grouped["A"] == [0] df = DataFrame([[1, "A"], [2, "B"]], columns=midx) expected = df.groupby("to filter").groups result = df.groupby([("to filter", "")]).groups assert result == expected df = DataFrame([[1, "A"], [2, "A"]], columns=midx) expected = df.groupby("to filter").groups result = df.groupby([("to filter", "")]).groups tm.assert_dict_equal(result, expected)
def test_concatenating_accepts_pandas_dataframes(
    self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, axis, join
):
    """``concat`` accepts a mix of TfsDataFrames and plain pandas DataFrames.

    Plain DataFrames contribute an empty OrderedDict of headers, and the data
    part of the result must match what ``pandas.concat`` produces.
    """
    dframe_x = tfs.read(_tfs_file_x_pathlib)
    # Round-tripping through pd.DataFrame intentionally drops the headers.
    dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))
    # A mix of TfsDataFrames and pandas.DataFrames.
    objs = [dframe_x] * 4 + [dframe_y] * 4

    result = concat(objs, how_headers=how_headers, axis=axis, join=join)
    merger = partial(merge_headers, how=how_headers)

    assert isinstance(result, TfsDataFrame)
    assert isinstance(result.headers, OrderedDict)
    # Plain DataFrames are treated as carrying empty headers, which is what
    # they are converted to inside the concat call.
    all_headers = [
        dframe.headers if isinstance(dframe, TfsDataFrame) else OrderedDict()
        for dframe in objs
    ]
    assert_dict_equal(result.headers, reduce(merger, all_headers))
    assert_frame_equal(result, pd.concat(objs, axis=axis, join=join))
def test_merging_accepts_pandas_dataframe(
    self, _tfs_file_x_pathlib, _tfs_file_y_pathlib, how_headers, how, on
):
    """``TfsDataFrame.merge`` accepts a plain pandas DataFrame on the right.

    The pandas frame contributes an empty OrderedDict of headers, and the
    data part of the result must match what ``pandas.DataFrame.merge``
    produces.
    """
    dframe_x = tfs.read(_tfs_file_x_pathlib)
    # Round-tripping through pd.DataFrame intentionally drops the headers.
    dframe_y = pd.DataFrame(tfs.read(_tfs_file_y_pathlib))

    result = dframe_x.merge(dframe_y, how_headers=how_headers, how=how, on=on)
    assert isinstance(result, TfsDataFrame)
    assert isinstance(result.headers, OrderedDict)
    # Empty OrderedDict here: it is what dframe_y becomes when converted
    # inside the merge call.
    assert_dict_equal(
        result.headers,
        merge_headers(dframe_x.headers, OrderedDict(), how=how_headers),
    )
    assert_frame_equal(
        result,
        pd.DataFrame(dframe_x).merge(pd.DataFrame(dframe_y), how=how, on=on),
    )
def test_to_dict_timestamp(self): # GH#11247 # split/records producing np.datetime64 rather than Timestamps # on datetime64[ns] dtypes only tsmp = Timestamp("20130101") test_data = DataFrame({"A": [tsmp, tsmp], "B": [tsmp, tsmp]}) test_data_mixed = DataFrame({"A": [tsmp, tsmp], "B": [1, 2]}) expected_records = [{"A": tsmp, "B": tsmp}, {"A": tsmp, "B": tsmp}] expected_records_mixed = [{"A": tsmp, "B": 1}, {"A": tsmp, "B": 2}] assert test_data.to_dict(orient="records") == expected_records assert test_data_mixed.to_dict( orient="records") == expected_records_mixed expected_series = { "A": Series([tsmp, tsmp], name="A"), "B": Series([tsmp, tsmp], name="B"), } expected_series_mixed = { "A": Series([tsmp, tsmp], name="A"), "B": Series([1, 2], name="B"), } tm.assert_dict_equal(test_data.to_dict(orient="series"), expected_series) tm.assert_dict_equal(test_data_mixed.to_dict(orient="series"), expected_series_mixed) expected_split = { "index": [0, 1], "data": [[tsmp, tsmp], [tsmp, tsmp]], "columns": ["A", "B"], } expected_split_mixed = { "index": [0, 1], "data": [[tsmp, 1], [tsmp, 2]], "columns": ["A", "B"], } tm.assert_dict_equal(test_data.to_dict(orient="split"), expected_split) tm.assert_dict_equal(test_data_mixed.to_dict(orient="split"), expected_split_mixed)
def test_groupby_multiindex_tuple(self): # GH 17979 df = DataFrame( [[1, 2, 3, 4], [3, 4, 5, 6], [1, 4, 2, 3]], columns=MultiIndex.from_arrays([["a", "b", "b", "c"], [1, 1, 2, 2]]), ) expected = df.groupby([("b", 1)]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) df2 = DataFrame( df.values, columns=MultiIndex.from_arrays( [["a", "b", "b", "c"], ["d", "d", "e", "e"]] ), ) expected = df2.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result) df3 = DataFrame(df.values, columns=[("a", "d"), ("b", "d"), ("b", "e"), "c"]) expected = df3.groupby([("b", "d")]).groups result = df.groupby(("b", 1)).groups tm.assert_dict_equal(expected, result)
def test_to_dict(self, mapping): test_data = { "A": { "1": 1, "2": 2 }, "B": { "1": "1", "2": "2", "3": "3" } } # GH#16122 recons_data = DataFrame(test_data).to_dict(into=mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] recons_data = DataFrame(test_data).to_dict("l", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][int(k2) - 1] recons_data = DataFrame(test_data).to_dict("s", mapping) for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k][k2] recons_data = DataFrame(test_data).to_dict("sp", mapping) expected_split = { "columns": ["A", "B"], "index": ["1", "2", "3"], "data": [[1.0, "1"], [2.0, "2"], [np.nan, "3"]], } tm.assert_dict_equal(recons_data, expected_split) recons_data = DataFrame(test_data).to_dict("r", mapping) expected_records = [ { "A": 1.0, "B": "1" }, { "A": 2.0, "B": "2" }, { "A": np.nan, "B": "3" }, ] assert isinstance(recons_data, list) assert len(recons_data) == 3 for l, r in zip(recons_data, expected_records): tm.assert_dict_equal(l, r) # GH#10844 recons_data = DataFrame(test_data).to_dict("i") for k, v in test_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k2][k] df = DataFrame(test_data) df["duped"] = df[df.columns[0]] recons_data = df.to_dict("i") comp_data = test_data.copy() comp_data["duped"] = comp_data[df.columns[0]] for k, v in comp_data.items(): for k2, v2 in v.items(): assert v2 == recons_data[k2][k]
def test_mi_sparse(self):
    """Sparse rendering of a MultiIndex row header: the repeated level-0
    label is rendered once with a rowspan and hidden on subsequent rows.

    NOTE(review): relies on the private ``Styler._translate()`` API —
    its signature changed in later pandas versions; confirm against the
    pinned pandas release.
    """
    df = DataFrame(
        {"A": [1, 2]},
        index=pd.MultiIndex.from_arrays([["a", "a"], [0, 1]]),
    )
    result = df.style._translate()

    # First row carries the level-0 label spanning both rows.
    body_0 = result["body"][0][0]
    expected_0 = {
        "value": "a",
        "display_value": "a",
        "is_visible": True,
        "type": "th",
        "attributes": 'rowspan="2"',
        "class": "row_heading level0 row0",
        "id": "level0_row0",
    }
    tm.assert_dict_equal(body_0, expected_0)

    body_1 = result["body"][0][1]
    expected_1 = {
        "value": 0,
        "display_value": 0,
        "is_visible": True,
        "type": "th",
        "class": "row_heading level1 row0",
        "id": "level1_row0",
    }
    tm.assert_dict_equal(body_1, expected_1)

    # Second row: the repeated level-0 label is present but not visible.
    body_10 = result["body"][1][0]
    expected_10 = {
        "value": "a",
        "display_value": "a",
        "is_visible": False,
        "type": "th",
        "class": "row_heading level0 row1",
        "id": "level0_row1",
    }
    tm.assert_dict_equal(body_10, expected_10)

    # Header row: two blank cells for the two index levels, then column "A".
    head = result["head"][0]
    expected = [
        {
            "type": "th",
            "class": "blank",
            "value": self.blank_value,
            "is_visible": True,
            "display_value": self.blank_value,
        },
        {
            "type": "th",
            "class": "blank level0",
            "value": self.blank_value,
            "is_visible": True,
            "display_value": self.blank_value,
        },
        {
            "type": "th",
            "class": "col_heading level0 col0",
            "value": "A",
            "is_visible": True,
            "display_value": "A",
        },
    ]
    assert head == expected
def assert_tfs_frame_equal(df1, df2):
    """Assert two TfsDataFrames are equal in both data and headers.

    Compares the tabular part via ``assert_frame_equal`` and the headers
    mappings via ``assert_dict_equal`` (keys included).
    """
    assert_frame_equal(df1, df2)
    assert_dict_equal(df1.headers, df2.headers, compare_keys=True)