def test_empty_df():
    """Check empty-frame construction and first-column assignment.

    Part one builds frames that carry only row labels, only column labels,
    or no labels at all, passes each through ``df_is_empty`` and verifies
    that the labels supplied at construction are kept while the other axis
    has length zero.

    Part two assigns a column ``"a"`` to a freshly created empty frame (from
    a list of ints, a list of single-character strings, and a Series) and
    compares the result, via ``df_equals``, with the same assignment done on
    a plain ``pandas.DataFrame``.

    NOTE(review): ``pd`` is presumably the modin.pandas alias (the frames are
    compared against ``pandas``) -- confirm against the file's imports.
    """
    # Row labels only: the index must survive and there must be no columns.
    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    tm.assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    # Column labels only: the columns must survive and there must be no rows.
    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    tm.assert_index_equal(df.columns, pd.Index(["a", "b"]))

    # No labels on either axis.
    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    # Assigning a plain list as the first column of an empty frame.
    for values in ([1, 2, 3, 4, 5], list("ABCDEF")):
        df = pd.DataFrame()
        pd_df = pandas.DataFrame()
        # Each frame gets its own list object, so the two sides never share
        # the same underlying container.
        df["a"] = list(values)
        pd_df["a"] = list(values)
        df_equals(df, pd_df)

    # Assigning a Series; each side uses the Series type of its own library.
    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = pd.Series([1, 2, 3, 4, 5])
    pd_df["a"] = pandas.Series([1, 2, 3, 4, 5])
    df_equals(df, pd_df)
def test_unique():
    """Compare ``pd.unique`` with ``pandas.unique`` over several input kinds.

    Each case builds the same input once with ``pd`` and once with
    ``pandas`` and asserts, via ``assert_array_equal``, that the two
    ``unique`` results agree.  Inputs covered: a plain list, an integer
    Series, a Series of naive timestamps, a Series and an Index of
    timezone-aware timestamps, and a categorical Series.

    NOTE(review): ``pd`` is presumably the modin.pandas alias -- confirm
    against the file's imports.
    """

    def _eastern_stamps(lib):
        # Two identical tz-aware timestamps built with the given library.
        return [
            lib.Timestamp("20160101", tz="US/Eastern"),
            lib.Timestamp("20160101", tz="US/Eastern"),
        ]

    # Every builder receives the library module to construct its input with,
    # so both sides are built from their own library's types.
    input_builders = [
        lambda lib: [2, 1, 3, 3],
        lambda lib: lib.Series([2] + [1] * 5),
        lambda lib: lib.Series(
            [lib.Timestamp("20160101"), lib.Timestamp("20160101")]
        ),
        lambda lib: lib.Series(_eastern_stamps(lib)),
        lambda lib: lib.Index(_eastern_stamps(lib)),
        lambda lib: lib.Series(lib.Categorical(list("baabc"))),
    ]

    for build in input_builders:
        modin_result = pd.unique(build(pd))
        pandas_result = pandas.unique(build(pandas))
        assert_array_equal(modin_result, pandas_result)
def test_mixed_type_column():
    """Send sort and filter messages for a column holding mixed types.

    Builds a frame whose column ``"A"`` and whose index both mix floats,
    strings and ints, wraps it in a ``SpreadsheetWidget`` and feeds it a
    ``change_sort`` message followed by a ``show_filter_dropdown`` message
    for column ``"A"``.  The test makes no assertions of its own; it passes
    when neither message handler raises.
    """
    frame = pd.DataFrame({"A": [1.2, "xy", 4], "B": [3, 4, 5]})
    frame = frame.set_index(pd.Index(["yz", 7, 3.2]))
    widget = SpreadsheetWidget(df=frame)

    sort_request = {
        "type": "change_sort",
        "sort_field": "A",
        "sort_ascending": True,
    }
    filter_request = {
        "type": "show_filter_dropdown",
        "field": "A",
        "search_val": None,
    }

    # Order matters: the sort request is delivered before the filter request.
    for message in (sort_request, filter_request):
        widget._handle_view_msg_helper(message)
def test_join(test_data, test_data2):
    """Compare ``DataFrame.join`` results between ``pd`` and ``pandas``.

    Three scenarios are exercised, each checked with ``df_equals``:

    1. Joining two frames built from the fixtures on a column (``col33`` or
       ``col34``) for every ``how``, with suffixes for overlapping columns.
    2. Index-on-index joins of two small literal frames for every ``how``.
    3. Joining a list of two frames for ``left``, ``outer`` and ``inner``.

    Parameters
    ----------
    test_data, test_data2
        Two-dimensional data exposing ``.shape``; presumably numpy arrays
        supplied by a pytest parametrization with at least 35 columns, since
        ``col33`` and ``col34`` are joined on -- TODO confirm.
    """

    def _keyed_frame(lib, data):
        # Columns are named col0..col{n-1}; rows are labelled 1..m under an
        # index called "key".  The labels are passed as a list, as before.
        return lib.DataFrame(
            data,
            columns=["col{}".format(c) for c in range(data.shape[1])],
            index=lib.Index(list(range(1, data.shape[0] + 1)), name="key"),
        )

    modin_df = _keyed_frame(pd, test_data)
    pandas_df = _keyed_frame(pandas, test_data)
    modin_df2 = _keyed_frame(pd, test_data2)
    pandas_df2 = _keyed_frame(pandas, test_data2)

    # ``on`` and ``sort`` vary together: (col33, unsorted), (col34, sorted).
    on_and_sort = list(zip(["col33", "col34"], [False, True]))
    for how in ["inner", "left", "right", "outer"]:
        for on, sort in on_and_sort:
            join_kwargs = {
                "how": how,
                "on": on,
                "sort": sort,
                "lsuffix": "_caller",
                "rsuffix": "_other",
            }
            modin_result = modin_df.join(modin_df2, **join_kwargs)
            pandas_result = pandas_df.join(pandas_df2, **join_kwargs)
            df_equals(modin_result, pandas_result)

    # Small literal frames joined on their default indexes.
    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    frame_data2 = {"col5": [0], "col6": [1]}

    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    for how in ("left", "right", "outer", "inner"):
        modin_join = modin_df.join(modin_df2, how=how)
        pandas_join = pandas_df.join(pandas_df2, how=how)
        df_equals(modin_join, pandas_join)

    # Joining a list of frames; "right" is not exercised for this form.
    frame_data3 = {"col7": [1, 2, 3, 5, 6, 7, 8]}
    modin_df3 = pd.DataFrame(frame_data3)
    pandas_df3 = pandas.DataFrame(frame_data3)

    for how in ("left", "outer", "inner"):
        modin_join = modin_df.join([modin_df2, modin_df3], how=how)
        pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)
        df_equals(modin_join, pandas_join)
def test_merge(test_data, test_data2):
    """Compare ``DataFrame.merge`` results between ``pd`` and ``pandas``.

    Scenarios, each checked with ``df_equals`` unless noted:

    1. Fixture-built frames merged on one column, on two columns, and on the
       ``"key"`` index level, for ``left`` and ``inner`` joins.
    2. A regression case referenced in the code as issue #1771.
    3. Small literal frames merged with every combination of column/index
       keys listed below, for ``outer`` and ``inner`` joins.
    4. Merging with a Series: an unnamed Series must raise ``ValueError``; a
       named one must give the same result as merging the one-column frame.
    5. Merging with an unsupported type must raise ``TypeError``.

    Parameters
    ----------
    test_data, test_data2
        Two-dimensional data exposing ``.shape``; presumably numpy arrays
        supplied by a pytest parametrization with at least 35 columns, since
        ``col33`` and ``col34`` are merged on -- TODO confirm.
    """

    def _keyed_frame(lib, data):
        # Columns are named col0..col{n-1}; rows are labelled 1..m under an
        # index called "key".  The labels are passed as a list, as before.
        return lib.DataFrame(
            data,
            columns=["col{}".format(c) for c in range(data.shape[1])],
            index=lib.Index(list(range(1, data.shape[0] + 1)), name="key"),
        )

    modin_df = _keyed_frame(pd, test_data)
    pandas_df = _keyed_frame(pandas, test_data)
    modin_df2 = _keyed_frame(pd, test_data2)
    pandas_df2 = _keyed_frame(pandas, test_data2)

    # ``on`` and ``sort`` vary together: the single-column key is merged
    # unsorted, the two-column key sorted.
    on_and_sort = list(zip(["col33", ["col33", "col34"]], [False, True]))
    for how in ["left", "inner"]:
        for on, sort in on_and_sort:
            modin_result = modin_df.merge(modin_df2, how=how, on=on, sort=sort)
            pandas_result = pandas_df.merge(pandas_df2, how=how, on=on, sort=sort)
            df_equals(modin_result, pandas_result)

            # Same how/sort combination, keyed on the named index instead.
            key_kwargs = {
                "how": how,
                "left_on": "key",
                "right_on": "key",
                "sort": sort,
            }
            modin_result = modin_df.merge(modin_df2, **key_kwargs)
            pandas_result = pandas_df.merge(pandas_df2, **key_kwargs)
            df_equals(modin_result, pandas_result)

    # Test for issue #1771
    modin_df = pd.DataFrame({"name": np.arange(40)})
    modin_df2 = pd.DataFrame({"name": [39], "position": [0]})
    pandas_df = pandas.DataFrame({"name": np.arange(40)})
    pandas_df2 = pandas.DataFrame({"name": [39], "position": [0]})
    modin_result = modin_df.merge(modin_df2, on="name", how="inner")
    pandas_result = pandas_df.merge(pandas_df2, on="name", how="inner")
    df_equals(modin_result, pandas_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    frame_data2 = {"col1": [0, 1, 2], "col2": [1, 5, 6]}

    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    # Key selections tried for every join type, in this order.
    key_variants = [
        {},  # Defaults
        {"left_on": "col1", "right_index": True},  # left_on and right_index
        {"left_index": True, "right_on": "col1"},  # left_index and right_on
        {"left_on": "col1", "right_on": "col1"},  # left_on and right_on col1
        {"left_on": "col2", "right_on": "col2"},  # left_on and right_on col2
        {"left_index": True, "right_index": True},  # left_index and right_index
    ]
    for how in ("outer", "inner"):
        for keys in key_variants:
            modin_result = modin_df.merge(modin_df2, how=how, **keys)
            pandas_result = pandas_df.merge(pandas_df2, how=how, **keys)
            df_equals(modin_result, pandas_result)

    # Named Series promoted to DF
    unnamed = pd.Series(frame_data2["col1"])
    with pytest.raises(ValueError):
        modin_df.merge(unnamed)

    named = pd.Series(frame_data2["col1"], name="col1")
    df_equals(modin_df.merge(named), modin_df.merge(modin_df2[["col1"]]))

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")
def test_asof_large(lookup, subset):
    """Run the shared ``asof`` comparison on the ``float_nan_data`` dataset.

    Parameters
    ----------
    lookup
        Labels to look up; wrapped in both a ``pd.Index`` and a
        ``pandas.Index`` before being handed to ``compare_asof``.
    subset
        Forwarded unchanged to ``compare_asof`` as its last argument.
    """
    # Arguments are evaluated left to right, matching the original order:
    # dataset, row labels 0..NROWS-1, then the two lookup indexes.
    compare_asof(
        test_data["float_nan_data"],
        list(range(NROWS)),
        pd.Index(lookup),
        pandas.Index(lookup),
        subset,
    )
-0.815727, 0.785291, 1.180861, ], 1: [ 1.272731, 1.272731, 1.272731, 1.272731, 0.638028, 0.638028, 0.638028, 0.638028, -0.683739, -0.683739, -0.683739, -0.683739, -1.227020, -1.227020, -1.227020, -1.227020, ], }, index=pd.Index( data=[1, 1, 0, 0, 1, 1, 0, 0, 3, 3, 2, 2, 3, 3, 2, 2,], name="cluster" ), ) print(df) df1 = df.sort_values(by=['distance']) print(df1)