Esempio n. 1
0
def test_empty_df():
    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    tm.assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    tm.assert_index_equal(df.columns, pd.Index(["a", "b"]))

    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    tm.assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    tm.assert_index_equal(df.columns, pd.Index(["a", "b"]))

    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = [1, 2, 3, 4, 5]
    pd_df["a"] = [1, 2, 3, 4, 5]
    df_equals(df, pd_df)

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = list("ABCDEF")
    pd_df["a"] = list("ABCDEF")
    df_equals(df, pd_df)

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = pd.Series([1, 2, 3, 4, 5])
    pd_df["a"] = pandas.Series([1, 2, 3, 4, 5])
    df_equals(df, pd_df)
Esempio n. 2
0
def test_unique():
    modin_result = pd.unique([2, 1, 3, 3])
    pandas_result = pandas.unique([2, 1, 3, 3])
    assert_array_equal(modin_result, pandas_result)

    modin_result = pd.unique(pd.Series([2] + [1] * 5))
    pandas_result = pandas.unique(pandas.Series([2] + [1] * 5))
    assert_array_equal(modin_result, pandas_result)

    modin_result = pd.unique(
        pd.Series([pd.Timestamp("20160101"), pd.Timestamp("20160101")])
    )
    pandas_result = pandas.unique(
        pandas.Series([pandas.Timestamp("20160101"), pandas.Timestamp("20160101")])
    )
    assert_array_equal(modin_result, pandas_result)

    modin_result = pd.unique(
        pd.Series(
            [
                pd.Timestamp("20160101", tz="US/Eastern"),
                pd.Timestamp("20160101", tz="US/Eastern"),
            ]
        )
    )
    pandas_result = pandas.unique(
        pandas.Series(
            [
                pandas.Timestamp("20160101", tz="US/Eastern"),
                pandas.Timestamp("20160101", tz="US/Eastern"),
            ]
        )
    )
    assert_array_equal(modin_result, pandas_result)

    modin_result = pd.unique(
        pd.Index(
            [
                pd.Timestamp("20160101", tz="US/Eastern"),
                pd.Timestamp("20160101", tz="US/Eastern"),
            ]
        )
    )
    pandas_result = pandas.unique(
        pandas.Index(
            [
                pandas.Timestamp("20160101", tz="US/Eastern"),
                pandas.Timestamp("20160101", tz="US/Eastern"),
            ]
        )
    )
    assert_array_equal(modin_result, pandas_result)

    modin_result = pd.unique(pd.Series(pd.Categorical(list("baabc"))))
    pandas_result = pandas.unique(pandas.Series(pandas.Categorical(list("baabc"))))
    assert_array_equal(modin_result, pandas_result)
Esempio n. 3
0
def test_mixed_type_column():
    df = pd.DataFrame({"A": [1.2, "xy", 4], "B": [3, 4, 5]})
    df = df.set_index(pd.Index(["yz", 7, 3.2]))
    view = SpreadsheetWidget(df=df)
    view._handle_view_msg_helper({
        "type": "change_sort",
        "sort_field": "A",
        "sort_ascending": True
    })
    view._handle_view_msg_helper({
        "type": "show_filter_dropdown",
        "field": "A",
        "search_val": None
    })
def test_join(test_data, test_data2):
    modin_df = pd.DataFrame(
        test_data,
        columns=["col{}".format(i) for i in range(test_data.shape[1])],
        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)],
                       name="key"),
    )
    pandas_df = pandas.DataFrame(
        test_data,
        columns=["col{}".format(i) for i in range(test_data.shape[1])],
        index=pandas.Index([i for i in range(1, test_data.shape[0] + 1)],
                           name="key"),
    )
    modin_df2 = pd.DataFrame(
        test_data2,
        columns=["col{}".format(i) for i in range(test_data2.shape[1])],
        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)],
                       name="key"),
    )
    pandas_df2 = pandas.DataFrame(
        test_data2,
        columns=["col{}".format(i) for i in range(test_data2.shape[1])],
        index=pandas.Index([i for i in range(1, test_data2.shape[0] + 1)],
                           name="key"),
    )

    hows = ["inner", "left", "right", "outer"]
    ons = ["col33", "col34"]
    sorts = [False, True]
    for i in range(4):
        for j in range(2):
            modin_result = modin_df.join(
                modin_df2,
                how=hows[i],
                on=ons[j],
                sort=sorts[j],
                lsuffix="_caller",
                rsuffix="_other",
            )
            pandas_result = pandas_df.join(
                pandas_df2,
                how=hows[i],
                on=ons[j],
                sort=sorts[j],
                lsuffix="_caller",
                rsuffix="_other",
            )
            df_equals(modin_result, pandas_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }

    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    frame_data2 = {"col5": [0], "col6": [1]}
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    join_types = ["left", "right", "outer", "inner"]
    for how in join_types:
        modin_join = modin_df.join(modin_df2, how=how)
        pandas_join = pandas_df.join(pandas_df2, how=how)
        df_equals(modin_join, pandas_join)

    frame_data3 = {"col7": [1, 2, 3, 5, 6, 7, 8]}

    modin_df3 = pd.DataFrame(frame_data3)
    pandas_df3 = pandas.DataFrame(frame_data3)

    join_types = ["left", "outer", "inner"]
    for how in join_types:
        modin_join = modin_df.join([modin_df2, modin_df3], how=how)
        pandas_join = pandas_df.join([pandas_df2, pandas_df3], how=how)
        df_equals(modin_join, pandas_join)
def test_merge(test_data, test_data2):
    modin_df = pd.DataFrame(
        test_data,
        columns=["col{}".format(i) for i in range(test_data.shape[1])],
        index=pd.Index([i for i in range(1, test_data.shape[0] + 1)],
                       name="key"),
    )
    pandas_df = pandas.DataFrame(
        test_data,
        columns=["col{}".format(i) for i in range(test_data.shape[1])],
        index=pandas.Index([i for i in range(1, test_data.shape[0] + 1)],
                           name="key"),
    )
    modin_df2 = pd.DataFrame(
        test_data2,
        columns=["col{}".format(i) for i in range(test_data2.shape[1])],
        index=pd.Index([i for i in range(1, test_data2.shape[0] + 1)],
                       name="key"),
    )
    pandas_df2 = pandas.DataFrame(
        test_data2,
        columns=["col{}".format(i) for i in range(test_data2.shape[1])],
        index=pandas.Index([i for i in range(1, test_data2.shape[0] + 1)],
                           name="key"),
    )

    hows = ["left", "inner"]
    ons = ["col33", ["col33", "col34"]]
    sorts = [False, True]
    for i in range(2):
        for j in range(2):
            modin_result = modin_df.merge(modin_df2,
                                          how=hows[i],
                                          on=ons[j],
                                          sort=sorts[j])
            pandas_result = pandas_df.merge(pandas_df2,
                                            how=hows[i],
                                            on=ons[j],
                                            sort=sorts[j])
            df_equals(modin_result, pandas_result)

            modin_result = modin_df.merge(
                modin_df2,
                how=hows[i],
                left_on="key",
                right_on="key",
                sort=sorts[j],
            )
            pandas_result = pandas_df.merge(
                pandas_df2,
                how=hows[i],
                left_on="key",
                right_on="key",
                sort=sorts[j],
            )
            df_equals(modin_result, pandas_result)

    # Test for issue #1771
    modin_df = pd.DataFrame({"name": np.arange(40)})
    modin_df2 = pd.DataFrame({"name": [39], "position": [0]})
    pandas_df = pandas.DataFrame({"name": np.arange(40)})
    pandas_df2 = pandas.DataFrame({"name": [39], "position": [0]})
    modin_result = modin_df.merge(modin_df2, on="name", how="inner")
    pandas_result = pandas_df.merge(pandas_df2, on="name", how="inner")
    df_equals(modin_result, pandas_result)

    frame_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }

    modin_df = pd.DataFrame(frame_data)
    pandas_df = pandas.DataFrame(frame_data)

    frame_data2 = {"col1": [0, 1, 2], "col2": [1, 5, 6]}
    modin_df2 = pd.DataFrame(frame_data2)
    pandas_df2 = pandas.DataFrame(frame_data2)

    join_types = ["outer", "inner"]
    for how in join_types:
        # Defaults
        modin_result = modin_df.merge(modin_df2, how=how)
        pandas_result = pandas_df.merge(pandas_df2, how=how)
        df_equals(modin_result, pandas_result)

        # left_on and right_index
        modin_result = modin_df.merge(modin_df2,
                                      how=how,
                                      left_on="col1",
                                      right_index=True)
        pandas_result = pandas_df.merge(pandas_df2,
                                        how=how,
                                        left_on="col1",
                                        right_index=True)
        df_equals(modin_result, pandas_result)

        # left_index and right_on
        modin_result = modin_df.merge(modin_df2,
                                      how=how,
                                      left_index=True,
                                      right_on="col1")
        pandas_result = pandas_df.merge(pandas_df2,
                                        how=how,
                                        left_index=True,
                                        right_on="col1")
        df_equals(modin_result, pandas_result)

        # left_on and right_on col1
        modin_result = modin_df.merge(modin_df2,
                                      how=how,
                                      left_on="col1",
                                      right_on="col1")
        pandas_result = pandas_df.merge(pandas_df2,
                                        how=how,
                                        left_on="col1",
                                        right_on="col1")
        df_equals(modin_result, pandas_result)

        # left_on and right_on col2
        modin_result = modin_df.merge(modin_df2,
                                      how=how,
                                      left_on="col2",
                                      right_on="col2")
        pandas_result = pandas_df.merge(pandas_df2,
                                        how=how,
                                        left_on="col2",
                                        right_on="col2")
        df_equals(modin_result, pandas_result)

        # left_index and right_index
        modin_result = modin_df.merge(modin_df2,
                                      how=how,
                                      left_index=True,
                                      right_index=True)
        pandas_result = pandas_df.merge(pandas_df2,
                                        how=how,
                                        left_index=True,
                                        right_index=True)
        df_equals(modin_result, pandas_result)

    # Named Series promoted to DF
    s = pd.Series(frame_data2.get("col1"))
    with pytest.raises(ValueError):
        modin_df.merge(s)

    s = pd.Series(frame_data2.get("col1"), name="col1")
    df_equals(modin_df.merge(s), modin_df.merge(modin_df2[["col1"]]))

    with pytest.raises(TypeError):
        modin_df.merge("Non-valid type")
Esempio n. 6
0
def test_asof_large(lookup, subset):
    data = test_data["float_nan_data"]
    index = list(range(NROWS))
    modin_where = pd.Index(lookup)
    pandas_where = pandas.Index(lookup)
    compare_asof(data, index, modin_where, pandas_where, subset)
Esempio n. 7
0
            -0.815727,
            0.785291,
            1.180861,
        ],
        1: [
            1.272731,
            1.272731,
            1.272731,
            1.272731,
            0.638028,
            0.638028,
            0.638028,
            0.638028,
            -0.683739,
            -0.683739,
            -0.683739,
            -0.683739,
            -1.227020,
            -1.227020,
            -1.227020,
            -1.227020,
        ],
    },
    index=pd.Index(
        data=[1, 1, 0, 0, 1, 1, 0, 0, 3, 3, 2, 2, 3, 3, 2, 2,], name="cluster"
    ),
)
print(df)
df1 = df.sort_values(by=['distance'])
print(df1)