Exemple #1
0
def test_quantile(request, data, q):
    """Compare ``quantile`` between the ``pd`` frame and the ``pandas`` frame.

    The same checks run twice: on the frames as built from ``data`` and on
    their transposes. When the test id matches ``no_numeric_dfs`` (per
    ``name_contains``), ``quantile`` on the ``pd`` frame is expected to raise
    ``ValueError`` instead.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    has_numeric = not name_contains(request.node.name, no_numeric_dfs)

    # First pass uses the frames untouched, second pass their transposes.
    # The transpose is taken at each use site rather than cached up front.
    for orient in (lambda frame: frame, lambda frame: frame.T):
        if not has_numeric:
            with pytest.raises(ValueError):
                orient(modin_df).quantile(q)
            continue

        df_equals(orient(modin_df).quantile(q), orient(pandas_df).quantile(q))
        df_equals(
            orient(modin_df).quantile(q, axis=1),
            orient(pandas_df).quantile(q, axis=1),
        )

        # With numeric_only=False pandas may raise; if so, the pd frame must
        # raise the same exception type, otherwise the results must match.
        try:
            expected = orient(pandas_df).quantile(q, axis=1, numeric_only=False)
        except Exception as err:
            with pytest.raises(type(err)):
                orient(modin_df).quantile(q, axis=1, numeric_only=False)
        else:
            actual = orient(modin_df).quantile(q, axis=1, numeric_only=False)
            df_equals(actual, expected)
Exemple #2
0
def test_cumsum(request, data, axis, skipna):
    """Compare ``cumsum`` between the ``pd`` frame and the ``pandas`` frame.

    The comparison runs on the frames as built from ``data`` and then on
    their transposes. For test ids matching ``datetime_timedelta_data`` with a
    row-wise ``axis`` (``0`` or ``"rows"``), a ``TypeError`` from the ``pd``
    frame is expected instead of a comparison.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # pandas exhibits weird behavior for this case
    # Remove this case when we can pull the error messages from backend
    expect_type_error = name_contains(
        request.node.name, ["datetime_timedelta_data"]
    ) and axis in (0, "rows")

    # First pass: frames as-is; second pass: transposed frames.
    for orient in (lambda frame: frame, lambda frame: frame.T):
        if expect_type_error:
            with pytest.raises(TypeError):
                orient(modin_df).cumsum(axis=axis, skipna=skipna)
            continue

        # Whatever pandas does (result or exception type), the pd frame
        # must do the same.
        try:
            expected = orient(pandas_df).cumsum(axis=axis, skipna=skipna)
        except Exception as err:
            with pytest.raises(type(err)):
                orient(modin_df).cumsum(axis=axis, skipna=skipna)
        else:
            actual = orient(modin_df).cumsum(axis=axis, skipna=skipna)
            df_equals(actual, expected)
Exemple #3
0
def test_agg_numeric(request, data, axis, func):
    """Compare ``agg(func, axis)`` between the ``pd`` and ``pandas`` frames.

    Only runs when the test id matches both ``numeric_agg_funcs`` and
    ``numeric_dfs`` (per ``name_contains``); otherwise the test is a no-op.
    """
    test_id = request.node.name
    applicable = name_contains(test_id, numeric_agg_funcs) and name_contains(
        test_id, numeric_dfs
    )
    if not applicable:
        return

    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # pandas is the reference: mirror either its result or its exception type.
    try:
        expected = pandas_df.agg(func, axis)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_df.agg(func, axis)
    else:
        actual = modin_df.agg(func, axis)
        df_equals(actual, expected)
Exemple #4
0
def test_applymap_numeric(request, data, testfunc):
    """Compare ``applymap(testfunc)`` between the ``pd`` and ``pandas`` frames.

    Both frames are always built, but the comparison only runs when the test
    id matches ``numeric_dfs`` (per ``name_contains``).
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if not name_contains(request.node.name, numeric_dfs):
        return

    # pandas is the reference: mirror either its result or its exception type.
    try:
        expected = pandas_df.applymap(testfunc)
    except Exception as err:
        with pytest.raises(type(err)):
            modin_df.applymap(testfunc)
    else:
        actual = modin_df.applymap(testfunc)
        df_equals(actual, expected)
Exemple #5
0
def test_iloc(request, data):
    """Compare ``iloc`` reads and writes between the ``pd`` and ``pandas`` frames.

    For test ids matching ``empty_data`` the only check is that a positional
    scalar lookup on the ``pd`` frame raises ``IndexError``. Otherwise scalar,
    Series-shaped and DataFrame-shaped reads are compared, followed by three
    write scenarios and a callable indexer.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if name_contains(request.node.name, ["empty_data"]):
        with pytest.raises(IndexError):
            modin_df.iloc[0, 1]
        return

    # Scalar
    np.testing.assert_equal(modin_df.iloc[0, 1], pandas_df.iloc[0, 1])

    # Series (np.s_ builds the same key objects the subscript syntax would)
    for key in (np.s_[0], np.s_[1:, 0], np.s_[1:2, 0]):
        df_equals(modin_df.iloc[key], pandas_df.iloc[key])

    # DataFrame
    # See issue #80
    # df_equals(modin_df.iloc[[1, 2], [1, 0]], pandas_df.iloc[[1, 2], [1, 0]])
    for key in (np.s_[[1, 2]], np.s_[1:2, 0:2]):
        df_equals(modin_df.iloc[key], pandas_df.iloc[key])

    # Issue #43: the lookup itself is the check; its result is discarded.
    modin_df.iloc[0:3, :]

    # Write Item: constant into a list of rows
    modin_df.iloc[[1, 2]] = 42
    pandas_df.iloc[[1, 2]] = 42
    df_equals(modin_df, pandas_df)

    # Write Item: one row copied over another, starting from fresh frames
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    modin_df.iloc[0] = modin_df.iloc[1]
    pandas_df.iloc[0] = pandas_df.iloc[1]
    df_equals(modin_df, pandas_df)

    # Write Item: one column copied over another, starting from fresh frames
    modin_df, pandas_df = pd.DataFrame(data), pandas.DataFrame(data)
    modin_df.iloc[:, 0] = modin_df.iloc[:, 1]
    pandas_df.iloc[:, 0] = pandas_df.iloc[:, 1]
    df_equals(modin_df, pandas_df)

    # From issue #1775: callable indexer, applied to the frames as left by
    # the column write above.
    def first_five_positions(df):
        return df.index.get_indexer_for(df.index[:5])

    df_equals(
        modin_df.iloc[first_five_positions],
        pandas_df.iloc[first_five_positions],
    )
Exemple #6
0
def test_apply_numeric(request, data, func, axis):
    """Compare ``apply`` between the ``pd`` and ``pandas`` frames.

    Two independent checks:
    * when the test id matches ``numeric_dfs``, ``apply(func, axis)`` must
      give the same result (or raise the same exception type) as pandas;
    * unless the test id contains ``"empty_data"``, a row-wise ``apply`` that
      drops the first column label must give the same result on both frames.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    test_id = request.node.name

    if name_contains(test_id, numeric_dfs):
        try:
            expected = pandas_df.apply(func, axis)
        except Exception as err:
            with pytest.raises(type(err)):
                modin_df.apply(func, axis)
        else:
            actual = modin_df.apply(func, axis)
            df_equals(actual, expected)

    if "empty_data" in test_id:
        return

    key = modin_df.columns[0]

    def drop_key(row):
        # With axis=1 each call receives one row; remove the first column's entry.
        return row.drop(key)

    modin_result = modin_df.apply(drop_key, axis=1)
    pandas_result = pandas_df.apply(drop_key, axis=1)
    df_equals(modin_result, pandas_result)
Exemple #7
0
def test_plot(request, data):
    """Check that plotting the ``pd`` frame draws the same line data as pandas.

    Only runs when the test id matches ``numeric_dfs`` (per ``name_contains``).
    For every pair of plotted lines, both the x data and the y data are
    compared. The original compared x data a second time in the unmasked
    y-data branch, so differing y values went unnoticed; that is fixed here.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if not name_contains(request.node.name, numeric_dfs):
        return

    # We have to test this way because equality in plots means same object.
    zipped_plot_lines = zip(modin_df.plot().lines, pandas_df.plot().lines)
    for left, right in zipped_plot_lines:
        # Apply the identical comparison to the x data and then the y data.
        for getter in ("get_xdata", "get_ydata"):
            left_data = getattr(left, getter)()
            right_data = getattr(right, getter)()
            both_masked = isinstance(
                left_data, np.ma.core.MaskedArray
            ) and isinstance(right_data, np.ma.core.MaskedArray)
            if both_masked:
                # Compare the underlying values of the element-wise result,
                # ignoring the mask.
                assert all((left_data == right_data).data)
            else:
                assert np.array_equal(left_data, right_data)
Exemple #8
0
def test_clip(request, data, axis):
    """Compare ``clip`` between the ``pd`` and ``pandas`` frames.

    Only runs when the test id matches ``numeric_dfs`` (per ``name_contains``).
    Scalar and list-like bounds are drawn from ``random_state`` and each
    lower/upper combination is applied to both frames along ``axis``. Finally,
    a list-like ``lower`` with ``axis=None`` is expected to raise
    ``ValueError`` on the ``pd`` frame.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if not name_contains(request.node.name, numeric_dfs):
        return

    # List-like bounds need one entry per label along the clipped axis.
    if pandas.DataFrame()._get_axis_number(axis):
        ind_len = len(modin_df.columns)
    else:
        ind_len = len(modin_df.index)

    # set bounds (draw order matters: it advances the shared random_state)
    # NOTE(review): random_integers is deprecated in NumPy's RandomState in
    # favour of randint -- confirm random_state's type before migrating.
    lower, upper = np.sort(random_state.random_integers(RAND_LOW, RAND_HIGH, 2))
    lower_list = random_state.random_integers(RAND_LOW, RAND_HIGH, ind_len)
    upper_list = random_state.random_integers(RAND_LOW, RAND_HIGH, ind_len)

    bound_cases = (
        (None, upper),  # only upper scalar bound
        (lower, upper),  # lower and upper scalar bound
        (lower_list, upper_list),  # lower and upper list bound on each column
        (np.nan, upper_list),  # only upper list bound on each column
    )
    for low_bound, high_bound in bound_cases:
        modin_result = modin_df.clip(low_bound, high_bound, axis=axis)
        pandas_result = pandas_df.clip(low_bound, high_bound, axis=axis)
        df_equals(modin_result, pandas_result)

    with pytest.raises(ValueError):
        modin_df.clip(lower=[1, 2, 3], axis=None)
def test_sort_values(request, data, axis, ascending, na_position):
    """Compare ``sort_values`` between the ``pd`` and ``pandas`` frames.

    Skipped for test ids containing ``"empty_data"``. Otherwise it runs when
    sorting row-wise (``axis`` is ``0`` or ``"rows"``) or when the test id
    matches ``numeric_dfs`` (per ``name_contains``). The original compared
    ``axis`` against ``"over rows"``, which is not an axis value, so the
    string row axis was silently treated as "not row-wise"; it now matches
    the ``"rows"`` spelling used by the other tests in this file.

    Both a single sort key and a two-key list (first and last label) are
    checked, each with ``inplace=False`` and ``inplace=True``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    if "empty_data" in request.node.name:
        return
    row_wise = axis == 0 or axis == "rows"
    if not (row_wise or name_contains(request.node.name, numeric_dfs)):
        return

    # Sort keys are labels from the *other* axis: column labels when sorting
    # rows, index labels when sorting columns.
    labels = modin_df.index if axis == 1 or axis == "columns" else modin_df.columns
    sort_kwargs = {"axis": axis, "ascending": ascending, "na_position": na_position}

    for by in (labels[0], [labels[0], labels[-1]]):
        # Out-of-place sort: compare the returned frames.
        modin_result = modin_df.sort_values(by, inplace=False, **sort_kwargs)
        pandas_result = pandas_df.sort_values(by, inplace=False, **sort_kwargs)
        df_equals(modin_result, pandas_result)

        # In-place sort: work on copies so the source frames stay unsorted
        # for the next key.
        modin_df_cp = modin_df.copy()
        pandas_df_cp = pandas_df.copy()
        modin_df_cp.sort_values(by, inplace=True, **sort_kwargs)
        pandas_df_cp.sort_values(by, inplace=True, **sort_kwargs)
        df_equals(modin_df_cp, pandas_df_cp)