Esempio n. 1
0
def test_unstack(data, is_multi_idx, is_multi_col):
    """Check that ``unstack`` gives the same result on modin and pandas frames.

    Parameters
    ----------
    data
        Input handed to ``create_test_dfs`` to build the modin/pandas pair.
    is_multi_idx
        When truthy, the row labels are replaced by a 4-level index produced
        by ``generate_multiindex(..., is_tree_like=True)``; the extra
        multi-level ``unstack`` calls are only made in this case.
    is_multi_col
        When truthy, the column labels are replaced by a 3-level index
        produced by ``generate_multiindex(..., is_tree_like=True)``.
    """
    modin_df, pandas_df = create_test_dfs(data)

    index = (
        generate_multiindex(len(pandas_df), nlevels=4, is_tree_like=True)
        if is_multi_idx
        else pandas_df.index
    )
    columns = (
        generate_multiindex(len(pandas_df.columns), nlevels=3, is_tree_like=True)
        if is_multi_col
        else pandas_df.columns
    )

    # Both frames receive the very same label objects (pandas first, then
    # modin) so that any difference in the results comes from ``unstack``.
    pandas_df.columns = columns
    modin_df.columns = columns
    pandas_df.index = index
    modin_df.index = index

    df_equals(modin_df.unstack(), pandas_df.unstack())
    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))

    if not is_multi_idx:
        return

    # Unstack the first 2, 3 and finally all 4 index levels at once.
    for depth in (2, 3, 4):
        df_equals(
            modin_df.unstack(level=list(range(depth))),
            pandas_df.unstack(level=list(range(depth))),
        )
Esempio n. 2
0
 def get_new_index(index, cond):
     """Return the labels to use for an axis, depending on ``cond``.

     ``"col_multi_tree"`` / ``"idx_multi_tree"`` yield a 3-level index from
     ``generate_multiindex`` with ``is_tree_like=True``;
     ``"col_multi_not_tree"`` / ``"idx_multi_not_tree"`` yield a 3-level
     index with the generator's defaults. Any other ``cond`` returns
     ``index`` unchanged.
     """
     if cond in ("col_multi_tree", "idx_multi_tree"):
         return generate_multiindex(len(index), nlevels=3, is_tree_like=True)
     if cond in ("col_multi_not_tree", "idx_multi_not_tree"):
         return generate_multiindex(len(index), nlevels=3)
     # Unrecognised condition: keep the labels that were passed in.
     return index
Esempio n. 3
0
def test_sort_values(data, by, axis, ascending, inplace, kind, na_position,
                     ignore_index, key):
    """Compare ``DataFrame.sort_values`` between modin and pandas.

    Parameters
    ----------
    data
        Mapping of column name to column values used to build both frames.
    by : str
        Comma-separated sort spec. Each item is ``"first"``, ``"last"``,
        ``"middle"`` (a label picked from the axis being sorted by) or
        ``"multiindex_level<N>"`` (the name of level ``N``). If the string
        contains ``"multiindex"``, both axes are given 2-level indexes.
    axis
        Axis passed to ``sort_values``; ``1`` / ``"columns"`` means the
        ``by`` labels are taken from the row index, otherwise from the
        columns.
    ascending
        Passed to ``sort_values``. ``None`` skips the test, and the strings
        ``"list_first_True"`` / ``"list_first_False"`` are expanded into an
        alternating list of booleans starting with the named value.
    inplace, kind, na_position, ignore_index, key
        Forwarded to ``sort_values`` as-is.

    Raises
    ------
    Exception
        If ``by`` contains an item that is not one of the known specifiers.
    """
    if ascending is None:
        # This skip also covers ``ascending=None`` combined with a ``key``
        # (Pandas bug #41318), so no separate check for that case is needed.
        pytest.skip("None is not a valid value for ascending.")
    if (axis == 1 or axis == "columns") and ignore_index:
        pytest.skip("Pandas bug #39426 which is fixed in Pandas 1.3")

    if "multiindex" in by:
        index = generate_multiindex(len(data[list(data.keys())[0]]), nlevels=2)
        columns = generate_multiindex(len(data.keys()), nlevels=2)
        # Re-key the data by the new column labels. The loop variable is
        # deliberately not called ``key`` so it cannot be confused with the
        # ``key`` parameter that is forwarded to ``sort_values`` below.
        data = {
            columns[ind]: data[col_name] for ind, col_name in enumerate(data)
        }
    else:
        index = None
        columns = None

    modin_df = pd.DataFrame(data, index=index, columns=columns)
    pandas_df = pandas.DataFrame(data, index=index, columns=columns)

    # Sorting along axis 1 orders columns by the values found in given rows,
    # so the "by" labels come from the row index; otherwise from the columns.
    labels = modin_df.index if axis == 1 or axis == "columns" else modin_df.columns

    # Parse "by" spec
    by_list = []
    for b in by.split(","):
        if b == "first":
            by_list.append(labels[0])
        elif b == "last":
            by_list.append(labels[-1])
        elif b == "middle":
            by_list.append(labels[len(labels) // 2])
        elif b.startswith("multiindex_level"):
            by_list.append(labels.names[int(b[len("multiindex_level"):])])
        else:
            raise Exception('Unknown "by" specifier:' + b)

    # Create "ascending" list: alternate True/False, one entry per "by" item,
    # starting with False for "list_first_False" and True otherwise.
    if ascending in ["list_first_True", "list_first_False"]:
        start = 0 if ascending == "list_first_False" else 1
        ascending = [(i & 1) > 0 for i in range(start, len(by_list) + start)]

    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.sort_values(
            by_list,
            axis=axis,
            ascending=ascending,
            inplace=inplace,
            kind=kind,
            na_position=na_position,
            ignore_index=ignore_index,
            key=key,
        ),
        __inplace__=inplace,
    )
Esempio n. 4
0
def test_all_any_level(data, axis, level, method):
    """Compare a level-wise reduction named by ``method`` on modin and pandas.

    The axis selected by ``axis`` (rows when ``axis == 0``, columns
    otherwise) is given an index from ``generate_multiindex`` on both
    frames, then ``getattr(df, method)(axis=axis, level=level)`` is
    evaluated through ``eval_general``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Name of the frame attribute holding the labels of the chosen axis.
    axis_attr = "index" if axis == 0 else "columns"
    multi_labels = generate_multiindex(len(getattr(modin_df, axis_attr)))
    setattr(modin_df, axis_attr, multi_labels)
    setattr(pandas_df, axis_attr, multi_labels)

    def reduce_by_level(df):
        return getattr(df, method)(axis=axis, level=level)

    eval_general(modin_df, pandas_df, reduce_by_level)
Esempio n. 5
0
def test_count_level(data, axis, level):
    """Compare ``count(axis=axis, level=level)`` between modin and pandas.

    Before counting, the labels of the selected axis (rows when
    ``axis == 0``, columns otherwise) are replaced on both frames by the
    same index obtained from ``generate_multiindex``.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)
    frames = (modin_df, pandas_df)

    if axis != 0:
        multi_cols = generate_multiindex(len(modin_df.columns))
        for frame in frames:
            frame.columns = multi_cols
    else:
        multi_rows = generate_multiindex(len(modin_df.index))
        for frame in frames:
            frame.index = multi_rows

    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.count(axis=axis, level=level),
    )
Esempio n. 6
0
def test_mad_level(level):
    """Compare ``mad(axis=1, level=level)`` between modin and pandas.

    Both frames are built from ``test_data_values[0]`` and their columns are
    replaced by the same index from ``generate_multiindex``.
    """
    data = test_data_values[0]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # One generated label per original column, shared by both frames.
    multi_cols = generate_multiindex(len(data.keys()))
    for frame in (modin_df, pandas_df):
        frame.columns = multi_cols

    def mad_by_level(df):
        return df.mad(axis=1, level=level)

    eval_general(modin_df, pandas_df, mad_by_level)
Esempio n. 7
0
def test_kurt_kurtosis_level(level):
    """Compare ``kurtosis(axis=1, level=level)`` between modin and pandas.

    Both frames are built from ``test_data["int_data"]`` and their columns
    are replaced by the same index from ``generate_multiindex``.
    """
    data = test_data["int_data"]
    df_modin = pd.DataFrame(data)
    df_pandas = pandas.DataFrame(data)

    # One generated label per original column, shared by both frames.
    multi_cols = generate_multiindex(len(data.keys()))
    df_modin.columns = multi_cols
    df_pandas.columns = multi_cols

    def kurtosis_by_level(df):
        return df.kurtosis(axis=1, level=level)

    eval_general(df_modin, df_pandas, kurtosis_by_level)
def test_sort_multiindex(sort_remaining):
    """Compare ``sort_index`` on frames whose rows and columns are multi-level.

    Both axes of both frames get indexes from ``generate_multiindex``; then
    ``sort_index`` is called with ``level=0``, ``axis=0`` and ``axis=1`` in
    turn, each time forwarding ``sort_remaining``. Every comparison is
    expected to emit a ``UserWarning``.
    """
    data = test_data["int_data"]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    for axis_attr in ("index", "columns"):
        multi_labels = generate_multiindex(len(getattr(modin_df, axis_attr)))
        setattr(modin_df, axis_attr, multi_labels)
        setattr(pandas_df, axis_attr, multi_labels)

    for sort_kwargs in ({"level": 0}, {"axis": 0}, {"axis": 1}):
        # Both sorts and the comparison stay inside the warning check, so the
        # UserWarning may come from any of them.
        with pytest.warns(UserWarning):
            modin_sorted = modin_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            pandas_sorted = pandas_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            df_equals(modin_sorted, pandas_sorted)
Esempio n. 9
0
 def applier(df1, df2, **kwargs):
     """Add ``df2`` to ``df1`` on level 1 after re-indexing ``df2``.

     ``df2``'s index is replaced in place by the result of
     ``generate_multiindex(len(df2))``; extra keyword arguments are accepted
     but not used.
     """
     multi_idx = generate_multiindex(len(df2))
     df2.index = multi_idx
     summed = df1.add(df2, level=1)
     return summed