def test_unstack(data, is_multi_idx, is_multi_col):
    """Check that ``unstack`` gives the same result on Modin and pandas frames.

    Parameters
    ----------
    data
        Input passed to ``create_test_dfs`` to build the frame pair.
    is_multi_idx : bool
        When true, the rows get a generated 4-level tree-like MultiIndex;
        otherwise the pandas frame's own index is reused.
    is_multi_col : bool
        When true, the columns get a generated 3-level tree-like MultiIndex;
        otherwise the pandas frame's own columns are reused.
    """
    modin_df, pandas_df = create_test_dfs(data)

    # The row index is built before the columns, as generation order may matter.
    row_labels = (
        generate_multiindex(len(pandas_df), nlevels=4, is_tree_like=True)
        if is_multi_idx
        else pandas_df.index
    )
    col_labels = (
        generate_multiindex(len(pandas_df.columns), nlevels=3, is_tree_like=True)
        if is_multi_col
        else pandas_df.columns
    )

    # Columns are installed on both frames first, then the row index;
    # the pandas frame is updated before the Modin one in each step.
    for frame in (pandas_df, modin_df):
        frame.columns = col_labels
    for frame in (pandas_df, modin_df):
        frame.index = row_labels

    df_equals(modin_df.unstack(), pandas_df.unstack())
    df_equals(modin_df.unstack(level=1), pandas_df.unstack(level=1))

    if not is_multi_idx:
        return

    # List-valued levels are only exercised with a MultiIndex on the rows.
    for levels in ((0, 1), (0, 1, 2), (0, 1, 2, 3)):
        df_equals(
            modin_df.unstack(level=list(levels)),
            pandas_df.unstack(level=list(levels)),
        )
def get_new_index(index, cond):
    """Return a replacement for ``index`` selected by ``cond``.

    Parameters
    ----------
    index
        Existing axis labels; only its length is used when a new
        MultiIndex is generated.
    cond : str
        ``"col_multi_tree"`` / ``"idx_multi_tree"`` produce a 3-level
        tree-like MultiIndex, ``"col_multi_not_tree"`` /
        ``"idx_multi_not_tree"`` produce a 3-level MultiIndex with the
        generator's default layout, and any other value leaves ``index``
        untouched.

    Returns
    -------
    The generated MultiIndex, or ``index`` itself when ``cond`` matches
    none of the names above.
    """
    tree_like_conds = ("col_multi_tree", "idx_multi_tree")
    not_tree_conds = ("col_multi_not_tree", "idx_multi_not_tree")

    if cond in tree_like_conds:
        return generate_multiindex(len(index), nlevels=3, is_tree_like=True)
    if cond in not_tree_conds:
        return generate_multiindex(len(index), nlevels=3)
    return index
def test_sort_values(
    data, by, axis, ascending, inplace, kind, na_position, ignore_index, key
):
    """Compare ``DataFrame.sort_values`` between Modin and pandas.

    Parameters
    ----------
    data : dict
        Column data used to build both frames.
    by : str
        Comma-separated specifiers resolved against the axis labels:
        ``"first"``, ``"last"``, ``"middle"`` or ``"multiindex_level<N>"``
        (the name of level ``N``). If the string contains ``"multiindex"``,
        both axes of the frames get a generated 2-level MultiIndex.
    axis, inplace, kind, na_position, ignore_index, key
        Forwarded unchanged to ``sort_values``.
    ascending
        Forwarded to ``sort_values``, except that ``"list_first_True"`` and
        ``"list_first_False"`` are expanded to an alternating list of
        booleans (one per ``by`` entry) starting with the named value.

    Raises
    ------
    Exception
        If ``by`` contains a specifier that is not recognised.
    """
    if ascending is None:
        # This skip also covers ``ascending is None and key is not None``
        # (Pandas bug #41318); a separate skip for that combination was
        # unreachable behind this one and has been dropped.
        pytest.skip("None is not a valid value for ascending.")
    if (axis == 1 or axis == "columns") and ignore_index:
        pytest.skip("Pandas bug #39426 which is fixed in Pandas 1.3")

    if "multiindex" in by:
        index = generate_multiindex(len(data[list(data.keys())[0]]), nlevels=2)
        columns = generate_multiindex(len(data.keys()), nlevels=2)
        # Re-key the data by the generated column labels. The loop variable
        # is deliberately not called ``key`` so it cannot be confused with
        # the ``key`` parameter forwarded to ``sort_values`` below.
        data = {columns[pos]: data[name] for pos, name in enumerate(data)}
    else:
        index = None
        columns = None

    modin_df = pd.DataFrame(data, index=index, columns=columns)
    pandas_df = pandas.DataFrame(data, index=index, columns=columns)

    # Sorting along axis 1 is keyed by row labels; otherwise by column labels.
    if axis == 1 or axis == "columns":
        labels = modin_df.index
    else:
        labels = modin_df.columns

    # Parse "by" spec
    by_list = []
    for b in by.split(","):
        if b == "first":
            by_list.append(labels[0])
        elif b == "last":
            by_list.append(labels[-1])
        elif b == "middle":
            by_list.append(labels[len(labels) // 2])
        elif b.startswith("multiindex_level"):
            by_list.append(labels.names[int(b[len("multiindex_level"):])])
        else:
            raise Exception('Unknown "by" specifier:' + b)

    # Create "ascending" list
    if ascending in ["list_first_True", "list_first_False"]:
        # Odd counters map to True, so starting at 1 puts True first.
        start = 0 if ascending == "list_first_False" else 1
        ascending = [i & 1 > 0 for i in range(start, len(by_list) + start)]

    eval_general(
        modin_df,
        pandas_df,
        lambda df: df.sort_values(
            by_list,
            axis=axis,
            ascending=ascending,
            inplace=inplace,
            kind=kind,
            na_position=na_position,
            ignore_index=ignore_index,
            key=key,
        ),
        __inplace__=inplace,
    )
def test_all_any_level(data, axis, level, method):
    """Compare a ``level``-aware reduction between Modin and pandas.

    A generated MultiIndex is installed on the rows when ``axis == 0`` and
    on the columns otherwise. The DataFrame method named by ``method`` is
    then looked up with ``getattr`` and called with ``axis`` and ``level``
    on both frames.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Pick the axis attribute that should receive the MultiIndex.
    target = "index" if axis == 0 else "columns"
    multi_labels = generate_multiindex(len(getattr(modin_df, target)))
    for frame in (modin_df, pandas_df):
        setattr(frame, target, multi_labels)

    eval_general(
        modin_df,
        pandas_df,
        lambda df: getattr(df, method)(axis=axis, level=level),
    )
def test_count_level(data, axis, level):
    """Compare ``count(axis=axis, level=level)`` between Modin and pandas.

    A generated MultiIndex is installed on the rows when ``axis == 0`` and
    on the columns otherwise, so that ``level`` has something to refer to.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Pick the axis attribute that should receive the MultiIndex.
    target = "index" if axis == 0 else "columns"
    multi_labels = generate_multiindex(len(getattr(modin_df, target)))
    for frame in (modin_df, pandas_df):
        setattr(frame, target, multi_labels)

    def count_by_level(df):
        return df.count(axis=axis, level=level)

    eval_general(modin_df, pandas_df, count_by_level)
def test_mad_level(level):
    """Compare ``mad(axis=1, level=level)`` between Modin and pandas.

    Both frames are built from ``test_data_values[0]`` and receive the same
    generated MultiIndex as their columns before the comparison.
    """
    data = test_data_values[0]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # One label per column of the source data.
    multi_columns = generate_multiindex(len(data.keys()))
    for frame in (modin_df, pandas_df):
        frame.columns = multi_columns

    def mad_by_level(df):
        return df.mad(axis=1, level=level)

    eval_general(modin_df, pandas_df, mad_by_level)
def test_kurt_kurtosis_level(level):
    """Compare ``kurtosis(axis=1, level=level)`` between Modin and pandas.

    Both frames are built from ``test_data["int_data"]`` and receive the
    same generated MultiIndex as their columns before the comparison.
    """
    data = test_data["int_data"]
    df_modin = pd.DataFrame(data)
    df_pandas = pandas.DataFrame(data)

    # One label per column of the source data.
    multi_columns = generate_multiindex(len(data.keys()))
    for frame in (df_modin, df_pandas):
        frame.columns = multi_columns

    def kurtosis_by_level(df):
        return df.kurtosis(axis=1, level=level)

    eval_general(df_modin, df_pandas, kurtosis_by_level)
def test_sort_multiindex(sort_remaining):
    """Compare ``sort_index`` on frames with a MultiIndex on both axes.

    Both frames are built from ``test_data["int_data"]``; rows and columns
    each receive a generated MultiIndex. ``sort_index`` is then compared for
    ``level=0``, ``axis=0`` and ``axis=1``, with ``sort_remaining`` forwarded
    to every call. Each comparison is expected to emit a ``UserWarning``.
    """
    data = test_data["int_data"]
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    # Rows first, then columns; the Modin frame is updated before pandas.
    for axis_attr in ("index", "columns"):
        multi_labels = generate_multiindex(len(getattr(modin_df, axis_attr)))
        for frame in (modin_df, pandas_df):
            setattr(frame, axis_attr, multi_labels)

    for sort_kwargs in ({"level": 0}, {"axis": 0}, {"axis": 1}):
        with pytest.warns(UserWarning):
            modin_sorted = modin_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            pandas_sorted = pandas_df.sort_index(
                sort_remaining=sort_remaining, **sort_kwargs
            )
            df_equals(modin_sorted, pandas_sorted)
def applier(df1, df2, **kwargs):
    """Give ``df2`` a generated MultiIndex and add it to ``df1`` on level 1.

    ``df2.index`` is overwritten in place with a MultiIndex generated for
    ``len(df2)`` rows. Extra keyword arguments are accepted but not used.

    Returns
    -------
    The result of ``df1.add(df2, level=1)``.
    """
    row_count = len(df2)
    df2.index = generate_multiindex(row_count)
    result = df1.add(df2, level=1)
    return result