def test_equals():
    """Check ``equals`` on frames built from identical and differing data.

    Two Ray frames created from the same pandas frame must compare equal even
    though ``rdf.from_pandas`` receives a different second argument (2 vs. 3;
    presumably a partition count -- confirm against ``rdf.from_pandas``).  A
    frame whose ``col2`` differs in one cell must compare unequal to both.
    """
    base_data = {'col1': [2.9, 3, 3, 3], 'col2': [2, 3, 4, 1]}
    changed_data = {'col1': [2.9, 3, 3, 3], 'col2': [2, 3, 5, 1]}

    base_pandas = pd.DataFrame(base_data)
    same_a = rdf.from_pandas(base_pandas, 2)
    same_b = rdf.from_pandas(base_pandas, 3)
    assert same_a.equals(same_b)

    different = rdf.from_pandas(pd.DataFrame(changed_data), 4)
    for reference in (same_a, same_b):
        assert not different.equals(reference)
def test_float_dataframe():
    """Run the helper checks against a 4x4 all-float frame.

    A pandas frame with float columns ``col1``..``col4`` is converted with
    ``rdf.from_pandas(pandas_df, 2)`` and every helper is then called with the
    Ray frame and the pandas frame it was built from.
    """
    # NOTE(review): a later definition named ``test_float_dataframe`` in this
    # file rebinds the name, so this version would be shadowed -- confirm
    # whether it should be removed.
    pandas_df = pd.DataFrame({
        'col1': [0.0, 1.0, 2.0, 3.0],
        'col2': [4.0, 5.0, 6.0, 7.0],
        'col3': [8.0, 9.0, 10.0, 11.0],
        'col4': [12.0, 13.0, 14.0, 15.0],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = (
        lambda x: x + 1,
        lambda x: str(x),
        lambda x: x * x,
        lambda x: x,
        lambda x: False,
    )

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    for check in (test_sum, test_abs, test_keys, test_transpose):
        check(ray_df, pandas_df)
def test_float_dataframe():
    """Run the helper checks against a 4-row, 5-column all-float frame.

    The frame has float columns ``col1``..``col4`` plus an all-zero ``col5``.
    It is converted with ``rdf.from_pandas(pandas_df, 2)`` and each helper is
    called with the Ray frame and the pandas frame it was built from.  The
    per-column ``test_get`` check runs for ``col1``..``col4`` only.
    """
    # NOTE(review): an earlier, shorter definition with the same name also
    # appears in this file; this later one is the binding that survives.
    pandas_df = pd.DataFrame({
        'col1': [0.0, 1.0, 2.0, 3.0],
        'col2': [4.0, 5.0, 6.0, 7.0],
        'col3': [8.0, 9.0, 10.0, 11.0],
        'col4': [12.0, 13.0, 14.0, 15.0],
        'col5': [0.0, 0.0, 0.0, 0.0],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = (
        lambda x: x + 1,
        lambda x: str(x),
        lambda x: x * x,
        lambda x: x,
        lambda x: False,
    )
    column_names = ('col1', 'col2', 'col3', 'col4')

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    middle_checks = (
        test_sum, test_abs, test_keys, test_transpose, test_round,
        test_all, test_any, test___getitem__, test___delitem__,
        test___copy__, test___deepcopy__, test_bool, test_count,
        test_head, test_tail, test_idxmax, test_idxmin, test_pop,
    )
    for check in middle_checks:
        check(ray_df, pandas_df)

    for column in column_names:
        test_get(ray_df, pandas_df, column)

    for check in (test_get_dtype_counts, test_get_ftype_counts):
        check(ray_df, pandas_df)
def create_test_dataframe():
    """Return a Ray DataFrame built from a fixed 4-row, 5-column integer table.

    Columns ``col1``..``col4`` hold the integers 0..15 in column-major order
    and ``col5`` is all zeros.  The pandas frame is converted with
    ``rdf.from_pandas(..., 2)``.
    """
    columns = {
        'col1': [0, 1, 2, 3],
        'col2': [4, 5, 6, 7],
        'col3': [8, 9, 10, 11],
        'col4': [12, 13, 14, 15],
        'col5': [0, 0, 0, 0],
    }
    pandas_frame = pd.DataFrame(columns)
    return rdf.from_pandas(pandas_frame, 2)
def test_bool(ray_df, pd_df):
    """Check that ``bool()`` on the Ray frame matches pandas.

    Args:
        ray_df: Ray DataFrame under test; ``bool()`` is expected to raise
            ``ValueError`` for it.
        pd_df: pandas DataFrame counterpart; ``bool()`` is expected to raise
            ``ValueError`` for it as well.

    A single-element boolean frame is then built in both libraries and the
    results of ``bool()`` are compared.
    """
    # Each expectation gets its own ``raises`` block: the block exits at the
    # first exception, so a second call in the same block would never run.
    with pytest.raises(ValueError):
        ray_df.bool()
    with pytest.raises(ValueError):
        pd_df.bool()

    single_bool_pd_df = pd.DataFrame([True])
    single_bool_ray_df = rdf.from_pandas(single_bool_pd_df, 1)

    assert single_bool_pd_df.bool() == single_bool_ray_df.bool()
def test_mixed_dtype_dataframe():
    """Run the helper checks against a frame with mixed column types.

    The frame has two integer columns (``col1``, ``col2``), one float column
    (``col3``) and one string column (``col4``).  It is converted with
    ``rdf.from_pandas(pandas_df, 2)`` and each helper is called with the Ray
    frame and the pandas frame it was built from.
    """
    pandas_df = pd.DataFrame({
        'col1': [1, 2, 3, 4],
        'col2': [4, 5, 6, 7],
        'col3': [8.0, 9.4, 10.1, 11.3],
        'col4': ['a', 'b', 'c', 'd'],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check; ``x + x`` is used
    # here, which is defined for the string column as well as the numeric ones.
    testfuncs = [
        lambda x: x + x,
        lambda x: str(x),
        lambda x: x,
        lambda x: False,
    ]
    keys = ['col1', 'col2', 'col3', 'col4']

    test_roundtrip(ray_df, pandas_df)
    test_index(ray_df, pandas_df)
    test_size(ray_df, pandas_df)
    test_ndim(ray_df, pandas_df)
    test_ftypes(ray_df, pandas_df)
    test_values(ray_df, pandas_df)
    test_axes(ray_df, pandas_df)
    test_shape(ray_df, pandas_df)
    test_add_prefix(ray_df, pandas_df)
    test_add_suffix(ray_df, pandas_df)

    for testfunc in testfuncs:
        test_applymap(ray_df, pandas_df, testfunc)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    test_sum(ray_df, pandas_df)
    test_keys(ray_df, pandas_df)
    test_transpose(ray_df, pandas_df)

    for key in keys:
        test_get(ray_df, pandas_df, key)

    test_get_dtype_counts(ray_df, pandas_df)
    test_get_ftype_counts(ray_df, pandas_df)

    # Iteration helpers, each invoked once.
    test_iterrows(ray_df, pandas_df)
    test_items(ray_df, pandas_df)
    test_iteritems(ray_df, pandas_df)
    test_itertuples(ray_df, pandas_df)

    test_max(ray_df, pandas_df)
    test_min(ray_df, pandas_df)
    test_notna(ray_df, pandas_df)
    test_notnull(ray_df, pandas_df)
def test_nan_dataframe():
    """Run the helper checks against a 4x4 frame containing NaN values.

    Each of ``col1``..``col4`` holds exactly one ``np.nan``, at a different
    row per column.  The frame is converted with
    ``rdf.from_pandas(pandas_df, 2)`` and each helper is called with the Ray
    frame and the pandas frame it was built from.
    """
    # NOTE(review): a later definition named ``test_nan_dataframe`` in this
    # file rebinds the name, so this version would be shadowed -- confirm
    # whether it should be removed.
    pandas_df = pd.DataFrame({
        'col1': [1, 2, 3, np.nan],
        'col2': [4, 5, np.nan, 7],
        'col3': [8, np.nan, 10, 11],
        'col4': [np.nan, 13, 14, 15],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = (
        lambda x: x + x,
        lambda x: str(x),
        lambda x: x,
        lambda x: False,
    )
    column_names = ('col1', 'col2', 'col3', 'col4')

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    for check in (test_sum, test_keys, test_transpose):
        check(ray_df, pandas_df)

    for column in column_names:
        test_get(ray_df, pandas_df, column)

    for check in (test_get_dtype_counts, test_get_ftype_counts):
        check(ray_df, pandas_df)
def test_int_dataframe():
    """Run the helper checks against a 4x4 all-integer frame.

    ``ray.init()`` is called first, before any frame is built.  A pandas frame
    with integer columns ``col1``..``col4`` is then converted with
    ``rdf.from_pandas(pandas_df, 2)`` and each helper is called with the Ray
    frame and the pandas frame it was built from.
    """
    # NOTE(review): a later definition named ``test_int_dataframe`` in this
    # file rebinds the name, so this version would be shadowed -- confirm
    # whether it should be removed.
    ray.init()

    pandas_df = pd.DataFrame({
        'col1': [0, 1, 2, 3],
        'col2': [4, 5, 6, 7],
        'col3': [8, 9, 10, 11],
        'col4': [12, 13, 14, 15],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = (
        lambda x: x + 1,
        lambda x: str(x),
        lambda x: x * x,
        lambda x: x,
        lambda x: False,
    )

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    for check in (test_sum, test_abs, test_keys, test_transpose):
        check(ray_df, pandas_df)
def test_int_dataframe():
    """Run the helper checks against a 4-row, 5-column all-integer frame.

    ``ray.init()`` is called first, before any frame is built.  The frame has
    integer columns ``col1``..``col4`` plus an all-zero ``col5`` and is
    converted with ``rdf.from_pandas(pandas_df, 2)``.  Each helper is called
    with the Ray frame and the pandas frame it was built from; the per-column
    ``test_get`` check runs for ``col1``..``col4`` only.
    """
    # NOTE(review): an earlier, shorter definition with the same name also
    # appears in this file; this later one is the binding that survives.
    ray.init()

    pandas_df = pd.DataFrame({
        'col1': [0, 1, 2, 3],
        'col2': [4, 5, 6, 7],
        'col3': [8, 9, 10, 11],
        'col4': [12, 13, 14, 15],
        'col5': [0, 0, 0, 0],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = (
        lambda x: x + 1,
        lambda x: str(x),
        lambda x: x * x,
        lambda x: x,
        lambda x: False,
    )
    column_names = ('col1', 'col2', 'col3', 'col4')

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    middle_checks = (
        test_sum, test_abs, test_keys, test_transpose, test_round,
        test_all, test_any, test___getitem__, test___delitem__,
        test___copy__, test___deepcopy__, test_bool, test_count,
        test_head, test_tail, test_idxmax, test_idxmin, test_pop,
    )
    for check in middle_checks:
        check(ray_df, pandas_df)

    for column in column_names:
        test_get(ray_df, pandas_df, column)

    trailing_checks = (
        test_get_dtype_counts, test_get_ftype_counts, test_iterrows,
        test_items, test_iteritems, test_itertuples, test_max, test_min,
        test_notna, test_notnull,
    )
    for check in trailing_checks:
        check(ray_df, pandas_df)
def test_nan_dataframe():
    """Run the extended helper checks against a 4x4 frame containing NaNs.

    Each of ``col1``..``col4`` holds exactly one ``np.nan``, at a different
    row per column.  The frame is converted with
    ``rdf.from_pandas(pandas_df, 2)`` and each helper is called with the Ray
    frame and the pandas frame it was built from.  Besides the basic checks
    this version also calls the query, indexing, axis/index (re)setting and
    column insertion helpers.
    """
    # NOTE(review): an earlier, shorter definition with the same name also
    # appears in this file; this later one is the binding that survives.
    pandas_df = pd.DataFrame({
        'col1': [1, 2, 3, np.nan],
        'col2': [4, 5, np.nan, 7],
        'col3': [8, np.nan, 10, 11],
        'col4': [np.nan, 13, 14, 15],
    })
    ray_df = rdf.from_pandas(pandas_df, 2)

    # Element-wise callables handed to the applymap check.
    cell_funcs = [
        lambda x: x + x,
        lambda x: str(x),
        lambda x: x,
        lambda x: False,
    ]
    # Query expressions passed as one list to test_query.
    query_exprs = [
        'col1 < col2',
        'col3 > col4',
        'col1 == col2',
        '(col2 > col1) and (col1 < col3)',
    ]
    column_names = ['col1', 'col2', 'col3', 'col4']

    # Helper order is kept exactly as written; each takes (ray_df, pandas_df).
    leading_checks = (
        test_roundtrip, test_index, test_size, test_ndim, test_ftypes,
        test_values, test_axes, test_shape, test_add_prefix, test_add_suffix,
    )
    for check in leading_checks:
        check(ray_df, pandas_df)

    for cell_func in cell_funcs:
        test_applymap(ray_df, pandas_df, cell_func)

    # test_copy only receives the Ray frame.
    test_copy(ray_df)

    for check in (test_sum, test_abs, test_keys, test_transpose, test_round):
        check(ray_df, pandas_df)

    test_query(ray_df, pandas_df, query_exprs)

    dunder_and_scalar_checks = (
        test_all, test_any, test___getitem__, test___neg__, test___iter__,
        test___abs__, test___delitem__, test___copy__, test___deepcopy__,
        test_bool, test_count,
    )
    for check in dunder_and_scalar_checks:
        check(ray_df, pandas_df)

    # test_head is called once with an explicit third argument (2) and then
    # again, in the loop below, with only the two frames.
    test_head(ray_df, pandas_df, 2)

    selection_checks = (
        test_head, test_tail, test_idxmax, test_idxmin, test_pop,
        test_max, test_min, test_notna, test_notnull,
    )
    for check in selection_checks:
        check(ray_df, pandas_df)

    for column in column_names:
        test_get(ray_df, pandas_df, column)

    iteration_and_indexing_checks = (
        test_get_dtype_counts, test_get_ftype_counts, test_iterrows,
        test_items, test_iteritems, test_itertuples, test_loc, test_iloc,
    )
    for check in iteration_and_indexing_checks:
        check(ray_df, pandas_df)

    # The same labels are applied along each axis, addressed both by number
    # and by name.
    labels = ['a', 'b', 'c', 'd']
    for axis in (0, 'rows', 1, 'columns'):
        test_set_axis(ray_df, pandas_df, labels, axis)

    for column in column_names:
        test_set_index(ray_df, pandas_df, column)
        test_set_index(ray_df, pandas_df, column, inplace=True)

    test_reset_index(ray_df, pandas_df)
    test_reset_index(ray_df, pandas_df, inplace=True)

    # (insert position, frame the inserted column is read from); the column is
    # looked up immediately before each call, as in a sequence of plain calls.
    insert_cases = ((0, ray_df), (0, pandas_df), (1, ray_df), (4, ray_df))
    for column in column_names:
        for position, source in insert_cases:
            test_insert(ray_df, pandas_df, position, "New Column",
                        source[column])