def test_to_datetime():
    """Check that ``pd.to_datetime`` agrees with ``pandas.to_datetime``.

    Covers DataFrame input, Series input, integer epoch scalars with an
    explicit unit, and a list of day offsets relative to a custom origin.
    """
    # DataFrame input: year/month/day columns are assembled into datetimes.
    date_parts = {"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}
    modin_frame = pd.DataFrame(date_parts)
    pandas_frame = pandas.DataFrame(date_parts)
    df_equals(pd.to_datetime(modin_frame), pandas.to_datetime(pandas_frame))

    # Series input: a repeated run of date strings.
    date_strings = ["3/11/2000", "3/12/2000", "3/13/2000"] * 1000
    modin_series = pd.Series(date_strings)
    pandas_series = pandas.Series(date_strings)
    df_equals(pd.to_datetime(modin_series), pandas.to_datetime(pandas_series))

    # Scalar epoch inputs: both calls must produce equal values.
    epoch_cases = (
        (1490195805, "s"),
        (1490195805433502912, "ns"),
    )
    for value, unit in epoch_cases:
        assert pd.to_datetime(value, unit=unit) == pandas.to_datetime(
            value, unit=unit
        )

    # List input with a custom origin; each side builds its own Timestamp.
    day_offsets = [1, 2, 3]
    actual = pd.to_datetime(
        day_offsets, unit="D", origin=pd.Timestamp("2000-01-01")
    )
    expected = pandas.to_datetime(
        day_offsets, unit="D", origin=pandas.Timestamp("2000-01-01")
    )
    assert actual.equals(expected)
def test_to_numeric(data, errors, downcast):
    """Check that ``pd.to_numeric`` on a Series agrees with pandas.

    Args:
        data: Values used to build both the ``pd`` and ``pandas`` Series.
        errors: Forwarded as the ``errors`` keyword to both ``to_numeric`` calls.
        downcast: Forwarded as the ``downcast`` keyword to both calls.
    """
    conversion_options = {"errors": errors, "downcast": downcast}

    modin_input = pd.Series(data)
    pandas_input = pandas.Series(data)

    actual = pd.to_numeric(modin_input, **conversion_options)
    expected = pandas.to_numeric(pandas_input, **conversion_options)

    df_equals(actual, expected)
def test_notnull(data):
    """Check that ``pd.notnull`` agrees with ``pandas.notnull``.

    Covers DataFrame input built from ``data``, a small Series containing
    a NaN, and a scalar NaN.

    Args:
        data: Values used to build both the ``pandas`` and ``pd`` DataFrames.
    """
    # DataFrame input.
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)
    pandas_result = pandas.notnull(pandas_df)
    modin_result = pd.notnull(modin_df)
    df_equals(modin_result, pandas_result)

    # Series input containing a missing value.
    modin_result = pd.notnull(pd.Series([1, np.nan, 2]))
    pandas_result = pandas.notnull(pandas.Series([1, np.nan, 2]))
    df_equals(modin_result, pandas_result)

    # Scalar input. The previous version asserted on ``isna`` here, which
    # left the scalar path of ``notnull`` untested in this test.
    assert pd.notnull(np.nan) == pandas.notnull(np.nan)
def test_merge():
    """Check that ``pd.merge`` agrees with ``pandas.merge``.

    For each join type ("outer", "inner"), the same merge is run through
    both ``pd`` and ``pandas`` with several key/index combinations and the
    results are compared with ``df_equals``. The test then checks that
    merging a Series raises ``ValueError`` and merging a non-frame object
    raises ``TypeError``.
    """
    left_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    right_data = {"col1": [0, 1, 2], "col2": [1, 5, 6]}

    modin_left = pd.DataFrame(left_data)
    pandas_left = pandas.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)
    pandas_right = pandas.DataFrame(right_data)

    # Each entry is one set of key/index keyword arguments to exercise.
    key_variants = (
        {},  # Defaults
        {"left_on": "col1", "right_index": True},
        {"left_index": True, "right_on": "col1"},
        {"left_on": "col1", "right_on": "col1"},
        {"left_on": "col2", "right_on": "col2"},
        {"left_index": True, "right_index": True},
    )

    for how in ("outer", "inner"):
        for keys in key_variants:
            actual = pd.merge(modin_left, modin_right, how=how, **keys)
            expected = pandas.merge(pandas_left, pandas_right, how=how, **keys)
            df_equals(actual, expected)

    # Invalid left operands must be rejected.
    series = pd.Series(left_data["col1"])
    with pytest.raises(ValueError):
        pd.merge(series, modin_right)

    with pytest.raises(TypeError):
        pd.merge("Non-valid type", modin_right)