Example #1
0
def test_to_datetime():
    """Check ``pd.to_datetime`` against ``pandas.to_datetime``.

    Three kinds of input are compared: a DataFrame with year/month/day
    columns, a Series of date strings, and plain Python values (integer
    scalars and a list) combined with ``unit`` / ``origin`` arguments.
    """
    # DataFrame input: both libraries receive the same column data.
    ymd_columns = {"year": [2015, 2016], "month": [2, 3], "day": [4, 5]}
    modin_frame = pd.DataFrame(ymd_columns)
    pandas_frame = pandas.DataFrame(ymd_columns)
    df_equals(pd.to_datetime(modin_frame), pandas.to_datetime(pandas_frame))

    # Series input: 3000 date strings built by repeating three values.
    date_strings = ["3/11/2000", "3/12/2000", "3/13/2000"] * 1000
    modin_series = pd.Series(date_strings)
    pandas_series = pandas.Series(date_strings)
    df_equals(pd.to_datetime(modin_series), pandas.to_datetime(pandas_series))

    # Integer scalars, each converted with its own ``unit``.
    for raw, unit in ((1490195805, "s"), (1490195805433502912, "ns")):
        modin_stamp = pd.to_datetime(raw, unit=unit)
        assert modin_stamp == pandas.to_datetime(raw, unit=unit)

    # List input counted in days from an explicit origin timestamp.
    day_counts = [1, 2, 3]
    modin_dates = pd.to_datetime(
        day_counts, unit="D", origin=pd.Timestamp("2000-01-01")
    )
    pandas_dates = pandas.to_datetime(
        day_counts, unit="D", origin=pandas.Timestamp("2000-01-01")
    )
    assert modin_dates.equals(pandas_dates)
Example #2
0
def test_to_numeric(data, errors, downcast):
    """Check ``pd.to_numeric`` against ``pandas.to_numeric`` on a Series.

    Args:
        data: Values used to build both the modin and the pandas Series.
        errors: Forwarded unchanged as the ``errors`` keyword.
        downcast: Forwarded unchanged as the ``downcast`` keyword.
    """
    # Both calls must receive exactly the same options.
    options = {"errors": errors, "downcast": downcast}

    modin_input = pd.Series(data)
    pandas_input = pandas.Series(data)

    actual = pd.to_numeric(modin_input, **options)
    expected = pandas.to_numeric(pandas_input, **options)
    df_equals(actual, expected)
Example #3
0
def test_notnull(data):
    """Check ``pd.notnull`` against ``pandas.notnull``.

    Covers a DataFrame built from ``data``, a small Series containing a
    NaN, and a scalar NaN.

    Args:
        data: Input used to build both the pandas and the modin DataFrame
            (presumably supplied by a pytest parametrization that is not
            visible in this chunk).
    """
    pandas_df = pandas.DataFrame(data)
    modin_df = pd.DataFrame(data)

    # DataFrame input.
    pandas_result = pandas.notnull(pandas_df)
    modin_result = pd.notnull(modin_df)
    df_equals(modin_result, pandas_result)

    # Series input with a missing value in the middle.
    modin_result = pd.notnull(pd.Series([1, np.nan, 2]))
    pandas_result = pandas.notnull(pandas.Series([1, np.nan, 2]))
    df_equals(modin_result, pandas_result)

    # Scalar input: call notnull (the function under test) rather than
    # isna, so the non-container path of notnull is actually compared.
    assert pd.notnull(np.nan) == pandas.notnull(np.nan)
Example #4
0
def test_merge():
    """Check ``pd.merge`` against ``pandas.merge`` for several key choices.

    For each join type ("outer", "inner") the two frames are merged with
    default keys, with column/index key combinations, and with both
    indexes, and the modin result is compared with the pandas result.
    Afterwards two invalid left operands are checked for the expected
    exception types.
    """
    left_data = {
        "col1": [0, 1, 2, 3],
        "col2": [4, 5, 6, 7],
        "col3": [8, 9, 0, 1],
        "col4": [2, 4, 5, 6],
    }
    right_data = {"col1": [0, 1, 2], "col2": [1, 5, 6]}

    modin_left = pd.DataFrame(left_data)
    pandas_left = pandas.DataFrame(left_data)
    modin_right = pd.DataFrame(right_data)
    pandas_right = pandas.DataFrame(right_data)

    # Keyword sets tried for every join type, in this order.
    key_options = (
        {},  # defaults
        {"left_on": "col1", "right_index": True},
        {"left_index": True, "right_on": "col1"},
        {"left_on": "col1", "right_on": "col1"},
        {"left_on": "col2", "right_on": "col2"},
        {"left_index": True, "right_index": True},
    )

    for how in ("outer", "inner"):
        for keys in key_options:
            actual = pd.merge(modin_left, modin_right, how=how, **keys)
            expected = pandas.merge(pandas_left, pandas_right, how=how, **keys)
            df_equals(actual, expected)

    # A Series created without a name is expected to raise ValueError.
    unnamed_series = pd.Series(left_data.get("col1"))
    with pytest.raises(ValueError):
        pd.merge(unnamed_series, modin_right)

    # A left operand that is neither frame nor series should raise TypeError.
    with pytest.raises(TypeError):
        pd.merge("Non-valid type", modin_right)