Esempio n. 1
0
def test_fillna_datetime_columns():
    """Check ``fillna("?")`` against pandas on frames with datetime columns.

    Two data sets are exercised: one whose datetime column is a complete
    ``date_range`` and one whose datetime column ends with ``NaT``. Both
    also carry a float column with ``NaN`` and object columns with ``None``.
    """

    def assert_fillna_matches(frame_data):
        # Build the pandas frame and the `pd` frame from the same data and
        # an equivalent date-based index, then compare the filled results.
        pandas_df = pandas.DataFrame(
            frame_data, index=pd.date_range("20130110", periods=3)
        )
        modin_df = pd.DataFrame(
            frame_data, index=pd.date_range("20130110", periods=3)
        )
        df_equals(modin_df.fillna("?"), pandas_df.fillna("?"))

    # Datetime column without any missing entries.
    assert_fillna_matches(
        {
            "A": [-1, -2, np.nan],
            "B": pd.date_range("20130101", periods=3),
            "C": ["foo", "bar", None],
            "D": ["foo2", "bar2", None],
        }
    )

    # Datetime column whose last entry is NaT.
    assert_fillna_matches(
        {
            "A": [-1, -2, np.nan],
            "B": [
                pandas.Timestamp("2013-01-01"),
                pandas.Timestamp("2013-01-02"),
                pandas.NaT,
            ],
            "C": ["foo", "bar", None],
            "D": ["foo2", "bar2", None],
        }
    )
Esempio n. 2
0
def test_tz_convert():
    """Compare ``DataFrame.tz_convert("UTC")`` with pandas.

    Covers a plain tz-aware datetime index and a two-level MultiIndex whose
    first level is the tz-aware datetime index.
    """
    # Same index specification for both libraries.
    range_kwargs = dict(periods=500, freq="2D", tz="America/Los_Angeles")
    modin_idx = pd.date_range("1/1/2012", **range_kwargs)
    pandas_idx = pandas.date_range("1/1/2012", **range_kwargs)

    data = np.random.randint(0, 100, size=(len(modin_idx), 4))

    # Flat tz-aware index.
    modin_flat = pd.DataFrame(data, index=modin_idx)
    pandas_flat = pandas.DataFrame(data, index=pandas_idx)
    df_equals(
        modin_flat.tz_convert("UTC", axis=0),
        pandas_flat.tz_convert("UTC", axis=0),
    )

    # MultiIndex: convert only level 0, which holds the datetimes.
    modin_multi = pd.MultiIndex.from_arrays([modin_idx, range(len(modin_idx))])
    pandas_multi = pandas.MultiIndex.from_arrays(
        [pandas_idx, range(len(modin_idx))]
    )
    modin_multi_df = pd.DataFrame(data, index=modin_multi)
    pandas_multi_df = pandas.DataFrame(data, index=pandas_multi)
    df_equals(
        modin_multi_df.tz_convert("UTC", axis=0, level=0),
        pandas_multi_df.tz_convert("UTC", axis=0, level=0),
    )
Esempio n. 3
0
def test_asfreq():
    """Check that ``DataFrame.asfreq`` emits a ``UserWarning``.

    Only the warning is verified (the call is expected to default to
    pandas); the resampled result itself is not compared.
    """
    minute_index = pd.date_range("1/1/2000", periods=4, freq="T")
    values = pd.Series([0.0, None, 2.0, 3.0], index=minute_index)
    frame = pd.DataFrame({"s": values})

    with pytest.warns(UserWarning):
        frame.asfreq(freq="30S")
Esempio n. 4
0
def test_getitem_datetime_slice():
    """Compare slicing a datetime-indexed frame by date strings with pandas."""
    n_rows = 1000
    data = {"data": range(n_rows)}
    index = pd.date_range("2017/1/4", periods=n_rows)

    modin_df = pd.DataFrame(data=data, index=index)
    pandas_df = pandas.DataFrame(data=data, index=index)

    # String bounds in a slice select rows by date label.
    df_equals(
        modin_df["2017-01-06":"2017-01-09"],
        pandas_df["2017-01-06":"2017-01-09"],
    )
Esempio n. 5
0
def test_first():
    """Compare ``DataFrame.first`` with pandas for two date offsets."""
    n_rows = 400
    index = pd.date_range("2010-04-09", periods=n_rows, freq="2D")
    columns = {"A": list(range(n_rows)), "B": list(range(n_rows))}

    modin_df = pd.DataFrame(columns, index=index)
    pandas_df = pandas.DataFrame(columns, index=index)

    for offset in ("3D", "20D"):
        df_equals(modin_df.first(offset), pandas_df.first(offset))
Esempio n. 6
0
def test_at_time():
    """Compare ``DataFrame.at_time`` with pandas along rows and columns."""
    n_rows = 1000
    index = pd.date_range("2008-01-01", periods=n_rows, freq="12H")
    columns = {"A": list(range(n_rows)), "B": list(range(n_rows))}

    modin_df = pd.DataFrame(columns, index=index)
    pandas_df = pandas.DataFrame(columns, index=index)

    # Row-wise selection for two different times of day.
    for time_of_day in ("12:00", "3:00"):
        df_equals(modin_df.at_time(time_of_day), pandas_df.at_time(time_of_day))

    # Column-wise selection on the transposed frames.
    df_equals(
        modin_df.T.at_time("12:00", axis=1),
        pandas_df.T.at_time("12:00", axis=1),
    )
Esempio n. 7
0
def test_tz_localize():
    """Compare ``DataFrame.tz_localize`` with pandas for two time zones."""
    naive_index = pd.date_range("1/1/2012", periods=400, freq="2D")
    values = np.random.randint(0, 100, size=(len(naive_index), 4))

    modin_df = pd.DataFrame(values, index=naive_index)
    pandas_df = pandas.DataFrame(values, index=naive_index)

    for zone in ("UTC", "America/Los_Angeles"):
        df_equals(
            modin_df.tz_localize(zone, axis=0),
            pandas_df.tz_localize(zone, axis=0),
        )
Esempio n. 8
0
def test_dataframe_dt_index(axis, on, closed, window):
    """Compare rolling-window operations on datetime-indexed frames with pandas.

    Parameters (presumably supplied by pytest parametrization that is not
    visible here):
        axis: axis passed to ``rolling``; ``"columns"`` transposes the frames.
        on: name of a datetime column to roll on; only honoured when
            ``axis == 0`` and ``window`` is a string, otherwise reset to None.
        closed: forwarded unchanged to ``rolling``.
        window: int or str window specification; its type selects which
            group of rolling methods gets compared.
    """
    index = pandas.date_range("31/12/2000", periods=12, freq="T")
    data = {"A": range(12), "B": range(12)}
    pandas_df = pandas.DataFrame(data, index=index)
    modin_df = pd.DataFrame(data, index=index)

    use_on_column = on is not None and axis == 0 and isinstance(window, str)
    if use_on_column:
        pandas_df[on] = pandas.date_range("22/06/1941", periods=12, freq="T")
        modin_df[on] = pd.date_range("22/06/1941", periods=12, freq="T")
    else:
        on = None

    if axis == "columns":
        pandas_df = pandas_df.T
        modin_df = modin_df.T

    rolling_kwargs = dict(window=window, on=on, axis=axis, closed=closed)
    pandas_rolled = pandas_df.rolling(**rolling_kwargs)
    modin_rolled = modin_df.rolling(**rolling_kwargs)

    if not isinstance(window, int):
        df_equals(modin_rolled.skew(), pandas_rolled.skew())
        df_equals(
            modin_rolled.apply(np.sum, raw=True),
            pandas_rolled.apply(np.sum, raw=True),
        )
        df_equals(
            modin_rolled.aggregate(np.sum), pandas_rolled.aggregate(np.sum)
        )
        df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))
        # `Rolling.count` has a buggy side effect on other rolling functions,
        # described in https://github.com/pandas-dev/pandas/issues/39554.
        # Until that is fixed in pandas, `.count` must remain the last
        # comparison so the side effect cannot influence the others.
        df_equals(modin_rolled.count(), pandas_rolled.count())
        return

    # corr/cov are very slow on the data used by test_rolling, so they are
    # only exercised here on this small frame.
    for pairwise in (True, False):
        df_equals(
            modin_rolled.corr(modin_df, pairwise),
            pandas_rolled.corr(pandas_df, pairwise),
        )
    for pairwise in (True, False):
        df_equals(
            modin_rolled.cov(modin_df, pairwise),
            pandas_rolled.cov(pandas_df, pairwise),
        )

    if axis == 0:
        # Also compare against a single column taken from each frame.
        modin_first = modin_df[modin_df.columns[0]]
        pandas_first = pandas_df[pandas_df.columns[0]]
        df_equals(
            modin_rolled.cov(modin_first, True),
            pandas_rolled.cov(pandas_first, True),
        )
        modin_first = modin_df[modin_df.columns[0]]
        pandas_first = pandas_df[pandas_df.columns[0]]
        df_equals(
            modin_rolled.corr(modin_first, True),
            pandas_rolled.corr(pandas_first, True),
        )
Esempio n. 9
0
def test_dataframe_dt_index(axis, on, closed, window):
    """Compare rolling-window operations on datetime-indexed frames with pandas.

    Parameters (presumably supplied by pytest parametrization that is not
    visible here):
        axis: axis passed to ``rolling``; ``"columns"`` transposes the frames.
        on: name of a datetime column to roll on; only honoured when
            ``axis == 0`` and ``window`` is a string, otherwise reset to None.
        closed: forwarded unchanged to ``rolling``.
        window: int or str window specification; an int window compares
            ``corr``/``cov``, any other window compares the aggregations.
    """
    index = pandas.date_range("31/12/2000", periods=12, freq="T")
    data = {"A": range(12), "B": range(12)}
    pandas_df = pandas.DataFrame(data, index=index)
    modin_df = pd.DataFrame(data, index=index)
    if on is not None and axis == 0 and isinstance(window, str):
        # Rolling on a column requires a datetime-like column to exist.
        pandas_df[on] = pandas.date_range("22/06/1941", periods=12, freq="T")
        modin_df[on] = pd.date_range("22/06/1941", periods=12, freq="T")
    else:
        on = None
    if axis == "columns":
        pandas_df = pandas_df.T
        modin_df = modin_df.T
    pandas_rolled = pandas_df.rolling(window=window,
                                      on=on,
                                      axis=axis,
                                      closed=closed)
    modin_rolled = modin_df.rolling(window=window,
                                    on=on,
                                    axis=axis,
                                    closed=closed)
    if isinstance(window, int):
        # These functions are very slow on the data from test_rolling, so
        # they are only exercised here on this small frame.
        df_equals(modin_rolled.corr(modin_df, True),
                  pandas_rolled.corr(pandas_df, True))
        df_equals(modin_rolled.corr(modin_df, False),
                  pandas_rolled.corr(pandas_df, False))
        df_equals(modin_rolled.cov(modin_df, True),
                  pandas_rolled.cov(pandas_df, True))
        df_equals(modin_rolled.cov(modin_df, False),
                  pandas_rolled.cov(pandas_df, False))
        if axis == 0:
            df_equals(
                modin_rolled.cov(modin_df[modin_df.columns[0]], True),
                pandas_rolled.cov(pandas_df[pandas_df.columns[0]], True),
            )
            df_equals(
                modin_rolled.corr(modin_df[modin_df.columns[0]], True),
                pandas_rolled.corr(pandas_df[pandas_df.columns[0]], True),
            )
    else:
        df_equals(modin_rolled.skew(), pandas_rolled.skew())
        df_equals(
            modin_rolled.apply(np.sum, raw=True),
            pandas_rolled.apply(np.sum, raw=True),
        )
        df_equals(modin_rolled.aggregate(np.sum),
                  pandas_rolled.aggregate(np.sum))
        df_equals(modin_rolled.quantile(0.1), pandas_rolled.quantile(0.1))
        # `Rolling.count` has a buggy side effect on other rolling functions,
        # described in https://github.com/pandas-dev/pandas/issues/39554.
        # The same rolling objects are reused for every comparison, so
        # `.count` is checked last to keep that side effect from influencing
        # the other comparisons.
        df_equals(modin_rolled.count(), pandas_rolled.count())
Esempio n. 10
0
def test_reindex_like():
    """Check that ``DataFrame.reindex_like`` emits a ``UserWarning``.

    Only the warning is verified; the reindexed result is not inspected.
    NOTE(review): presumably the warning signals a default-to-pandas
    fallback -- confirm against the implementation.
    """
    # Reference frame: four daily rows and three columns.
    reference_rows = [
        [24.3, 75.7, "high"],
        [31, 87.8, "high"],
        [22, 71.6, "medium"],
        [35, 95, "medium"],
    ]
    reference_df = pd.DataFrame(
        reference_rows,
        columns=["temp_celsius", "temp_fahrenheit", "windspeed"],
        index=pd.date_range(start="2014-02-12", end="2014-02-15", freq="D"),
    )

    # Frame to reindex: one date and one column fewer than the reference.
    partial_rows = [[28, "low"], [30, "low"], [35.1, "medium"]]
    partial_df = pd.DataFrame(
        partial_rows,
        columns=["temp_celsius", "windspeed"],
        index=pd.DatetimeIndex(["2014-02-12", "2014-02-13", "2014-02-15"]),
    )

    with pytest.warns(UserWarning):
        partial_df.reindex_like(reference_df)
Esempio n. 11
0
def test_to_timestamp():
    """Check that ``to_period().to_timestamp()`` emits a ``UserWarning``.

    Only the warning is verified; the converted frame is not inspected.
    """
    monthly_idx = pd.date_range("1/1/2012", periods=5, freq="M")
    values = np.random.randint(0, 100, size=(len(monthly_idx), 4))
    frame = pd.DataFrame(values, index=monthly_idx)

    with pytest.warns(UserWarning):
        as_period = frame.to_period()
        as_period.to_timestamp()
Esempio n. 12
0
def test_tshift():
    """Compare ``DataFrame.tshift(4)`` with pandas on a monthly index."""
    monthly_idx = pd.date_range("1/1/2012", periods=5, freq="M")
    values = np.random.randint(0, 100, size=(len(monthly_idx), 4))

    pandas_df = pandas.DataFrame(values, index=monthly_idx)
    modin_df = pd.DataFrame(values, index=monthly_idx)

    modin_shifted = modin_df.tshift(4)
    pandas_shifted = pandas_df.tshift(4)
    df_equals(modin_shifted, pandas_shifted)
Esempio n. 13
0
def test_to_timestamp():
    """Check ``to_period().to_timestamp()`` under the default-to-pandas warning check.

    The chained call runs inside ``warns_that_defaulting_to_pandas()``, a
    helper defined outside this block; the converted frame is not inspected.
    """
    monthly_idx = pd.date_range("1/1/2012", periods=5, freq="M")
    values = np.random.randint(0, 100, size=(len(monthly_idx), 4))
    frame = pd.DataFrame(values, index=monthly_idx)

    with warns_that_defaulting_to_pandas():
        as_period = frame.to_period()
        as_period.to_timestamp()