Example #1
0
def multiindex_year_month_day_dataframe_random_data():
    """
    DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data

    Returns
    -------
    DataFrame
        The time frame summed per (year, month, day) group, with integer
        ("i8") index levels named "year", "month" and "day".
    """
    tdf = tm.makeTimeDataFrame(100)
    ymd = tdf.groupby([lambda x: x.year, lambda x: x.month,
                       lambda x: x.day]).sum()
    # use Int64Index, to make sure things work
    # ``set_levels`` / ``set_names`` return a new index, so reassign the
    # result instead of relying on ``inplace=True`` (that keyword was
    # deprecated and later removed from ``MultiIndex.set_levels``).
    ymd.index = ymd.index.set_levels(
        [lev.astype("i8") for lev in ymd.index.levels])
    ymd.index = ymd.index.set_names(["year", "month", "day"])
    return ymd
Example #2
0
    def test_setitem_with_expansion_numeric_into_datetimeindex(self, key):
        """Assigning a row under ``key`` appends it and yields an object index.

        GH#4940 inserting non-strings
        """
        original = tm.makeTimeDataFrame()
        frame = original.copy()

        # ``key`` is not an existing label, so this assignment adds a row.
        frame.loc[key, :] = frame.iloc[0]

        expected_index = Index(
            [*original.index, key], dtype=object, name=original.index.name
        )
        expected_values = np.concatenate(
            [original.values, frame.iloc[[0]].values], axis=0
        )
        expected = DataFrame(
            expected_values, index=expected_index, columns=original.columns
        )

        tm.assert_frame_equal(frame, expected)
Example #3
0
def test_append_to_multiple_dropna(setup_path):
    """``append_to_multiple(dropna=True)`` keeps the written tables aligned."""
    left = tm.makeTimeDataFrame()
    right = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    # Blank out "A" and "B" in the second row of the left frame.
    nan_columns = left.columns.get_indexer(["A", "B"])
    left.iloc[1, nan_columns] = np.nan
    combined = concat([left, right], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=True should guarantee rows are synchronized
        table_map = {"df1": ["A", "B"], "df2": None}
        store.append_to_multiple(
            table_map, combined, selector="df1", dropna=True
        )

        result = store.select_as_multiple(["df1", "df2"])
        expected = combined.dropna()
        tm.assert_frame_equal(result, expected)

        selector_index = store.select("df1").index
        other_index = store.select("df2").index
        tm.assert_index_equal(selector_index, other_index)
Example #4
0
    def test_squeeze(self):
        """Exercise ``squeeze`` on Series and DataFrames of several shapes."""
        # noop
        series_cases = (
            tm.makeFloatSeries(),
            tm.makeStringSeries(),
            tm.makeObjectSeries(),
        )
        for ser in series_cases:
            tm.assert_series_equal(ser.squeeze(), ser)
        frame = tm.makeTimeDataFrame()
        tm.assert_frame_equal(frame.squeeze(), frame)

        # squeezing
        single_column = tm.makeTimeDataFrame().reindex(columns=["A"])
        tm.assert_series_equal(single_column.squeeze(), single_column["A"])

        # don't fail with 0 length dimensions GH11229 & GH8999
        empty_series = Series([], name="five", dtype=np.float64)
        empty_frame = DataFrame([empty_series])
        for source in (empty_series, empty_frame):
            tm.assert_series_equal(empty_series, source.squeeze())

        # axis argument
        cell = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
        assert cell.shape == (1, 1)
        for row_axis in (0, "index"):
            tm.assert_series_equal(cell.squeeze(axis=row_axis), cell.iloc[0])
        for col_axis in (1, "columns"):
            tm.assert_series_equal(
                cell.squeeze(axis=col_axis), cell.iloc[:, 0]
            )
        assert cell.squeeze() == cell.iloc[0, 0]

        invalid_axes = (
            (2, "No axis named 2 for object type DataFrame"),
            ("x", "No axis named x for object type DataFrame"),
        )
        for bad_axis, msg in invalid_axes:
            with pytest.raises(ValueError, match=msg):
                cell.squeeze(axis=bad_axis)

        # More than one row: squeezing along axis 0 leaves the frame as is.
        three_rows = tm.makeTimeDataFrame(3)
        tm.assert_frame_equal(three_rows.squeeze(axis=0), three_rows)
Example #5
0
def test_granger_pvalues_ssr_f(test_input, expected):
    """Compare a fitted ``GrangerCausality`` result value with ``expected``."""
    # Seed NumPy's global RNG; without it every run builds a different frame.
    np.random.seed(12)
    frame = testing.makeTimeDataFrame(freq="s", nper=1000)

    estimator = GrangerCausality(
        target_col="B", x_col="A", max_shift=10, statistics=test_input
    )
    first_result = estimator.fit(frame).results_[0]
    p_value = first_result.values[1]

    # Floating point result: compare to 7 decimal places, not exactly.
    np.testing.assert_almost_equal(p_value, expected, decimal=7)
Example #6
0
def test_frame_select_complex(setup_path):
    """
    Select from a table-format store using compound query strings.

    Each query result is compared with the equivalent in-memory selection
    on ``df``. The query strings themselves mention the local name ``df``
    (for example ``df.index[3]``), so that variable must keep its name.
    """
    # select via complex criteria

    df = tm.makeTimeDataFrame()
    df["string"] = "foo"
    # the first four rows get "bar"; every other row keeps "foo"
    df.loc[df.index[0:4], "string"] = "bar"

    with ensure_clean_store(setup_path) as store:
        # "string" is stored as a data column so it can appear in queries
        store.put("df", df, format="table", data_columns=["string"])

        # empty
        # (the "bar" rows are the first four, so with an increasing index
        # none of them lies after df.index[3])
        result = store.select("df", 'index>df.index[3] & string="bar"')
        expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")]
        tm.assert_frame_equal(result, expected)

        result = store.select("df", 'index>df.index[3] & string="foo"')
        expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")]
        tm.assert_frame_equal(result, expected)

        # or
        result = store.select("df", 'index>df.index[3] | string="bar"')
        expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")]
        tm.assert_frame_equal(result, expected)

        # parenthesised "and" combined with "or"
        result = store.select(
            "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"')
        expected = df.loc[((df.index > df.index[3]) &
                           (df.index <= df.index[6]))
                          | (df.string == "bar")]
        tm.assert_frame_equal(result, expected)

        # invert
        result = store.select("df", 'string!="bar"')
        expected = df.loc[df.string != "bar"]
        tm.assert_frame_equal(result, expected)

        # invert not implemented in numexpr :(
        msg = "cannot use an invert condition when passing to numexpr"
        with pytest.raises(NotImplementedError, match=msg):
            store.select("df", '~(string="bar")')

        # invert ok for filters
        result = store.select("df", "~(columns=['A','B'])")
        expected = df.loc[:, df.columns.difference(["A", "B"])]
        tm.assert_frame_equal(result, expected)

        # in
        result = store.select("df", "index>df.index[3] & columns in ['A','B']")
        expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])
        tm.assert_frame_equal(result, expected)
Example #7
0
    def test_last_subset(self, frame_or_series):
        """Check what ``last`` returns for several offsets, on frame or series."""
        half_daily = tm.makeTimeDataFrame(freq="12h")
        if frame_or_series is not DataFrame:
            half_daily = half_daily["A"]
        assert len(half_daily.last("10d")) == 20

        daily = tm.makeTimeDataFrame(nper=30, freq="D")
        if frame_or_series is not DataFrame:
            daily = daily["A"]
        assert len(daily.last("10d")) == 10

        # The 21-day tail, checked against a label slice ...
        tail = daily.last("21D")
        tm.assert_equal(tail, daily["2000-01-10":])

        # ... and against a positional slice.
        tail = daily.last("21D")
        tm.assert_equal(tail, daily[-21:])

        # An empty object gives an empty result.
        tm.assert_equal(daily[:0].last("3M"), daily[:0])
Example #8
0
    def test_first_subset(self, frame_or_series):
        """Check what ``first`` returns for several offsets, on frame or series."""
        half_daily = tm.makeTimeDataFrame(freq="12h")
        if frame_or_series is not DataFrame:
            half_daily = half_daily["A"]
        assert len(half_daily.first("10d")) == 20

        daily = tm.makeTimeDataFrame(freq="D")
        if frame_or_series is not DataFrame:
            daily = daily["A"]
        assert len(daily.first("10d")) == 10

        # Three months from the start, checked against a label slice.
        head = daily.first("3M")
        tm.assert_equal(head, daily[:"3/31/2000"])

        # Twenty-one days from the start, checked against a positional slice.
        head = daily.first("21D")
        tm.assert_equal(head, daily[:21])

        # An empty object gives an empty result.
        tm.assert_equal(daily[:0].first("3M"), daily[:0])
Example #9
0
def test_append_to_multiple_dropna_false(setup_path):
    """With ``dropna=False`` the tables written by ``append_to_multiple`` differ."""
    left = tm.makeTimeDataFrame()
    right = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    # Blank out "A" and "B" in the second row of the left frame.
    nan_columns = left.columns.get_indexer(["A", "B"])
    left.iloc[1, nan_columns] = np.nan
    combined = concat([left, right], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=False shouldn't synchronize row indexes
        table_map = {"df1a": ["A", "B"], "df2a": None}
        store.append_to_multiple(
            table_map, combined, selector="df1a", dropna=False
        )

        nrows_msg = "all tables must have exactly the same nrows!"
        with pytest.raises(ValueError, match=nrows_msg):
            store.select_as_multiple(["df1a", "df2a"])

        selector_index = store.select("df1a").index
        other_index = store.select("df2a").index
        assert not selector_index.equals(other_index)
    def setup_method(self, method):
        """Run the base setup, reset matplotlib rc defaults, build test frames."""
        TestPlotBase.setup_method(self, method)
        import matplotlib as mpl

        mpl.rcdefaults()

        self.tdf = tm.makeTimeDataFrame()

        # Draw the random data in a fixed order: "A", "B", then the noise
        # that is added to the 0..19 ramp for "C".
        n_points = 20
        col_a = np.random.uniform(size=n_points)
        col_b = np.random.uniform(size=n_points)
        col_c = np.arange(n_points) + np.random.uniform(size=n_points)
        self.hexbin_df = DataFrame({"A": col_a, "B": col_b, "C": col_c})
Example #11
0
    def test_numpy_transpose(self):
        """``np.transpose`` accepts Series and DataFrame but rejects ``axes``."""
        axes_msg = "the 'axes' parameter is not supported"

        # Series: transposing returns an equal Series.
        ser = tm.makeFloatSeries()
        tm.assert_series_equal(np.transpose(ser), ser)
        with pytest.raises(ValueError, match=axes_msg):
            np.transpose(ser, axes=1)

        # DataFrame: transposing twice restores the original frame.
        frame = tm.makeTimeDataFrame()
        round_tripped = np.transpose(np.transpose(frame))
        tm.assert_frame_equal(round_tripped, frame)
        with pytest.raises(ValueError, match=axes_msg):
            np.transpose(frame, axes=1)
Example #12
0
    def test_partial_set_invalid(self):
        """Setting a row under a new string label extends a time-indexed frame.

        The enlarged frame is compared with the original frame plus a copy
        of its first row labelled "a"; the resulting index must be object
        dtype.
        """
        # Local import: this block needs ``concat`` and must not depend on
        # the file-level import list.
        from pandas import concat

        # GH 4940
        # allow only setting of 'valid' values

        orig = tm.makeTimeDataFrame()

        # allow object conversion here
        df = orig.copy()
        df.loc["a", :] = df.iloc[0]
        # ``DataFrame.append`` was deprecated in pandas 1.4 and removed in
        # 2.0; concatenating a one-row frame labelled "a" is the equivalent.
        new_row = Series(df.iloc[0], name="a").to_frame().T
        exp = concat([orig, new_row])
        tm.assert_frame_equal(df, exp)
        tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"]))
        assert df.index.dtype == "object"
Example #13
0
def test_unstack_multi_index_categorical_values():
    """Unstack a categorical Series indexed by a ("major", "minor") MultiIndex."""
    stacked_index = tm.makeTimeDataFrame().stack().index
    mi = stacked_index.rename(["major", "minor"])
    values = ["foo"] * len(mi)
    ser = Series(values, index=mi, name="category", dtype="category")

    result = ser.unstack()

    dates = ser.index.levels[0]
    cat = pd.Categorical(["foo"] * len(dates))
    minor_labels = list("ABCD")
    # Every column of the expected frame is its own copy of the categorical.
    expected = DataFrame(
        {label: cat.copy() for label in minor_labels},
        columns=pd.Index(minor_labels, name="minor"),
        index=dates.rename("major"),
    )
    tm.assert_frame_equal(result, expected)
Example #14
0
 def test_squeeze_axis(self):
     """Check ``squeeze(axis=...)`` on a 1x1 frame, including invalid axes."""
     # axis argument
     cell = tm.makeTimeDataFrame(nper=1).iloc[:, :1]
     assert cell.shape == (1, 1)

     for row_axis in (0, "index"):
         tm.assert_series_equal(cell.squeeze(axis=row_axis), cell.iloc[0])
     for col_axis in (1, "columns"):
         tm.assert_series_equal(cell.squeeze(axis=col_axis), cell.iloc[:, 0])

     # With no axis given the single value itself comes back.
     assert cell.squeeze() == cell.iloc[0, 0]

     invalid_axes = (
         (2, "No axis named 2 for object type DataFrame"),
         ("x", "No axis named x for object type DataFrame"),
     )
     for bad_axis, msg in invalid_axes:
         with pytest.raises(ValueError, match=msg):
             cell.squeeze(axis=bad_axis)
Example #15
0
    def test_numpy_transpose(self, frame_or_series):
        """``np.transpose`` round-trips the object and rejects ``axes``."""
        obj = tm.get_obj(tm.makeTimeDataFrame(), frame_or_series)

        if frame_or_series is Series:
            # 1D -> np.transpose is no-op
            tm.assert_series_equal(np.transpose(obj), obj)

        # round-trip preserved
        twice_transposed = np.transpose(np.transpose(obj))
        tm.assert_equal(twice_transposed, obj)

        axes_msg = "the 'axes' parameter is not supported"
        with pytest.raises(ValueError, match=axes_msg):
            np.transpose(obj, axes=1)
Example #16
0
    def setup_method(self, method):
        """Create the frames and column names stored on the test instance."""
        # Ten-row time frame plus two 0/1 id columns taken from the signs
        # of "A" and "B".
        self.df = tm.makeTimeDataFrame()[:10]
        for id_col, source_col in (("id1", "A"), ("id2", "B")):
            self.df[id_col] = (self.df[source_col] > 0).astype(np.int64)

        self.var_name, self.value_name = "var", "val"

        # Small fixed frame whose columns form a two-level index.
        fixed_values = [
            [1.067683, -1.110463, 0.20867],
            [-1.321405, 0.368915, -1.055342],
            [-0.807333, 0.08298, -0.873361],
        ]
        self.df1 = DataFrame(fixed_values)
        self.df1.columns = [list("ABC"), list("abc")]
        self.df1.columns.names = ["CAP", "low"]
Example #17
0
def test_append_to_multiple_dropna_false(setup_path):
    """With ``dropna=False`` the tables written by ``append_to_multiple`` differ."""
    left = tm.makeTimeDataFrame()
    right = tm.makeTimeDataFrame().rename(columns="{}_2".format)
    # Blank out "A" and "B" in the second row of the left frame.
    nan_columns = left.columns.get_indexer(["A", "B"])
    left.iloc[1, nan_columns] = np.nan
    combined = concat([left, right], axis=1)

    with ensure_clean_store(setup_path) as store:
        # dropna=False shouldn't synchronize row indexes
        table_map = {"df1a": ["A", "B"], "df2a": None}
        store.append_to_multiple(
            table_map, combined, selector="df1a", dropna=False
        )

        # TODO Update error message to desired message for this case
        expected_msg = (
            "Cannot select as multiple after appending with dropna=False"
        )
        with pytest.raises(ValueError, match=expected_msg):
            store.select_as_multiple(["df1a", "df2a"])

        selector_index = store.select("df1a").index
        other_index = store.select("df2a").index
        assert not selector_index.equals(other_index)
Example #18
0
def test_transform():
    """Exercise ``groupby(...).transform`` on Series and DataFrame inputs."""
    values = np.arange(9) // 3
    data = Series(values, index=np.arange(9))

    # Put the rows in a random order before grouping.
    shuffled = np.arange(9)
    np.random.shuffle(shuffled)
    data = data.reindex(shuffled)

    by_block = data.groupby(lambda x: x // 3)
    transformed = by_block.transform(lambda x: x * x.sum())
    assert transformed[7] == 12

    # GH 8046
    # make sure that we preserve the input order

    frame = DataFrame(
        np.arange(6, dtype="int64").reshape(3, 2),
        columns=["a", "b"],
        index=[0, 2, 1],
    )
    key = [0, 0, 1]
    centered_sorted = (
        frame.sort_index().groupby(key).transform(lambda x: x - x.mean())
    )
    expected = centered_sorted.groupby(key).mean()
    centered = frame.groupby(key).transform(lambda x: x - x.mean())
    result = centered.groupby(key).mean()
    tm.assert_frame_equal(result, expected)

    def demean(arr):
        return arr - arr.mean()

    people = DataFrame(
        np.random.randn(5, 5),
        columns=["a", "b", "c", "d", "e"],
        index=["Joe", "Steve", "Wes", "Jim", "Travis"],
    )
    key = ["one", "two", "one", "two", "one"]
    # ``transform`` and ``apply`` with the same function must agree.
    via_transform = people.groupby(key).transform(demean)
    result = via_transform.groupby(key).mean()
    via_apply = people.groupby(key).apply(demean)
    expected = via_apply.groupby(key).mean()
    tm.assert_frame_equal(result, expected)

    # GH 8430
    # Only checks that transforming monthly groups runs.
    time_frame = tm.makeTimeDataFrame()
    monthly = time_frame.groupby(pd.Grouper(freq="M"))
    monthly.transform(lambda x: x - 1)

    # GH 9700
    small = DataFrame({"a": range(5, 10), "b": range(5)})
    result = small.groupby("a").transform(max)
    expected = DataFrame({"b": range(5)})
    tm.assert_frame_equal(result, expected)
def test_mode(setup_path, mode):
    """Open, write and read an HDF file under ``mode`` and check the outcome.

    On a path from ``ensure_clean_path``, modes "r" and "r+" are expected
    to raise ``OSError``; reading with mode "w" is expected to raise
    ``ValueError``.
    """
    df = tm.makeTimeDataFrame()
    missing_msg = r"[\S]* does not exist"
    needs_existing_file = mode in ["r", "r+"]

    with ensure_clean_path(setup_path) as path:

        # constructor
        if needs_existing_file:
            with pytest.raises(OSError, match=missing_msg):
                HDFStore(path, mode=mode)
        else:
            store = HDFStore(path, mode=mode)
            assert store._handle.mode == mode
            store.close()

    with ensure_clean_path(setup_path) as path:

        # context
        if needs_existing_file:
            with pytest.raises(OSError, match=missing_msg):
                with HDFStore(path, mode=mode) as store:
                    pass
        else:
            with HDFStore(path, mode=mode) as store:
                assert store._handle.mode == mode

    with ensure_clean_path(setup_path) as path:

        # conv write
        if needs_existing_file:
            with pytest.raises(OSError, match=missing_msg):
                df.to_hdf(path, "df", mode=mode)
            # Create the file with "w" so the read check below has data.
            df.to_hdf(path, "df", mode="w")
        else:
            df.to_hdf(path, "df", mode=mode)

        # conv read
        if mode == "w":
            read_msg = (
                "mode w is not allowed while performing a read. "
                r"Allowed modes are r, r\+ and a."
            )
            with pytest.raises(ValueError, match=read_msg):
                read_hdf(path, "df", mode=mode)
        else:
            result = read_hdf(path, "df", mode=mode)
            tm.assert_frame_equal(result, df)
Example #20
0
 def test_slice_locs_with_type_mismatch(self):
     """``slice_locs`` raises ``TypeError`` for keys of the wrong level type."""
     mismatch = "^Level type mismatch"

     # Index obtained by stacking a time frame.
     time_frame = tm.makeTimeDataFrame()
     idx = time_frame.stack().index
     with pytest.raises(TypeError, match=mismatch):
         idx.slice_locs((1, 3))
     shifted_start = time_frame.index[5] + timedelta(seconds=30)
     with pytest.raises(TypeError, match=mismatch):
         idx.slice_locs(shifted_start, (5, 2))

     # Index obtained by stacking a custom frame.
     custom_frame = tm.makeCustomDataframe(5, 5)
     idx = custom_frame.stack().index
     with pytest.raises(TypeError, match=mismatch):
         idx.slice_locs(timedelta(seconds=30))
     # TODO: Try creating a UnicodeDecodeError in exception message
     with pytest.raises(TypeError, match=mismatch):
         idx.slice_locs(custom_frame.index[1], (16, "a"))
Example #21
0
 def test_take(self):
     """``take`` on Series/DataFrame matches taking from values and index."""
     positions = [1, 5, -2, 6, 3, -1]

     series_cases = (
         tm.makeFloatSeries(),
         tm.makeStringSeries(),
         tm.makeObjectSeries(),
     )
     for ser in series_cases:
         taken = ser.take(positions)
         expected = Series(
             data=ser.values.take(positions),
             index=ser.index.take(positions),
             dtype=ser.dtype,
         )
         tm.assert_series_equal(taken, expected)

     frame = tm.makeTimeDataFrame()
     taken = frame.take(positions)
     expected = DataFrame(
         data=frame.values.take(positions, axis=0),
         index=frame.index.take(positions),
         columns=frame.columns,
     )
     tm.assert_frame_equal(taken, expected)
def test_agg_grouping_is_list_tuple(ts):
    """``agg(np.mean)`` equals ``mean()`` when the grouper is a list or tuple."""
    frame = tm.makeTimeDataFrame()

    grouped = frame.groupby(lambda x: x.year)
    labels = grouped.grouper.groupings[0].grouper

    for container in (list, tuple):
        # Swap in a Grouping whose labels are held in a plain list / tuple.
        grouped.grouper.groupings[0] = Grouping(ts.index, container(labels))

        result = grouped.agg(np.mean)
        expected = grouped.mean()
        tm.assert_frame_equal(result, expected)
Example #23
0
def test_slice_locs():
    """``slice_locs`` on a stacked index matches positional slices of the frame."""
    frame = tm.makeTimeDataFrame()
    stacked = frame.stack()
    idx = stacked.index

    # Bounds that are existing labels: rows 5 through 15 are selected.
    start, stop = idx.slice_locs(frame.index[5], frame.index[15])
    sliced = stacked[start:stop]
    expected = frame[5:16].stack()
    tm.assert_almost_equal(sliced.values, expected.values)

    # Bounds moved 30 seconds inwards: rows 6 through 14 are selected.
    half_minute = timedelta(seconds=30)
    lower = frame.index[5] + half_minute
    upper = frame.index[15] - half_minute
    start, stop = idx.slice_locs(lower, upper)
    sliced = stacked[start:stop]
    expected = frame[6:15].stack()
    tm.assert_almost_equal(sliced.values, expected.values)
Example #24
0
    def test_take_invalid_kwargs(self):
        """``take`` rejects unknown keywords and the ``out`` / ``mode`` arguments."""
        indices = [-3, 2, 0, 1]
        objects = (tm.makeFloatSeries(), tm.makeTimeDataFrame())

        # (expected exception, extra keyword arguments, message pattern)
        cases = (
            (
                TypeError,
                {"foo": 2},
                r"take\(\) got an unexpected keyword argument 'foo'",
            ),
            (
                ValueError,
                {"out": indices},
                "the 'out' parameter is not supported",
            ),
            (
                ValueError,
                {"mode": "clip"},
                "the 'mode' parameter is not supported",
            ),
        )

        for obj in objects:
            for exc_type, kwargs, msg in cases:
                with pytest.raises(exc_type, match=msg):
                    obj.take(indices, **kwargs)
Example #25
0
    def test_take_invalid_kwargs(self, frame_or_series):
        """``take`` rejects unknown keywords and the ``out`` / ``mode`` arguments."""
        indices = [-3, 2, 0, 1]
        obj = tm.get_obj(tm.makeTimeDataFrame(), frame_or_series)

        # (expected exception, extra keyword arguments, message pattern)
        cases = (
            (
                TypeError,
                {"foo": 2},
                r"take\(\) got an unexpected keyword argument 'foo'",
            ),
            (
                ValueError,
                {"out": indices},
                "the 'out' parameter is not supported",
            ),
            (
                ValueError,
                {"mode": "clip"},
                "the 'mode' parameter is not supported",
            ),
        )

        for exc_type, kwargs, msg in cases:
            with pytest.raises(exc_type, match=msg):
                obj.take(indices, **kwargs)
Example #26
0
def X(request):
    """Build the input object selected by ``request.param``.

    A parameter containing "series" yields a daily time series. One
    containing "dataframe" yields a daily time frame without column "A",
    further adjusted by the "date_col_str", "len_<_3" or "wo_date_col"
    marker. Any other parameter raises ``ValueError``.
    """
    param = request.param

    if "series" in param:
        return tm.makeTimeSeries(freq="D")
    if "dataframe" not in param:
        raise ValueError("Invalid X fixture parameter")

    frame = tm.makeTimeDataFrame(freq="D").drop(columns="A")
    if "date_col_str" in param:
        # Swap the index for its string representation.
        with_str_index = frame.assign(index=lambda x: x.index.astype(str))
        return with_str_index.set_index("index")
    if "len_<_3" in param:
        return frame.iloc[:2, :]
    if "wo_date_col" in param:
        frame.index.name = "some_other_index_name"
        return frame
    raise ValueError("Invalid X fixture parameter")
Example #27
0
def test_invalid_filtering(setup_path):
    """Queries that combine two column filters raise ``NotImplementedError``."""
    # can't use more than one filter (atm)
    frame = tm.makeTimeDataFrame()
    joint_filter_msg = "unable to collapse Joint Filters"

    queries = (
        # not implemented
        "columns=['A'] | columns=['B']",
        # in theory we could deal with this
        "columns=['A','B'] & columns=['C']",
    )

    with ensure_clean_store(setup_path) as store:
        store.put("df", frame, format="table")

        for query in queries:
            with pytest.raises(NotImplementedError, match=joint_filter_msg):
                store.select("df", query)
Example #28
0
    def test_reshaping_multi_index_categorical(self):
        """Unstack a categorical column of a ("major", "minor")-indexed frame."""
        items = ["ItemA", "ItemB", "ItemC"]
        # One stacked time frame per item, placed side by side as columns.
        stacked = {item: tm.makeTimeDataFrame().stack() for item in items}
        df = pd.concat(stacked, axis="columns")
        df.index.names = ["major", "minor"]
        df["str"] = "foo"
        df["category"] = df["str"].astype("category")

        result = df["category"].unstack()

        dates = df.index.levels[0]
        cat = Categorical(["foo"] * len(dates))
        minor_labels = list("ABCD")
        expected = DataFrame(
            {label: cat.copy() for label in minor_labels},
            columns=Index(minor_labels, name="minor"),
            index=dates.rename("major"),
        )
        tm.assert_frame_equal(result, expected)
Example #29
0
def test_resample_frame_basic():
    """Resampling a frame agrees column-wise with resampling its Series."""
    frame = tm.makeTimeDataFrame()
    monthly_groups = frame.groupby(Grouper(freq="M"))

    # check all cython functions work
    for how in ("add", "mean", "prod", "min", "max", "var"):
        monthly_groups._cython_agg_general(how)

    # Annual, then monthly: the frame result for "A" equals the Series result.
    for rule in ("A", "M"):
        result = frame.resample(rule).mean()
        tm.assert_series_equal(result["A"], frame["A"].resample(rule).mean())

    # Only checks that period-kind resampling runs.
    for rule in ("M", "W-WED"):
        frame.resample(rule, kind="period").mean()
Example #30
0
    def test_getitem_setitem_non_ix_labels(self):
        """Label slices via ``.loc`` and ``[]`` match positional slice 5:11."""
        frame = tm.makeTimeDataFrame()
        start, end = frame.index[[5, 10]]

        # Reading: both label-based forms select rows 5 through 10.
        via_loc = frame.loc[start:end]
        via_getitem = frame[start:end]
        expected = frame[5:11]
        tm.assert_frame_equal(via_loc, expected)
        tm.assert_frame_equal(via_getitem, expected)

        # Writing: both label-based forms zero the same rows.
        set_via_loc = frame.copy()
        set_via_loc.loc[start:end] = 0
        set_via_getitem = frame.copy()
        set_via_getitem[start:end] = 0
        expected = frame.copy()
        expected[5:11] = 0
        tm.assert_frame_equal(set_via_loc, expected)
        tm.assert_frame_equal(set_via_getitem, expected)