예제 #1
0
def test_apply_reduce_Series(float_frame):
    float_frame["A"].iloc[::2] = np.nan
    expected = float_frame.mean(1)
    result = float_frame.apply(np.mean, axis=1)
    tm.assert_series_equal(result, expected)
예제 #2
0
 def test_ufunc_at(self):
     s = pd.Series([0, 1, 2], index=[1, 2, 3], name="x")
     np.add.at(s, [0, 2], 10)
     expected = pd.Series([10, 1, 12], index=[1, 2, 3], name="x")
     tm.assert_series_equal(s, expected)
예제 #3
0
    def test_multiindex_assignment(self):

        # GH3777 part 2

        # mixed dtype
        df = DataFrame(
            np.random.randint(5, 10, size=9).reshape(3, 3),
            columns=list("abc"),
            index=[[4, 4, 8], [8, 10, 12]],
        )
        df["d"] = np.nan
        arr = np.array([0.0, 1.0])

        df.loc[4, "d"] = arr
        tm.assert_series_equal(df.loc[4, "d"],
                               Series(arr, index=[8, 10], name="d"))

        # single dtype
        df = DataFrame(
            np.random.randint(5, 10, size=9).reshape(3, 3),
            columns=list("abc"),
            index=[[4, 4, 8], [8, 10, 12]],
        )

        df.loc[4, "c"] = arr
        exp = Series(arr, index=[8, 10], name="c", dtype="float64")
        tm.assert_series_equal(df.loc[4, "c"], exp)

        # scalar ok
        df.loc[4, "c"] = 10
        exp = Series(10, index=[8, 10], name="c", dtype="float64")
        tm.assert_series_equal(df.loc[4, "c"], exp)

        # invalid assignments
        msg = ("cannot set using a multi-index selection indexer "
               "with a different length than the value")
        with pytest.raises(ValueError, match=msg):
            df.loc[4, "c"] = [0, 1, 2, 3]

        with pytest.raises(ValueError, match=msg):
            df.loc[4, "c"] = [0]

        # groupby example
        NUM_ROWS = 100
        NUM_COLS = 10
        col_names = [
            "A" + num for num in map(str,
                                     np.arange(NUM_COLS).tolist())
        ]
        index_cols = col_names[:5]

        df = DataFrame(
            np.random.randint(5, size=(NUM_ROWS, NUM_COLS)),
            dtype=np.int64,
            columns=col_names,
        )
        df = df.set_index(index_cols).sort_index()
        grp = df.groupby(level=index_cols[:4])
        df["new_col"] = np.nan

        f_index = np.arange(5)

        def f(name, df2):
            return Series(np.arange(df2.shape[0]),
                          name=df2.index.values[0]).reindex(f_index)

        # FIXME: dont leave commented-out
        # TODO(wesm): unused?
        # new_df = pd.concat([f(name, df2) for name, df2 in grp], axis=1).T

        # we are actually operating on a copy here
        # but in this case, that's ok
        for name, df2 in grp:
            new_vals = np.arange(df2.shape[0])
            df.loc[name, "new_col"] = new_vals
예제 #4
0
    def test_iloc_exceeds_bounds(self):

        # GH6296
        # iloc should allow indexers that exceed the bounds
        df = DataFrame(np.random.random_sample((20, 5)), columns=list("ABCDE"))

        # lists of positions should raise IndexError!
        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[:, [0, 1, 2, 3, 4, 5]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, 30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[1, -30]]
        with pytest.raises(IndexError, match=msg):
            df.iloc[[100]]

        s = df["A"]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[100]]
        with pytest.raises(IndexError, match=msg):
            s.iloc[[-100]]

        # still raise on a single indexer
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            df.iloc[30]
        with pytest.raises(IndexError, match=msg):
            df.iloc[-30]

        # GH10779
        # single positive/negative indexer exceeding Series bounds should raise
        # an IndexError
        with pytest.raises(IndexError, match=msg):
            s.iloc[30]
        with pytest.raises(IndexError, match=msg):
            s.iloc[-30]

        # slices are ok
        result = df.iloc[:, 4:10]  # 0 < start < len < stop
        expected = df.iloc[:, 4:]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -4:-10]  # stop < 0 < start < len
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4:-1]  # 0 < stop < len < start (down)
        expected = df.iloc[:, :4:-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 4:-10:-1]  # stop < 0 < start < len (down)
        expected = df.iloc[:, 4::-1]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:4]  # start < 0 < stop < len
        expected = df.iloc[:, :4]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:4]  # 0 < stop < len < start
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, -10:-11:-1]  # stop < start < 0 < len (down)
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        result = df.iloc[:, 10:11]  # 0 < len < start < stop
        expected = df.iloc[:, :0]
        tm.assert_frame_equal(result, expected)

        # slice bounds exceeding is ok
        result = s.iloc[18:30]
        expected = s.iloc[18:]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30:]
        expected = s.iloc[:0]
        tm.assert_series_equal(result, expected)

        result = s.iloc[30::-1]
        expected = s.iloc[::-1]
        tm.assert_series_equal(result, expected)

        # doc example
        def check(result, expected):
            str(result)
            result.dtypes
            tm.assert_frame_equal(result, expected)

        dfl = DataFrame(np.random.randn(5, 2), columns=list("AB"))
        check(dfl.iloc[:, 2:3], DataFrame(index=dfl.index))
        check(dfl.iloc[:, 1:3], dfl.iloc[:, [1]])
        check(dfl.iloc[4:6], dfl.iloc[[4]])

        msg = "positional indexers are out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[[4, 5, 6]]
        msg = "single positional indexer is out-of-bounds"
        with pytest.raises(IndexError, match=msg):
            dfl.iloc[:, 4]
예제 #5
0
 def test_iloc_setitem_td64_values_cast_na(self, value):
     # GH#18586
     series = Series([0, 1, 2], dtype="timedelta64[ns]")
     series.iloc[0] = value
     expected = Series([NaT, 1, 2], dtype="timedelta64[ns]")
     tm.assert_series_equal(series, expected)
예제 #6
0
def test_store_multiindex(setup_path):

    # validate multi-index names
    # GH 5527
    with ensure_clean_store(setup_path) as store:

        def make_index(names=None):
            return MultiIndex.from_tuples(
                [(datetime.datetime(2013, 12, d), s, t) for d in range(1, 3)
                 for s in range(2) for t in range(3)],
                names=names,
            )

        # no names
        _maybe_remove(store, "df")
        df = DataFrame(np.zeros((12, 2)),
                       columns=["a", "b"],
                       index=make_index())
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # partial names
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", None, None]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)

        # series
        _maybe_remove(store, "s")
        s = Series(np.zeros(12), index=make_index(["date", None, None]))
        store.append("s", s)
        xp = Series(np.zeros(12),
                    index=make_index(["date", "level_1", "level_2"]))
        tm.assert_series_equal(store.select("s"), xp)

        # dup with column
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "a", "t"]),
        )
        msg = "duplicate names/columns in the multi-index when storing as a table"
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # dup within level
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "date", "date"]),
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df", df)

        # fully names
        _maybe_remove(store, "df")
        df = DataFrame(
            np.zeros((12, 2)),
            columns=["a", "b"],
            index=make_index(["date", "s", "t"]),
        )
        store.append("df", df)
        tm.assert_frame_equal(store.select("df"), df)
예제 #7
0
    def test_shift(self, datetime_series):
        shifted = datetime_series.shift(1)
        unshifted = shifted.shift(-1)

        tm.assert_index_equal(shifted.index, datetime_series.index)
        tm.assert_index_equal(unshifted.index, datetime_series.index)
        tm.assert_numpy_array_equal(
            unshifted.dropna().values, datetime_series.values[:-1]
        )

        offset = BDay()
        shifted = datetime_series.shift(1, freq=offset)
        unshifted = shifted.shift(-1, freq=offset)

        tm.assert_series_equal(unshifted, datetime_series)

        unshifted = datetime_series.shift(0, freq=offset)
        tm.assert_series_equal(unshifted, datetime_series)

        shifted = datetime_series.shift(1, freq="B")
        unshifted = shifted.shift(-1, freq="B")

        tm.assert_series_equal(unshifted, datetime_series)

        # corner case
        unshifted = datetime_series.shift(0)
        tm.assert_series_equal(unshifted, datetime_series)

        # Shifting with PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.shift(1)
        unshifted = shifted.shift(-1)
        tm.assert_index_equal(shifted.index, ps.index)
        tm.assert_index_equal(unshifted.index, ps.index)
        tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1])

        shifted2 = ps.shift(1, "B")
        shifted3 = ps.shift(1, BDay())
        tm.assert_series_equal(shifted2, shifted3)
        tm.assert_series_equal(ps, shifted2.shift(-1, "B"))

        msg = "Given freq D does not match PeriodIndex freq B"
        with pytest.raises(ValueError, match=msg):
            ps.shift(freq="D")

        # legacy support
        shifted4 = ps.shift(1, freq="B")
        tm.assert_series_equal(shifted2, shifted4)

        shifted5 = ps.shift(1, freq=BDay())
        tm.assert_series_equal(shifted5, shifted4)

        # 32-bit taking
        # GH#8129
        index = date_range("2000-01-01", periods=5)
        for dtype in ["int32", "int64"]:
            s1 = Series(np.arange(5, dtype=dtype), index=index)
            p = s1.iloc[1]
            result = s1.shift(periods=p)
            expected = Series([np.nan, 0, 1, 2, 3], index=index)
            tm.assert_series_equal(result, expected)

        # GH#8260
        # with tz
        s = Series(
            date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo"
        )
        result = s - s.shift()

        exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(result, exp)

        # incompat tz
        s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo")
        msg = "DatetimeArray subtraction must have the same timezones or no timezones"
        with pytest.raises(TypeError, match=msg):
            s - s2
예제 #8
0
 def _check(result, expected):
     if isinstance(result, Series):
         tm.assert_series_equal(result, expected)
     else:
         tm.assert_index_equal(result, expected)
예제 #9
0
def test_slice_replace():
    values = Series(["short", "a bit longer", "evenlongerthanthat", "", np.nan])

    exp = Series(["shrt", "a it longer", "evnlongerthanthat", "", np.nan])
    result = values.str.slice_replace(2, 3)
    tm.assert_series_equal(result, exp)

    exp = Series(["shzrt", "a zit longer", "evznlongerthanthat", "z", np.nan])
    result = values.str.slice_replace(2, 3, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan])
    result = values.str.slice_replace(2, 2, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["shzort", "a zbit longer", "evzenlongerthanthat", "z", np.nan])
    result = values.str.slice_replace(2, 1, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["shorz", "a bit longez", "evenlongerthanthaz", "z", np.nan])
    result = values.str.slice_replace(-1, None, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["zrt", "zer", "zat", "z", np.nan])
    result = values.str.slice_replace(None, -2, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["shortz", "a bit znger", "evenlozerthanthat", "z", np.nan])
    result = values.str.slice_replace(6, 8, "z")
    tm.assert_series_equal(result, exp)

    exp = Series(["zrt", "a zit longer", "evenlongzerthanthat", "z", np.nan])
    result = values.str.slice_replace(-10, 3, "z")
    tm.assert_series_equal(result, exp)
예제 #10
0
def assert_series_or_index_equal(left, right):
    if isinstance(left, Series):
        tm.assert_series_equal(left, right)
    else:  # Index
        tm.assert_index_equal(left, right)
예제 #11
0
def test_ismethods():
    values = ["A", "b", "Xy", "4", "3A", "", "TT", "55", "-", "  "]
    str_s = Series(values)
    alnum_e = [True, True, True, True, True, False, True, True, False, False]
    alpha_e = [True, True, True, False, False, False, True, False, False, False]
    digit_e = [False, False, False, True, False, False, False, True, False, False]

    # TODO: unused
    num_e = [  # noqa
        False,
        False,
        False,
        True,
        False,
        False,
        False,
        True,
        False,
        False,
    ]

    space_e = [False, False, False, False, False, False, False, False, False, True]
    lower_e = [False, True, False, False, False, False, False, False, False, False]
    upper_e = [True, False, False, False, True, False, True, False, False, False]
    title_e = [True, False, True, False, True, False, False, False, False, False]

    tm.assert_series_equal(str_s.str.isalnum(), Series(alnum_e))
    tm.assert_series_equal(str_s.str.isalpha(), Series(alpha_e))
    tm.assert_series_equal(str_s.str.isdigit(), Series(digit_e))
    tm.assert_series_equal(str_s.str.isspace(), Series(space_e))
    tm.assert_series_equal(str_s.str.islower(), Series(lower_e))
    tm.assert_series_equal(str_s.str.isupper(), Series(upper_e))
    tm.assert_series_equal(str_s.str.istitle(), Series(title_e))

    assert str_s.str.isalnum().tolist() == [v.isalnum() for v in values]
    assert str_s.str.isalpha().tolist() == [v.isalpha() for v in values]
    assert str_s.str.isdigit().tolist() == [v.isdigit() for v in values]
    assert str_s.str.isspace().tolist() == [v.isspace() for v in values]
    assert str_s.str.islower().tolist() == [v.islower() for v in values]
    assert str_s.str.isupper().tolist() == [v.isupper() for v in values]
    assert str_s.str.istitle().tolist() == [v.istitle() for v in values]
예제 #12
0
def test_empty_str_methods():
    empty_str = empty = Series(dtype=object)
    empty_int = Series(dtype="int64")
    empty_bool = Series(dtype=bool)
    empty_bytes = Series(dtype=object)

    # GH7241
    # (extract) on empty series

    tm.assert_series_equal(empty_str, empty.str.cat(empty))
    assert "" == empty.str.cat()
    tm.assert_series_equal(empty_str, empty.str.title())
    tm.assert_series_equal(empty_int, empty.str.count("a"))
    tm.assert_series_equal(empty_bool, empty.str.contains("a"))
    tm.assert_series_equal(empty_bool, empty.str.startswith("a"))
    tm.assert_series_equal(empty_bool, empty.str.endswith("a"))
    tm.assert_series_equal(empty_str, empty.str.lower())
    tm.assert_series_equal(empty_str, empty.str.upper())
    tm.assert_series_equal(empty_str, empty.str.replace("a", "b"))
    tm.assert_series_equal(empty_str, empty.str.repeat(3))
    tm.assert_series_equal(empty_bool, empty.str.match("^a"))
    tm.assert_frame_equal(
        DataFrame(columns=[0], dtype=str), empty.str.extract("()", expand=True)
    )
    tm.assert_frame_equal(
        DataFrame(columns=[0, 1], dtype=str), empty.str.extract("()()", expand=True)
    )
    tm.assert_series_equal(empty_str, empty.str.extract("()", expand=False))
    tm.assert_frame_equal(
        DataFrame(columns=[0, 1], dtype=str),
        empty.str.extract("()()", expand=False),
    )
    tm.assert_frame_equal(DataFrame(dtype=str), empty.str.get_dummies())
    tm.assert_series_equal(empty_str, empty_str.str.join(""))
    tm.assert_series_equal(empty_int, empty.str.len())
    tm.assert_series_equal(empty_str, empty_str.str.findall("a"))
    tm.assert_series_equal(empty_int, empty.str.find("a"))
    tm.assert_series_equal(empty_int, empty.str.rfind("a"))
    tm.assert_series_equal(empty_str, empty.str.pad(42))
    tm.assert_series_equal(empty_str, empty.str.center(42))
    tm.assert_series_equal(empty_str, empty.str.split("a"))
    tm.assert_series_equal(empty_str, empty.str.rsplit("a"))
    tm.assert_series_equal(empty_str, empty.str.partition("a", expand=False))
    tm.assert_series_equal(empty_str, empty.str.rpartition("a", expand=False))
    tm.assert_series_equal(empty_str, empty.str.slice(stop=1))
    tm.assert_series_equal(empty_str, empty.str.slice(step=1))
    tm.assert_series_equal(empty_str, empty.str.strip())
    tm.assert_series_equal(empty_str, empty.str.lstrip())
    tm.assert_series_equal(empty_str, empty.str.rstrip())
    tm.assert_series_equal(empty_str, empty.str.wrap(42))
    tm.assert_series_equal(empty_str, empty.str.get(0))
    tm.assert_series_equal(empty_str, empty_bytes.str.decode("ascii"))
    tm.assert_series_equal(empty_bytes, empty.str.encode("ascii"))
    # ismethods should always return boolean (GH 29624)
    tm.assert_series_equal(empty_bool, empty.str.isalnum())
    tm.assert_series_equal(empty_bool, empty.str.isalpha())
    tm.assert_series_equal(empty_bool, empty.str.isdigit())
    tm.assert_series_equal(empty_bool, empty.str.isspace())
    tm.assert_series_equal(empty_bool, empty.str.islower())
    tm.assert_series_equal(empty_bool, empty.str.isupper())
    tm.assert_series_equal(empty_bool, empty.str.istitle())
    tm.assert_series_equal(empty_bool, empty.str.isnumeric())
    tm.assert_series_equal(empty_bool, empty.str.isdecimal())
    tm.assert_series_equal(empty_str, empty.str.capitalize())
    tm.assert_series_equal(empty_str, empty.str.swapcase())
    tm.assert_series_equal(empty_str, empty.str.normalize("NFC"))

    table = str.maketrans("a", "b")
    tm.assert_series_equal(empty_str, empty.str.translate(table))
예제 #13
0
def test_apply_attach_name_axis1(float_frame):
    result = float_frame.apply(lambda x: x.name, axis=1)
    expected = Series(float_frame.index, index=float_frame.index)
    tm.assert_series_equal(result, expected)
예제 #14
0
def test_apply_attach_name(float_frame):
    result = float_frame.apply(lambda x: x.name)
    expected = Series(float_frame.columns, index=float_frame.columns)
    tm.assert_series_equal(result, expected)
예제 #15
0
    def test_categorical_delegations(self):

        # invalid accessor
        msg = r"Can only use \.cat accessor with a 'category' dtype"
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat
        with pytest.raises(AttributeError, match=msg):
            Series([1, 2, 3]).cat()
        with pytest.raises(AttributeError, match=msg):
            Series(["a", "b", "c"]).cat
        with pytest.raises(AttributeError, match=msg):
            Series(np.arange(5.0)).cat
        with pytest.raises(AttributeError, match=msg):
            Series([Timestamp("20130101")]).cat

        # Series should delegate calls to '.categories', '.codes', '.ordered'
        # and the methods '.set_categories()' 'drop_unused_categories()' to the
        # categorical
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["a", "b", "c"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        ser.cat.categories = [1, 2, 3]
        exp_categories = Index([1, 2, 3])
        tm.assert_index_equal(ser.cat.categories, exp_categories)

        exp_codes = Series([0, 1, 2, 0], dtype="int8")
        tm.assert_series_equal(ser.cat.codes, exp_codes)

        assert ser.cat.ordered
        ser = ser.cat.as_unordered()
        assert not ser.cat.ordered
        return_value = ser.cat.as_ordered(inplace=True)
        assert return_value is None
        assert ser.cat.ordered

        # reorder
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        exp_categories = Index(["c", "b", "a"])
        exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_)
        ser = ser.cat.set_categories(["c", "b", "a"])
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # remove unused categories
        ser = Series(Categorical(["a", "b", "b", "a"], categories=["a", "b", "c"]))
        exp_categories = Index(["a", "b"])
        exp_values = np.array(["a", "b", "b", "a"], dtype=np.object_)
        ser = ser.cat.remove_unused_categories()
        tm.assert_index_equal(ser.cat.categories, exp_categories)
        tm.assert_numpy_array_equal(ser.values.__array__(), exp_values)
        tm.assert_numpy_array_equal(ser.__array__(), exp_values)

        # This method is likely to be confused, so test that it raises an error
        # on wrong inputs:
        msg = "'Series' object has no attribute 'set_categories'"
        with pytest.raises(AttributeError, match=msg):
            ser.set_categories([4, 3, 2, 1])

        # right: ser.cat.set_categories([4,3,2,1])

        # GH#18862 (let Series.cat.rename_categories take callables)
        ser = Series(Categorical(["a", "b", "c", "a"], ordered=True))
        result = ser.cat.rename_categories(lambda x: x.upper())
        expected = Series(
            Categorical(["A", "B", "C", "A"], categories=["A", "B", "C"], ordered=True)
        )
        tm.assert_series_equal(result, expected)
예제 #16
0
def test_str_accessor_in_apply_func():
    # https://github.com/pandas-dev/pandas/issues/38979
    df = DataFrame(zip("abc", "def"))
    expected = Series(["A/D", "B/E", "C/F"])
    result = df.apply(lambda f: "/".join(f.str.upper()), axis=1)
    tm.assert_series_equal(result, expected)
예제 #17
0
    def test_grouper_creation_bug(self):

        # GH 8795
        df = DataFrame({"A": [0, 0, 1, 1, 2, 2], "B": [1, 2, 3, 4, 5, 6]})
        g = df.groupby("A")
        expected = g.sum()

        g = df.groupby(pd.Grouper(key="A"))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        result = g.apply(lambda x: x.sum())
        tm.assert_frame_equal(result, expected)

        g = df.groupby(pd.Grouper(key="A", axis=0))
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # GH14334
        # pd.Grouper(key=...) may be passed in a list
        df = DataFrame({
            "A": [0, 0, 0, 1, 1, 1],
            "B": [1, 1, 2, 2, 3, 3],
            "C": [1, 2, 3, 4, 5, 6]
        })
        # Group by single column
        expected = df.groupby("A").sum()
        g = df.groupby([pd.Grouper(key="A")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group by two columns
        # using a combination of strings and Grouper objects
        expected = df.groupby(["A", "B"]).sum()

        # Group with two Grouper objects
        g = df.groupby([pd.Grouper(key="A"), pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a string and a Grouper object
        g = df.groupby(["A", pd.Grouper(key="B")])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # Group with a Grouper object and a string
        g = df.groupby([pd.Grouper(key="A"), "B"])
        result = g.sum()
        tm.assert_frame_equal(result, expected)

        # GH8866
        s = Series(
            np.arange(8, dtype="int64"),
            index=pd.MultiIndex.from_product(
                [list("ab"),
                 range(2),
                 date_range("20130101", periods=2)],
                names=["one", "two", "three"],
            ),
        )
        result = s.groupby(pd.Grouper(level="three", freq="M")).sum()
        expected = Series([28],
                          index=Index([Timestamp("2013-01-31")],
                                      freq="M",
                                      name="three"))
        tm.assert_series_equal(result, expected)

        # just specifying a level breaks
        result = s.groupby(pd.Grouper(level="one")).sum()
        expected = s.groupby(level="one").sum()
        tm.assert_series_equal(result, expected)
예제 #18
0
def test_getitem_setitem_datetimeindex():
    N = 50
    # testing with timezone, GH #2785
    rng = date_range("1/1/1990", periods=N, freq="H", tz="US/Eastern")
    ts = Series(np.random.randn(N), index=rng)

    result = ts["1990-01-01 04:00:00"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04:00:00"] = 0
    result["1990-01-01 04:00:00"] = ts[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04:00:00":"1990-01-01 07:00:00"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = 0
    result["1990-01-01 04:00:00":"1990-01-01 07:00:00"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04:00:00"
    rb = "1990-01-01 07:00:00"
    # GH#18435 strings get a pass from tzawareness compat
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    lb = "1990-01-01 04:00:00-0500"
    rb = "1990-01-01 07:00:00-0500"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # But we do not give datetimes a pass on tzawareness compat
    # TODO: do the same with Timestamps and dt64
    msg = "Cannot compare tz-naive and tz-aware datetime-like objects"
    naive = datetime(1990, 1, 1, 4)
    with tm.assert_produces_warning(FutureWarning):
        # GH#36148 will require tzawareness compat
        result = ts[naive]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4)] = 0
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4)] = ts[4]
    tm.assert_series_equal(result, ts)

    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # GH#36148 will require tzawareness compat
        result = ts[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = 0
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        # GH#36148 will require tzawareness compat
        result[datetime(1990, 1, 1, 4):datetime(1990, 1, 1, 7)] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = datetime(1990, 1, 1, 4)
    rb = datetime(1990, 1, 1, 7)
    msg = r"Invalid comparison between dtype=datetime64\[ns, US/Eastern\] and datetime"
    with pytest.raises(TypeError, match=msg):
        # tznaive vs tzaware comparison is invalid
        # see GH#18376, GH#18162
        ts[(ts.index >= lb) & (ts.index <= rb)]

    lb = Timestamp(datetime(1990, 1, 1, 4)).tz_localize(rng.tzinfo)
    rb = Timestamp(datetime(1990, 1, 1, 7)).tz_localize(rng.tzinfo)
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)

    # also test partial date slicing
    result = ts["1990-01-02"]
    expected = ts[24:48]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-02"] = 0
    result["1990-01-02"] = ts[24:48]
    tm.assert_series_equal(result, ts)
예제 #19
0
    def test_tshift(self, datetime_series):
        # TODO: remove this test when tshift deprecation is enforced

        # PeriodIndex
        ps = tm.makePeriodSeries()
        shifted = ps.tshift(1)
        unshifted = shifted.tshift(-1)

        tm.assert_series_equal(unshifted, ps)

        shifted2 = ps.tshift(freq="B")
        tm.assert_series_equal(shifted, shifted2)

        shifted3 = ps.tshift(freq=BDay())
        tm.assert_series_equal(shifted, shifted3)

        msg = "Given freq M does not match PeriodIndex freq B"
        with pytest.raises(ValueError, match=msg):
            ps.tshift(freq="M")

        # DatetimeIndex
        shifted = datetime_series.tshift(1)
        unshifted = shifted.tshift(-1)

        tm.assert_series_equal(datetime_series, unshifted)

        shifted2 = datetime_series.tshift(freq=datetime_series.index.freq)
        tm.assert_series_equal(shifted, shifted2)

        inferred_ts = Series(
            datetime_series.values, Index(np.asarray(datetime_series.index)), name="ts"
        )
        shifted = inferred_ts.tshift(1)
        expected = datetime_series.tshift(1)
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(shifted, expected)

        unshifted = shifted.tshift(-1)
        tm.assert_series_equal(unshifted, inferred_ts)

        no_freq = datetime_series[[0, 5, 7]]
        msg = "Freq was not set in the index hence cannot be inferred"
        with pytest.raises(ValueError, match=msg):
            no_freq.tshift()
예제 #20
0
def test_getitem_setitem_periodindex():

    N = 50
    rng = period_range("1/1/1990", periods=N, freq="H")
    ts = Series(np.random.randn(N), index=rng)

    result = ts["1990-01-01 04"]
    expected = ts[4]
    assert result == expected

    result = ts.copy()
    result["1990-01-01 04"] = 0
    result["1990-01-01 04"] = ts[4]
    tm.assert_series_equal(result, ts)

    result = ts["1990-01-01 04":"1990-01-01 07"]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result["1990-01-01 04":"1990-01-01 07"] = 0
    result["1990-01-01 04":"1990-01-01 07"] = ts[4:8]
    tm.assert_series_equal(result, ts)

    lb = "1990-01-01 04"
    rb = "1990-01-01 07"
    result = ts[(ts.index >= lb) & (ts.index <= rb)]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    # GH 2782
    result = ts[ts.index[4]]
    expected = ts[4]
    assert result == expected

    result = ts[ts.index[4:8]]
    expected = ts[4:8]
    tm.assert_series_equal(result, expected)

    result = ts.copy()
    result[ts.index[4:8]] = 0
    result.iloc[4:8] = ts.iloc[4:8]
    tm.assert_series_equal(result, ts)
예제 #21
0
 def test_shift_int(self, datetime_series):
     ts = datetime_series.astype(int)
     shifted = ts.shift(1)
     expected = ts.astype(float).shift(1)
     tm.assert_series_equal(shifted, expected)
예제 #22
0
def test_groupby_average_dup_values(dups):
    result = dups.groupby(level=0).mean()
    expected = dups.groupby(dups.index).mean()
    tm.assert_series_equal(result, expected)
예제 #23
0
 def test_indexing_zerodim_np_array(self):
     # GH24919
     df = DataFrame([[1, 2], [3, 4]])
     result = df.iloc[np.array(0)]
     s = Series([1, 2], name=0)
     tm.assert_series_equal(result, s)
예제 #24
0
 def compare(slobj):
     result = ts2[slobj].copy()
     result = result.sort_index()
     expected = ts[slobj]
     expected.index = expected.index._with_freq(None)
     tm.assert_series_equal(result, expected)
예제 #25
0
    def test_equals_op(self):
        # GH9947, GH10637
        index_a = self.create_index()
        if isinstance(index_a, PeriodIndex):
            pytest.skip("Skip check for PeriodIndex")

        n = len(index_a)
        index_b = index_a[0:-1]
        index_c = index_a[0:-1].append(index_a[-2:-1])
        index_d = index_a[0:1]

        msg = "Lengths must match|could not be broadcast"
        with pytest.raises(ValueError, match=msg):
            index_a == index_b
        expected1 = np.array([True] * n)
        expected2 = np.array([True] * (n - 1) + [False])
        tm.assert_numpy_array_equal(index_a == index_a, expected1)
        tm.assert_numpy_array_equal(index_a == index_c, expected2)

        # test comparisons with numpy arrays
        array_a = np.array(index_a)
        array_b = np.array(index_a[0:-1])
        array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
        array_d = np.array(index_a[0:1])
        with pytest.raises(ValueError, match=msg):
            index_a == array_b
        tm.assert_numpy_array_equal(index_a == array_a, expected1)
        tm.assert_numpy_array_equal(index_a == array_c, expected2)

        # test comparisons with Series
        series_a = Series(array_a)
        series_b = Series(array_b)
        series_c = Series(array_c)
        series_d = Series(array_d)
        with pytest.raises(ValueError, match=msg):
            index_a == series_b

        tm.assert_numpy_array_equal(index_a == series_a, expected1)
        tm.assert_numpy_array_equal(index_a == series_c, expected2)

        # cases where length is 1 for one of them
        with pytest.raises(ValueError, match="Lengths must match"):
            index_a == index_d
        with pytest.raises(ValueError, match="Lengths must match"):
            index_a == series_d
        with pytest.raises(ValueError, match="Lengths must match"):
            index_a == array_d
        msg = "Can only compare identically-labeled Series objects"
        with pytest.raises(ValueError, match=msg):
            series_a == series_d
        with pytest.raises(ValueError, match="Lengths must match"):
            series_a == array_d

        # comparing with a scalar should broadcast; note that we are excluding
        # MultiIndex because in this case each item in the index is a tuple of
        # length 2, and therefore is considered an array of length 2 in the
        # comparison instead of a scalar
        if not isinstance(index_a, MultiIndex):
            expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
            # assuming the 2nd to last item is unique in the data
            item = index_a[-2]
            tm.assert_numpy_array_equal(index_a == item, expected3)
            tm.assert_series_equal(series_a == item, Series(expected3))
예제 #26
0
def test_indexing():
    idx = date_range("2001-1-1", periods=20, freq="M")
    ts = Series(np.random.rand(len(idx)), index=idx)

    # getting

    # GH 3070, make sure semantics work on Series/Frame
    expected = ts["2001"]
    expected.name = "A"

    df = DataFrame({"A": ts})
    with tm.assert_produces_warning(FutureWarning):
        # GH#36179 string indexing on rows for DataFrame deprecated
        result = df["2001"]["A"]
    tm.assert_series_equal(expected, result)

    # setting
    ts["2001"] = 1
    expected = ts["2001"]
    expected.name = "A"

    df.loc["2001", "A"] = 1

    with tm.assert_produces_warning(FutureWarning):
        # GH#36179 string indexing on rows for DataFrame deprecated
        result = df["2001"]["A"]
    tm.assert_series_equal(expected, result)

    # GH3546 (not including times on the last day)
    idx = date_range(start="2013-05-31 00:00",
                     end="2013-05-31 23:00",
                     freq="H")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)

    idx = date_range(start="2013-05-31 00:00",
                     end="2013-05-31 23:59",
                     freq="S")
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013-05"]
    tm.assert_series_equal(expected, ts)

    idx = [
        Timestamp("2013-05-31 00:00"),
        Timestamp(datetime(2013, 5, 31, 23, 59, 59, 999999)),
    ]
    ts = Series(range(len(idx)), index=idx)
    expected = ts["2013"]
    tm.assert_series_equal(expected, ts)

    # GH14826, indexing with a seconds resolution string / datetime object
    df = DataFrame(
        np.random.rand(5, 5),
        columns=["open", "high", "low", "close", "volume"],
        index=date_range("2012-01-02 18:01:00",
                         periods=5,
                         tz="US/Central",
                         freq="s"),
    )
    expected = df.loc[[df.index[2]]]

    # this is a single date, so will raise
    with pytest.raises(KeyError, match=r"^'2012-01-02 18:01:02'$"):
        df["2012-01-02 18:01:02"]
    msg = r"Timestamp\('2012-01-02 18:01:02-0600', tz='US/Central', freq='S'\)"
    with pytest.raises(KeyError, match=msg):
        df[df.index[2]]
예제 #27
0
파일: test_loc.py 프로젝트: sduzjp/Python
def test_loc_getitem_iterator(string_series):
    idx = iter(string_series.index[:10])
    result = string_series.loc[idx]
    tm.assert_series_equal(result, string_series[:10])
예제 #28
0
    def test_dt_accessor_api_for_categorical(self):
        # https://github.com/pandas-dev/pandas/issues/10661

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr),
            ("Period", get_ops(PeriodArray), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        special_func_defs = [
            ("strftime", ("%Y-%m-%d",), {}),
            ("tz_convert", ("EST",), {}),
            ("round", ("D",), {}),
            ("floor", ("D",), {}),
            ("ceil", ("D",), {}),
            ("asfreq", ("D",), {}),
            # FIXME: don't leave commented-out
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f
                for f in dir(s.dt)
                if not (
                    f.startswith("_")
                    or f in attr_names
                    or f in _special_func_names
                    or f in _ignore_names
                )
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                tm.assert_equal(res, exp)

            for attr in attr_names:
                if attr in ["week", "weekofyear"]:
                    # GH#33595 Deprecate week and weekofyear
                    continue
                res = getattr(c.dt, attr)
                exp = getattr(s.dt, attr)

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            elif isinstance(res, Series):
                tm.assert_series_equal(res, exp)
            else:
                tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")
    def test_groupby_rolling_center_center(self):
        # GH 35552
        series = Series(range(1, 6))
        result = series.groupby(series).rolling(center=True, window=3).mean()
        expected = Series(
            [np.nan] * 5,
            index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3), (5, 4))),
        )
        tm.assert_series_equal(result, expected)

        series = Series(range(1, 5))
        result = series.groupby(series).rolling(center=True, window=3).mean()
        expected = Series(
            [np.nan] * 4,
            index=MultiIndex.from_tuples(((1, 0), (2, 1), (3, 2), (4, 3))),
        )
        tm.assert_series_equal(result, expected)

        df = DataFrame({"a": ["a"] * 5 + ["b"] * 6, "b": range(11)})
        result = df.groupby("a").rolling(center=True, window=3).mean()
        expected = DataFrame(
            [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, 9, np.nan],
            index=MultiIndex.from_tuples(
                (
                    ("a", 0),
                    ("a", 1),
                    ("a", 2),
                    ("a", 3),
                    ("a", 4),
                    ("b", 5),
                    ("b", 6),
                    ("b", 7),
                    ("b", 8),
                    ("b", 9),
                    ("b", 10),
                ),
                names=["a", None],
            ),
            columns=["b"],
        )
        tm.assert_frame_equal(result, expected)

        df = DataFrame({"a": ["a"] * 5 + ["b"] * 5, "b": range(10)})
        result = df.groupby("a").rolling(center=True, window=3).mean()
        expected = DataFrame(
            [np.nan, 1, 2, 3, np.nan, np.nan, 6, 7, 8, np.nan],
            index=MultiIndex.from_tuples(
                (
                    ("a", 0),
                    ("a", 1),
                    ("a", 2),
                    ("a", 3),
                    ("a", 4),
                    ("b", 5),
                    ("b", 6),
                    ("b", 7),
                    ("b", 8),
                    ("b", 9),
                ),
                names=["a", None],
            ),
            columns=["b"],
        )
        tm.assert_frame_equal(result, expected)
예제 #30
0
def test_apply_raw_float_frame_lambda(float_frame, axis):
    result = float_frame.apply(np.mean, axis=axis, raw=True)
    expected = float_frame.apply(lambda x: x.values.mean(), axis=axis)
    tm.assert_series_equal(result, expected)