Esempio n. 1
0
    def test_numpy_transpose(self):
        for obj in self.objs:
            tm.assert_equal(np.transpose(obj), obj)

            with pytest.raises(ValueError, match=self.errmsg):
                np.transpose(obj, axes=1)
Esempio n. 2
0
def assert_invalid_comparison(left, right, box):
    """
    Assert that comparison operations with mismatched types behave correctly.

    Parameters
    ----------
    left : np.ndarray, ExtensionArray, Index, or Series
    right : object
    box : {pd.DataFrame, pd.Series, pd.Index, pd.array, tm.to_array}
    """
    # Not for tznaive-tzaware comparison

    # Note: not quite the same as how we do this for tm.box_expected
    xbox = box if box not in [Index, array] else np.array

    def xbox2(x):
        # Eventually we'd like this to be tighter, but for now we'll
        #  just exclude PandasArray[bool]
        if isinstance(x, PandasArray):
            return x._ndarray
        return x

    result = xbox2(left == right)
    expected = xbox(np.zeros(result.shape, dtype=np.bool_))

    tm.assert_equal(result, expected)

    result = xbox2(right == left)
    tm.assert_equal(result, expected)

    result = xbox2(left != right)
    tm.assert_equal(result, ~expected)

    result = xbox2(right != left)
    tm.assert_equal(result, ~expected)

    msg = "|".join([
        "Invalid comparison between",
        "Cannot compare type",
        "not supported between",
        "invalid type promotion",
        (
            # GH#36706 npdev 1.20.0 2020-09-28
            r"The DTypes <class 'numpy.dtype\[datetime64\]'> and "
            r"<class 'numpy.dtype\[int64\]'> do not have a common DType. "
            "For example they cannot be stored in a single array unless the "
            "dtype is `object`."),
    ])
    with pytest.raises(TypeError, match=msg):
        left < right
    with pytest.raises(TypeError, match=msg):
        left <= right
    with pytest.raises(TypeError, match=msg):
        left > right
    with pytest.raises(TypeError, match=msg):
        left >= right
    with pytest.raises(TypeError, match=msg):
        right < left
    with pytest.raises(TypeError, match=msg):
        right <= left
    with pytest.raises(TypeError, match=msg):
        right > left
    with pytest.raises(TypeError, match=msg):
        right >= left
Esempio n. 3
0
def test_nat_arithmetic_td64_vector(op_name, box):
    # see gh-19124
    vec = box(["1 day", "2 day"], dtype="timedelta64[ns]")
    box_nat = box([NaT, NaT], dtype="timedelta64[ns]")
    tm.assert_equal(_ops[op_name](vec, NaT), box_nat)
Esempio n. 4
0
def test_apply_as_index_constant_lambda(as_index, expected):
    # GH 13217
    df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 1, 2, 2], "c": [1, 1, 1, 1]})
    result = df.groupby(["a", "b"], as_index=as_index).apply(lambda x: 1)
    tm.assert_equal(result, expected)
Esempio n. 5
0
 def test_constructor_unwraps_index(self, index):
     if isinstance(index, pd.MultiIndex):
         raise pytest.skip("MultiIndex has no ._data")
     a = index
     b = type(a)(a)
     tm.assert_equal(a._data, b._data)
Esempio n. 6
0
 def test_rank_object_first(self, frame_or_series, na_option, ascending, expected):
     obj = frame_or_series(["foo", "foo", None, "foo"])
     result = obj.rank(method="first", na_option=na_option, ascending=ascending)
     expected = frame_or_series(expected)
     tm.assert_equal(result, expected)
Esempio n. 7
0
def test_numpy_zero_dim_ndarray(other):
    arr = integer_array([1, None, 2])
    result = arr + np.array(other)
    expected = arr + other
    tm.assert_equal(result, expected)
def test_transpose(index_or_series_obj):
    obj = index_or_series_obj
    tm.assert_equal(obj.transpose(), obj)
Esempio n. 9
0
    def test_ufunc_coercions(self, holder):
        idx = holder([1, 2, 3, 4, 5], name="x")
        box = pd.Series if holder is pd.Series else pd.Index

        result = np.sqrt(idx)
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index(np.sqrt(np.array([1, 2, 3, 4, 5])), name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = np.divide(idx, 2.0)
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        # _evaluate_numeric_binop
        result = idx + 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index([3.0, 4.0, 5.0, 6.0, 7.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx - 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index([-1.0, 0.0, 1.0, 2.0, 3.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx * 1.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index([1.0, 2.0, 3.0, 4.0, 5.0], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)

        result = idx / 2.0
        assert result.dtype == "f8" and isinstance(result, box)
        exp = pd.Float64Index([0.5, 1.0, 1.5, 2.0, 2.5], name="x")
        exp = tm.box_expected(exp, box)
        tm.assert_equal(result, exp)
Esempio n. 10
0
def test_where_inplace_casting(data):
    # GH 22051
    df = DataFrame({"a": data})
    df_copy = df.where(pd.notnull(df), None).copy()
    df.where(pd.notnull(df), None, inplace=True)
    tm.assert_equal(df, df_copy)
    def test_logical_operators_nans(self, left, right, op, expected, frame_or_series):
        # GH#13896
        result = op(frame_or_series(left), frame_or_series(right))
        expected = frame_or_series(expected)

        tm.assert_equal(result, expected)
Esempio n. 12
0
def test_array_inference(data, expected):
    result = pd.array(data)
    tm.assert_equal(result, expected)
Esempio n. 13
0
def test_array(data, dtype, expected):
    result = pd.array(data, dtype=dtype)
    tm.assert_equal(result, expected)
Esempio n. 14
0
    def test_numpy_transpose(self, index_or_series_obj):
        obj = index_or_series_obj
        tm.assert_equal(np.transpose(obj), obj)

        with pytest.raises(ValueError, match=self.errmsg):
            np.transpose(obj, axes=1)
Esempio n. 15
0
 def test_sample_none_weights(self, obj):
     # Check None are also replaced by zeros.
     weights_with_None = [None] * 10
     weights_with_None[5] = 0.5
     tm.assert_equal(obj.sample(n=1, axis=0, weights=weights_with_None),
                     obj.iloc[5:6])
Esempio n. 16
0
    def test_mul_int_identity(self, op, numeric_idx, box_with_array):
        idx = numeric_idx
        idx = tm.box_expected(idx, box_with_array)

        result = op(idx, 1)
        tm.assert_equal(result, idx)
Esempio n. 17
0
    def test_sample(self, test, obj):
        # Fixes issue: 2419
        # Check behavior of random_state argument
        # Check for stability when receives seed or random state -- run 10
        # times.

        seed = np.random.randint(0, 100)
        tm.assert_equal(obj.sample(n=4, random_state=seed),
                        obj.sample(n=4, random_state=seed))

        tm.assert_equal(
            obj.sample(frac=0.7, random_state=seed),
            obj.sample(frac=0.7, random_state=seed),
        )

        tm.assert_equal(
            obj.sample(n=4, random_state=np.random.RandomState(test)),
            obj.sample(n=4, random_state=np.random.RandomState(test)),
        )

        tm.assert_equal(
            obj.sample(frac=0.7, random_state=np.random.RandomState(test)),
            obj.sample(frac=0.7, random_state=np.random.RandomState(test)),
        )

        tm.assert_equal(
            obj.sample(frac=2,
                       replace=True,
                       random_state=np.random.RandomState(test)),
            obj.sample(frac=2,
                       replace=True,
                       random_state=np.random.RandomState(test)),
        )

        os1, os2 = [], []
        for _ in range(2):
            np.random.seed(test)
            os1.append(obj.sample(n=4))
            os2.append(obj.sample(frac=0.7))
        tm.assert_equal(*os1)
        tm.assert_equal(*os2)
    def test_dt_accessor_api_for_categorical(self):
        # https://github.com/pandas-dev/pandas/issues/10661

        s_dr = Series(date_range("1/1/2015", periods=5, tz="MET"))
        c_dr = s_dr.astype("category")

        s_pr = Series(period_range("1/1/2015", freq="D", periods=5))
        c_pr = s_pr.astype("category")

        s_tdr = Series(timedelta_range("1 days", "10 days"))
        c_tdr = s_tdr.astype("category")

        # only testing field (like .day)
        # and bool (is_month_start)
        get_ops = lambda x: x._datetimelike_ops

        test_data = [
            ("Datetime", get_ops(DatetimeArray), s_dr, c_dr),
            ("Period", get_ops(PeriodArray), s_pr, c_pr),
            ("Timedelta", get_ops(TimedeltaArray), s_tdr, c_tdr),
        ]

        assert isinstance(c_dr.dt, Properties)

        special_func_defs = [
            ("strftime", ("%Y-%m-%d", ), {}),
            ("tz_convert", ("EST", ), {}),
            ("round", ("D", ), {}),
            ("floor", ("D", ), {}),
            ("ceil", ("D", ), {}),
            ("asfreq", ("D", ), {}),
            # FIXME: don't leave commented-out
            # ('tz_localize', ("UTC",), {}),
        ]
        _special_func_names = [f[0] for f in special_func_defs]

        # the series is already localized
        _ignore_names = ["tz_localize", "components"]

        for name, attr_names, s, c in test_data:
            func_names = [
                f for f in dir(s.dt)
                if not (f.startswith("_") or f in attr_names
                        or f in _special_func_names or f in _ignore_names)
            ]

            func_defs = [(f, (), {}) for f in func_names]
            for f_def in special_func_defs:
                if f_def[0] in dir(s.dt):
                    func_defs.append(f_def)

            for func, args, kwargs in func_defs:
                with warnings.catch_warnings():
                    if func == "to_period":
                        # dropping TZ
                        warnings.simplefilter("ignore", UserWarning)
                    res = getattr(c.dt, func)(*args, **kwargs)
                    exp = getattr(s.dt, func)(*args, **kwargs)

                tm.assert_equal(res, exp)

            for attr in attr_names:
                if attr in ["week", "weekofyear"]:
                    # GH#33595 Deprecate week and weekofyear
                    continue
                res = getattr(c.dt, attr)
                exp = getattr(s.dt, attr)

            if isinstance(res, DataFrame):
                tm.assert_frame_equal(res, exp)
            elif isinstance(res, Series):
                tm.assert_series_equal(res, exp)
            else:
                tm.assert_almost_equal(res, exp)

        invalid = Series([1, 2, 3]).astype("category")
        msg = "Can only use .dt accessor with datetimelike"

        with pytest.raises(AttributeError, match=msg):
            invalid.dt
        assert not hasattr(invalid, "str")
Esempio n. 19
0
def test_s3_protocols(s3_resource, tips_file, protocol, s3so):
    tm.assert_equal(
        read_csv("%s://pandas-test/tips.csv" % protocol, storage_options=s3so),
        read_csv(tips_file),
    )
Esempio n. 20
0
 def test_replace_extension_other(self, frame_or_series):
     # https://github.com/pandas-dev/pandas/issues/34530
     obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
     result = obj.replace("", "")  # no exception
     # should not have changed dtype
     tm.assert_equal(obj, result)
Esempio n. 21
0
def test_reindex_empty_series_tz_dtype():
    # GH 20869
    result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
    expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]")
    tm.assert_equal(result, expected)
def test_expanding_count_with_min_periods_exceeding_series_length(
        frame_or_series):
    # GH 25857
    result = frame_or_series(range(5)).expanding(min_periods=6).count()
    expected = frame_or_series([np.nan, np.nan, np.nan, np.nan, np.nan])
    tm.assert_equal(result, expected)
Esempio n. 23
0
def test_rolling_count_with_min_periods(constructor):
    # GH 26996
    result = constructor(range(5)).rolling(3, min_periods=3).count()
    expected = constructor([np.nan, np.nan, 3.0, 3.0, 3.0])
    tm.assert_equal(result, expected)
def test_arith_zero_dim_ndarray(other):
    arr = pd.array([1, None, 2], dtype="Float64")
    result = arr + np.array(other)
    expected = arr + other
    tm.assert_equal(result, expected)
Esempio n. 25
0
    def test_timedelta_fillna(self, frame_or_series):
        # GH#3371
        ser = Series([
            Timestamp("20130101"),
            Timestamp("20130101"),
            Timestamp("20130102"),
            Timestamp("20130103 9:01:01"),
        ])
        td = ser.diff()
        obj = frame_or_series(td)

        # reg fillna
        result = obj.fillna(Timedelta(seconds=0))
        expected = Series([
            timedelta(0),
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ])
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # interpreted as seconds, no longer supported
        msg = "value should be a 'Timedelta', 'NaT', or array of those. Got 'int'"
        with pytest.raises(TypeError, match=msg):
            obj.fillna(1)

        result = obj.fillna(Timedelta(seconds=1))
        expected = Series([
            timedelta(seconds=1),
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ])
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(timedelta(days=1, seconds=1))
        expected = Series([
            timedelta(days=1, seconds=1),
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ])
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(np.timedelta64(10**9))
        expected = Series([
            timedelta(seconds=1),
            timedelta(0),
            timedelta(1),
            timedelta(days=1, seconds=9 * 3600 + 60 + 1),
        ])
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(NaT)
        expected = Series(
            [
                NaT,
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ],
            dtype="m8[ns]",
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # ffill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.ffill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[0] = np.nan
        expected = frame_or_series(expected)

        tm.assert_equal(result, expected)

        # bfill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.bfill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)
Esempio n. 26
0
    def test_timedelta_fillna(self, frame_or_series):
        # GH#3371
        ser = Series(
            [
                Timestamp("20130101"),
                Timestamp("20130101"),
                Timestamp("20130102"),
                Timestamp("20130103 9:01:01"),
            ]
        )
        td = ser.diff()
        obj = frame_or_series(td)

        # reg fillna
        result = obj.fillna(Timedelta(seconds=0))
        expected = Series(
            [
                timedelta(0),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # interpreted as seconds, deprecated
        with pytest.raises(TypeError, match="Passing integers to fillna"):
            obj.fillna(1)

        result = obj.fillna(Timedelta(seconds=1))
        expected = Series(
            [
                timedelta(seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(timedelta(days=1, seconds=1))
        expected = Series(
            [
                timedelta(days=1, seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(np.timedelta64(10 ** 9))
        expected = Series(
            [
                timedelta(seconds=1),
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ]
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        result = obj.fillna(NaT)
        expected = Series(
            [
                NaT,
                timedelta(0),
                timedelta(1),
                timedelta(days=1, seconds=9 * 3600 + 60 + 1),
            ],
            dtype="m8[ns]",
        )
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)

        # ffill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.ffill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[0] = np.nan
        expected = frame_or_series(expected)

        tm.assert_equal(result, expected)

        # bfill
        td[2] = np.nan
        obj = frame_or_series(td)
        result = obj.bfill()
        expected = td.fillna(Timedelta(seconds=0))
        expected[2] = timedelta(days=1, seconds=9 * 3600 + 60 + 1)
        expected = frame_or_series(expected)
        tm.assert_equal(result, expected)
Esempio n. 27
0
def test_rolling_count_with_min_periods(frame_or_series):
    # GH 26996
    result = frame_or_series(range(5)).rolling(3, min_periods=3).count()
    expected = frame_or_series([np.nan, np.nan, 3.0, 3.0, 3.0])
    tm.assert_equal(result, expected)
    def test_between_time(self, close_open_fixture, frame_or_series):
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        if frame_or_series is not DataFrame:
            ts = ts[0]

        stime = time(0, 0)
        etime = time(1, 0)
        inc_start, inc_end = close_open_fixture

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        exp_len = 13 * 4 + 1
        if not inc_start:
            exp_len -= 5
        if not inc_end:
            exp_len -= 4

        assert len(filtered) == exp_len
        for rs in filtered.index:
            t = rs.time()
            if inc_start:
                assert t >= stime
            else:
                assert t > stime

            if inc_end:
                assert t <= etime
            else:
                assert t < etime

        result = ts.between_time("00:00", "01:00")
        expected = ts.between_time(stime, etime)
        tm.assert_equal(result, expected)

        # across midnight
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        ts = DataFrame(np.random.randn(len(rng), 2), index=rng)
        if frame_or_series is not DataFrame:
            ts = ts[0]
        stime = time(22, 0)
        etime = time(9, 0)

        filtered = ts.between_time(stime, etime, inc_start, inc_end)
        exp_len = (12 * 11 + 1) * 4 + 1
        if not inc_start:
            exp_len -= 4
        if not inc_end:
            exp_len -= 4

        assert len(filtered) == exp_len
        for rs in filtered.index:
            t = rs.time()
            if inc_start:
                assert (t >= stime) or (t <= etime)
            else:
                assert (t > stime) or (t <= etime)

            if inc_end:
                assert (t <= etime) or (t >= stime)
            else:
                assert (t < etime) or (t >= stime)
Esempio n. 29
0
    def test_truncate(self, datetime_frame, frame_or_series):
        ts = datetime_frame[::3]
        ts = tm.get_obj(ts, frame_or_series)

        start, end = datetime_frame.index[3], datetime_frame.index[6]

        start_missing = datetime_frame.index[2]
        end_missing = datetime_frame.index[7]

        # neither specified
        truncated = ts.truncate()
        tm.assert_equal(truncated, ts)

        # both specified
        expected = ts[1:3]

        truncated = ts.truncate(start, end)
        tm.assert_equal(truncated, expected)

        truncated = ts.truncate(start_missing, end_missing)
        tm.assert_equal(truncated, expected)

        # start specified
        expected = ts[1:]

        truncated = ts.truncate(before=start)
        tm.assert_equal(truncated, expected)

        truncated = ts.truncate(before=start_missing)
        tm.assert_equal(truncated, expected)

        # end specified
        expected = ts[:3]

        truncated = ts.truncate(after=end)
        tm.assert_equal(truncated, expected)

        truncated = ts.truncate(after=end_missing)
        tm.assert_equal(truncated, expected)

        # corner case, empty series/frame returned
        truncated = ts.truncate(after=ts.index[0] - ts.index.freq)
        assert len(truncated) == 0

        truncated = ts.truncate(before=ts.index[-1] + ts.index.freq)
        assert len(truncated) == 0

        msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00"
        with pytest.raises(ValueError, match=msg):
            ts.truncate(before=ts.index[-1] - ts.index.freq,
                        after=ts.index[0] + ts.index.freq)
Esempio n. 30
0
 def test_transpose(self):
     for obj in self.objs:
         tm.assert_equal(obj.transpose(), obj)