def test_contains_freq_mismatch(self): rng = period_range("2007-01", freq="M", periods=10) assert Period("2007-01", freq="M") in rng assert not Period("2007-01", freq="D") in rng assert not Period("2007-01", freq="2M") in rng
def test_getitem_list_periods(self): # GH 7710 rng = period_range(start="2012-01-01", periods=10, freq="D") ts = Series(range(len(rng)), index=rng) exp = ts.iloc[[1]] tm.assert_series_equal(ts[[Period("2012-01-02", freq="D")]], exp)
def test_constructor_invalid(self): with tm.assert_raises_regex(TypeError, 'Cannot convert input'): Timestamp(slice(2)) with tm.assert_raises_regex(ValueError, 'Cannot convert Period'): Timestamp(Period('1000-01-01'))
def test_freq_str(self): i1 = Period("1982", freq="Min") assert i1.freq == offsets.Minute() assert i1.freqstr == "T"
def setup(self, freq): self.per = Period('2012-06-01', freq=freq)
def test_repr_nat(self): p = Period("nat", freq="M") assert repr(NaT) in repr(p)
def test_microsecond_repr(self): p = Period("2000-01-01 12:15:02.123567") assert repr(p) == "Period('2000-01-01 12:15:02.123567', 'U')"
def test_sub_offset(self): # freq is DateOffset msg = "Input has different freq|Input cannot be converted to Period" for freq in ["A", "2A", "3A"]: p = Period("2011", freq=freq) assert p - offsets.YearEnd(2) == Period("2009", freq=freq) for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, "D"), timedelta(365), ]: with pytest.raises(IncompatibleFrequency, match=msg): p - o for freq in ["M", "2M", "3M"]: p = Period("2011-03", freq=freq) assert p - offsets.MonthEnd(2) == Period("2011-01", freq=freq) assert p - offsets.MonthEnd(12) == Period("2010-03", freq=freq) for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, "D"), timedelta(365), ]: with pytest.raises(IncompatibleFrequency, match=msg): p - o # freq is Tick for freq in ["D", "2D", "3D"]: p = Period("2011-04-01", freq=freq) assert p - offsets.Day(5) == Period("2011-03-27", freq=freq) assert p - offsets.Hour(24) == Period("2011-03-31", freq=freq) assert p - np.timedelta64(2, "D") == Period("2011-03-30", freq=freq) assert p - np.timedelta64(3600 * 24, "s") == Period("2011-03-31", freq=freq) assert p - timedelta(-2) == Period("2011-04-03", freq=freq) assert p - timedelta(hours=48) == Period("2011-03-30", freq=freq) for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, "h"), timedelta(hours=23), ]: with pytest.raises(IncompatibleFrequency, match=msg): p - o for freq in ["H", "2H", "3H"]: p = Period("2011-04-01 09:00", freq=freq) assert p - offsets.Day(2) == Period("2011-03-30 09:00", freq=freq) assert p - offsets.Hour(3) == Period("2011-04-01 06:00", freq=freq) assert p - np.timedelta64(3, "h") == Period("2011-04-01 06:00", freq=freq) assert p - np.timedelta64(3600, "s") == Period( "2011-04-01 08:00", freq=freq ) assert p - timedelta(minutes=120) == Period("2011-04-01 07:00", freq=freq) assert p - timedelta(days=4, minutes=180) == Period( "2011-03-28 06:00", freq=freq ) for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, "s"), timedelta(hours=23, minutes=30), ]: with pytest.raises(IncompatibleFrequency, match=msg): p - o
def test_construction_month(self): expected = Period("2007-01", freq="M") i1 = Period("200701", freq="M") assert i1 == expected i1 = Period("200701", freq="M") assert i1 == expected i1 = Period(200701, freq="M") assert i1 == expected i1 = Period(ordinal=200701, freq="M") assert i1.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") i2 = Period("200701", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") i2 = Period(datetime(2007, 1, 1), freq="M") i3 = Period(np.datetime64("2007-01-01"), freq="M") i4 = Period(np_datetime64_compat("2007-01-01 00:00:00Z"), freq="M") i5 = Period(np_datetime64_compat("2007-01-01 00:00:00.000Z"), freq="M") assert i1 == i2 assert i1 == i3 assert i1 == i4 assert i1 == i5
def test_add_integer(self): per1 = Period(freq="D", year=2008, month=1, day=1) per2 = Period(freq="D", year=2008, month=1, day=2) assert per1 + 1 == per2 assert 1 + per1 == per2
def test_add_offset(self): # freq is DateOffset for freq in ["A", "2A", "3A"]: p = Period("2011", freq=freq) exp = Period("2013", freq=freq) assert p + offsets.YearEnd(2) == exp assert offsets.YearEnd(2) + p == exp for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, "D"), timedelta(365), ]: msg = "Input has different freq|Input cannot be converted to Period" with pytest.raises(IncompatibleFrequency, match=msg): p + o if isinstance(o, np.timedelta64): msg = "cannot use operands with types" with pytest.raises(TypeError, match=msg): o + p else: msg = "|".join( [ "Input has different freq", "Input cannot be converted to Period", ] ) with pytest.raises(IncompatibleFrequency, match=msg): o + p for freq in ["M", "2M", "3M"]: p = Period("2011-03", freq=freq) exp = Period("2011-05", freq=freq) assert p + offsets.MonthEnd(2) == exp assert offsets.MonthEnd(2) + p == exp exp = Period("2012-03", freq=freq) assert p + offsets.MonthEnd(12) == exp assert offsets.MonthEnd(12) + p == exp for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(365, "D"), timedelta(365), ]: msg = "Input has different freq|Input cannot be converted to Period" with pytest.raises(IncompatibleFrequency, match=msg): p + o if isinstance(o, np.timedelta64): msg = "cannot use operands with types" with pytest.raises(TypeError, match=msg): o + p else: msg = "|".join( [ "Input has different freq", "Input cannot be converted to Period", ] ) with pytest.raises(IncompatibleFrequency, match=msg): o + p # freq is Tick for freq in ["D", "2D", "3D"]: p = Period("2011-04-01", freq=freq) exp = Period("2011-04-06", freq=freq) assert p + offsets.Day(5) == exp assert offsets.Day(5) + p == exp exp = Period("2011-04-02", freq=freq) assert p + offsets.Hour(24) == exp assert offsets.Hour(24) + p == exp exp = Period("2011-04-03", freq=freq) assert p + np.timedelta64(2, "D") == exp msg = "cannot use operands with types" with pytest.raises(TypeError, match=msg): np.timedelta64(2, "D") + p exp = Period("2011-04-02", freq=freq) assert p + np.timedelta64(3600 * 24, "s") == exp with pytest.raises(TypeError, match=msg): np.timedelta64(3600 * 24, "s") + p exp = Period("2011-03-30", freq=freq) assert p + timedelta(-2) == exp assert timedelta(-2) + p == exp exp = Period("2011-04-03", freq=freq) assert p + timedelta(hours=48) == exp assert timedelta(hours=48) + p == exp for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(4, "h"), timedelta(hours=23), ]: msg = "Input has different freq|Input cannot be converted to Period" with pytest.raises(IncompatibleFrequency, match=msg): p + o if isinstance(o, np.timedelta64): msg = "cannot use operands with types" with pytest.raises(TypeError, match=msg): o + p else: msg = "|".join( [ "Input has different freq", "Input cannot be converted to Period", ] ) with pytest.raises(IncompatibleFrequency, match=msg): o + p for freq in ["H", "2H", "3H"]: p = Period("2011-04-01 09:00", freq=freq) exp = Period("2011-04-03 09:00", freq=freq) assert p + offsets.Day(2) == exp assert offsets.Day(2) + p == exp exp = Period("2011-04-01 12:00", freq=freq) assert p + offsets.Hour(3) == exp assert offsets.Hour(3) + p == exp msg = "cannot use operands with types" exp = Period("2011-04-01 12:00", freq=freq) assert p + np.timedelta64(3, "h") == exp with pytest.raises(TypeError, match=msg): np.timedelta64(3, "h") + p exp = Period("2011-04-01 10:00", freq=freq) assert p + np.timedelta64(3600, "s") == exp with pytest.raises(TypeError, match=msg): np.timedelta64(3600, "s") + p exp = Period("2011-04-01 11:00", freq=freq) assert p + timedelta(minutes=120) == exp assert timedelta(minutes=120) + p == exp exp = Period("2011-04-05 12:00", freq=freq) assert p + timedelta(days=4, minutes=180) == exp assert timedelta(days=4, minutes=180) + p == exp for o in [ offsets.YearBegin(2), offsets.MonthBegin(1), offsets.Minute(), np.timedelta64(3200, "s"), timedelta(hours=23, minutes=30), ]: msg = "Input has different freq|Input cannot be converted to Period" with pytest.raises(IncompatibleFrequency, match=msg): p + o if isinstance(o, np.timedelta64): msg = "cannot use operands with types" with pytest.raises(TypeError, match=msg): o + p else: msg = "|".join( [ "Input has different freq", "Input cannot be converted to Period", ] ) with pytest.raises(IncompatibleFrequency, match=msg): o + p
def test_construction_quarter(self): i1 = Period(year=2005, quarter=1, freq="Q") i2 = Period("1/1/2005", freq="Q") assert i1 == i2 i1 = Period(year=2005, quarter=3, freq="Q") i2 = Period("9/1/2005", freq="Q") assert i1 == i2 i1 = Period("2005Q1") i2 = Period(year=2005, quarter=1, freq="Q") i3 = Period("2005q1") assert i1 == i2 assert i1 == i3 i1 = Period("05Q1") assert i1 == i2 lower = Period("05q1") assert i1 == lower i1 = Period("1Q2005") assert i1 == i2 lower = Period("1q2005") assert i1 == lower i1 = Period("1Q05") assert i1 == i2 lower = Period("1q05") assert i1 == lower i1 = Period("4Q1984") assert i1.year == 1984 lower = Period("4q1984") assert i1 == lower
def test_isscalar_pandas_scalars(self): assert is_scalar(Timestamp('2014-01-01')) assert is_scalar(Timedelta(hours=1)) assert is_scalar(Period('2014-01-01'))
def test_constructor(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") assert len(pi) == 4 * 9 pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") assert len(pi) == 12 * 9 pi = period_range(freq="D", start="1/1/2001", end="12/31/2009") assert len(pi) == 365 * 9 + 2 pi = period_range(freq="B", start="1/1/2001", end="12/31/2009") assert len(pi) == 261 * 9 pi = period_range(freq="H", start="1/1/2001", end="12/31/2001 23:00") assert len(pi) == 365 * 24 pi = period_range(freq="Min", start="1/1/2001", end="1/1/2001 23:59") assert len(pi) == 24 * 60 pi = period_range(freq="S", start="1/1/2001", end="1/1/2001 23:59:59") assert len(pi) == 24 * 60 * 60 start = Period("02-Apr-2005", "B") i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period("2006-12-31", "1w") i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period("2005-05-01", "B") i1 = period_range(start=start, end=end_intv) # infer freq from first element i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) assert len(i2) == 2 assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) assert len(i2) == 2 assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) vals = np.array(vals) with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) # tuple freq disallowed GH#34703 with pytest.raises(TypeError, match="pass as a string instead"): Period("2006-12-31", ("w", 1))
def test_to_timestamp_tz_arg_dateutil_from_string(self): p = Period("1/1/2005", freq="M").to_timestamp(tz="dateutil/Europe/Brussels") assert p.tz == dateutil_gettz("Europe/Brussels")
def test_period_addsub_nat(self, freq): assert NaT - Period("2011-01", freq=freq) is NaT assert Period("2011-01", freq=freq) - NaT is NaT
def test_repr(self): p = Period("Jan-2000") assert "2000-01" in repr(p) p = Period("2000-12-15") assert "2000-12-15" in repr(p)
def test_small_year_parsing(): per1 = Period("0001-01-07", "D") assert per1.year == 1 assert per1.day == 7
def test_millisecond_repr(self): p = Period("2000-01-01 12:15:02.123") assert repr(p) == "Period('2000-01-01 12:15:02.123', 'L')"
def test_period_constructor_offsets(self): assert Period("1/1/2005", freq=offsets.MonthEnd()) == Period( "1/1/2005", freq="M" ) assert Period("2005", freq=offsets.YearEnd()) == Period("2005", freq="A") assert Period("2005", freq=offsets.MonthEnd()) == Period("2005", freq="M") assert Period("3/10/12", freq=offsets.BusinessDay()) == Period( "3/10/12", freq="B" ) assert Period("3/10/12", freq=offsets.Day()) == Period("3/10/12", freq="D") assert Period( year=2005, quarter=1, freq=offsets.QuarterEnd(startingMonth=12) ) == Period(year=2005, quarter=1, freq="Q") assert Period( year=2005, quarter=2, freq=offsets.QuarterEnd(startingMonth=12) ) == Period(year=2005, quarter=2, freq="Q") assert Period(year=2005, month=3, day=1, freq=offsets.Day()) == Period( year=2005, month=3, day=1, freq="D" ) assert Period(year=2012, month=3, day=10, freq=offsets.BDay()) == Period( year=2012, month=3, day=10, freq="B" ) expected = Period("2005-03-01", freq="3D") assert Period(year=2005, month=3, day=1, freq=offsets.Day(3)) == expected assert Period(year=2005, month=3, day=1, freq="3D") == expected assert Period(year=2012, month=3, day=10, freq=offsets.BDay(3)) == Period( year=2012, month=3, day=10, freq="3B" ) assert Period(200701, freq=offsets.MonthEnd()) == Period(200701, freq="M") i1 = Period(ordinal=200701, freq=offsets.MonthEnd()) i2 = Period(ordinal=200701, freq="M") assert i1 == i2 assert i1.year == 18695 assert i2.year == 18695 i1 = Period(datetime(2007, 1, 1), freq="M") i2 = Period("200701", freq="M") assert i1 == i2 i1 = Period(date(2007, 1, 1), freq="M") i2 = Period(datetime(2007, 1, 1), freq="M") i3 = Period(np.datetime64("2007-01-01"), freq="M") i4 = Period(np_datetime64_compat("2007-01-01 00:00:00Z"), freq="M") i5 = Period(np_datetime64_compat("2007-01-01 00:00:00.000Z"), freq="M") assert i1 == i2 assert i1 == i3 assert i1 == i4 assert i1 == i5 i1 = Period("2007-01-01 09:00:00.001") expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") assert i1 == expected expected = Period(np_datetime64_compat("2007-01-01 09:00:00.001Z"), freq="L") assert i1 == expected i1 = Period("2007-01-01 09:00:00.00101") expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq="U") assert i1 == expected expected = Period(np_datetime64_compat("2007-01-01 09:00:00.00101Z"), freq="U") assert i1 == expected
def test_strftime(self): # GH#3363 p = Period("2000-1-1 12:34:12", freq="S") res = p.strftime("%Y-%m-%d %H:%M:%S") assert res == "2000-01-01 12:34:12" assert isinstance(res, str)
def test_construction(self): i1 = Period("1/1/2005", freq="M") i2 = Period("Jan 2005") assert i1 == i2 i1 = Period("2005", freq="A") i2 = Period("2005") i3 = Period("2005", freq="a") assert i1 == i2 assert i1 == i3 i4 = Period("2005", freq="M") i5 = Period("2005", freq="m") msg = r"Input has different freq=M from Period\(freq=A-DEC\)" with pytest.raises(IncompatibleFrequency, match=msg): i1 != i4 assert i4 == i5 i1 = Period.now("Q") i2 = Period(datetime.now(), freq="Q") i3 = Period.now("q") assert i1 == i2 assert i1 == i3 i1 = Period("1982", freq="min") i2 = Period("1982", freq="MIN") assert i1 == i2 i2 = Period("1982", freq=("Min", 1)) assert i1 == i2 i1 = Period(year=2005, month=3, day=1, freq="D") i2 = Period("3/1/2005", freq="D") assert i1 == i2 i3 = Period(year=2005, month=3, day=1, freq="d") assert i1 == i3 i1 = Period("2007-01-01 09:00:00.001") expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1000), freq="L") assert i1 == expected expected = Period(np_datetime64_compat("2007-01-01 09:00:00.001Z"), freq="L") assert i1 == expected i1 = Period("2007-01-01 09:00:00.00101") expected = Period(datetime(2007, 1, 1, 9, 0, 0, 1010), freq="U") assert i1 == expected expected = Period(np_datetime64_compat("2007-01-01 09:00:00.00101Z"), freq="U") assert i1 == expected msg = "Must supply freq for ordinal value" with pytest.raises(ValueError, match=msg): Period(ordinal=200701) msg = "Invalid frequency: X" with pytest.raises(ValueError, match=msg): Period("2007-1-1", freq="X")
def test_properties_annually(self): # Test properties on Periods with annually frequency. a_date = Period(freq="A", year=2007) assert a_date.year == 2007
def test_period_from_ordinal(self): p = Period("2011-01", freq="M") res = Period._from_ordinal(p.ordinal, freq="M") assert p == res assert isinstance(res, Period)
def time_period_constructor(self, freq, is_offset): Period('2012-06-01', freq=freq)
def test_period_cons_combined(self): p = [ ( Period("2011-01", freq="1D1H"), Period("2011-01", freq="1H1D"), Period("2011-01", freq="H"), ), ( Period(ordinal=1, freq="1D1H"), Period(ordinal=1, freq="1H1D"), Period(ordinal=1, freq="H"), ), ] for p1, p2, p3 in p: assert p1.ordinal == p3.ordinal assert p2.ordinal == p3.ordinal assert p1.freq == offsets.Hour(25) assert p1.freqstr == "25H" assert p2.freq == offsets.Hour(25) assert p2.freqstr == "25H" assert p3.freq == offsets.Hour() assert p3.freqstr == "H" result = p1 + 1 assert result.ordinal == (p3 + 25).ordinal assert result.freq == p1.freq assert result.freqstr == "25H" result = p2 + 1 assert result.ordinal == (p3 + 25).ordinal assert result.freq == p2.freq assert result.freqstr == "25H" result = p1 - 1 assert result.ordinal == (p3 - 25).ordinal assert result.freq == p1.freq assert result.freqstr == "25H" result = p2 - 1 assert result.ordinal == (p3 - 25).ordinal assert result.freq == p2.freq assert result.freqstr == "25H" msg = "Frequency must be positive, because it represents span: -25H" with pytest.raises(ValueError, match=msg): Period("2011-01", freq="-1D1H") with pytest.raises(ValueError, match=msg): Period("2011-01", freq="-1H1D") with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq="-1D1H") with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq="-1H1D") msg = "Frequency must be positive, because it represents span: 0D" with pytest.raises(ValueError, match=msg): Period("2011-01", freq="0D0H") with pytest.raises(ValueError, match=msg): Period(ordinal=1, freq="0D0H") # You can only combine together day and intraday offsets msg = "Invalid frequency: 1W1D" with pytest.raises(ValueError, match=msg): Period("2011-01", freq="1W1D") msg = "Invalid frequency: 1D1W" with pytest.raises(ValueError, match=msg): Period("2011-01", freq="1D1W")
class TestPartialSetting: def test_partial_setting(self): # GH2578, allow ix and friends to partially set # series s_orig = Series([1, 2, 3]) s = s_orig.copy() s[5] = 5 expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5 expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s[5] = 5.0 expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5.0 expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) # iloc/iat raise s = s_orig.copy() msg = "iloc cannot enlarge its target object" with pytest.raises(IndexError, match=msg): s.iloc[3] = 5.0 msg = "index 3 is out of bounds for axis 0 with size 3" with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 # ## frame ## df_orig = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64") # iloc/iat raise df = df_orig.copy() msg = "iloc cannot enlarge its target object" with pytest.raises(IndexError, match=msg): df.iloc[4, 2] = 5.0 msg = "index 2 is out of bounds for axis 0 with size 2" with pytest.raises(IndexError, match=msg): df.iat[4, 2] = 5.0 # row setting where it exists expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) df = df_orig.copy() df.iloc[1] = df.iloc[2] tm.assert_frame_equal(df, expected) expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) df = df_orig.copy() df.loc[1] = df.loc[2] tm.assert_frame_equal(df, expected) # like 2578, partial setting with dtype preservation expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})) df = df_orig.copy() df.loc[3] = df.loc[2] tm.assert_frame_equal(df, expected) # single dtype frame, overwrite expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) df = df_orig.copy() df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # mixed dtype frame, overwrite expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) df = df_orig.copy() df["B"] = df["B"].astype(np.float64) df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # single dtype frame, partial setting expected = df_orig.copy() expected["C"] = df["A"] df = df_orig.copy() df.loc[:, "C"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # mixed frame, partial setting expected = df_orig.copy() expected["C"] = df["A"] df = df_orig.copy() df.loc[:, "C"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) expected = pd.concat( [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True) df = df_orig.copy() df.loc[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) df = df_orig.copy() df.at[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq) expected = pd.concat([df_orig, exp_other], axis=1) df = df_orig.copy() df.loc[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) df = df_orig.copy() df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes # by appending df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) s = df.loc[1].copy() s.name = 2 expected = df.append(s) df.loc[2] = df.loc[1] tm.assert_frame_equal(df, expected) # columns will align df = DataFrame(columns=["A", "B"]) df.loc[0] = Series(1, index=range(4)) tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0])) # columns will align df = DataFrame(columns=["A", "B"]) df.loc[0] = Series(1, index=["B"]) exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") tm.assert_frame_equal(df, exp) # list-like must conform df = DataFrame(columns=["A", "B"]) msg = "cannot set a row with mismatched columns" with pytest.raises(ValueError, match=msg): df.loc[0] = [1, 2, 3] # TODO: #15657, these are left as object and not coerced df = DataFrame(columns=["A", "B"]) df.loc[3] = [6, 7] exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype="object") tm.assert_frame_equal(df, exp) def test_series_partial_set(self): # partial set with new index # Regression from GH4825 ser = Series([0.1, 0.2], index=[1, 2]) # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3]] result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[3, 2, 3, "x"]] result = ser.reindex([3, 2, 3, "x"]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) with pytest.raises(KeyError, match="with any missing labels"): result = ser.loc[[2, 2, "x", 1]] result = ser.reindex([2, 2, "x", 1]) tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing in in the index msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " r"in the \[index\]\"") with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) with pytest.raises(KeyError, match="with any missing labels"): ser.loc[[2, 2, 3]] result = ser.reindex([2, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) with pytest.raises(KeyError, match="with any missing labels"): s.loc[[3, 4, 4]] result = s.reindex([3, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) with pytest.raises(KeyError, match="with any missing labels"): s.loc[[5, 3, 3]] result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) with pytest.raises(KeyError, match="with any missing labels"): s.loc[[5, 4, 4]] result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) with pytest.raises(KeyError, match="with any missing labels"): s.loc[[7, 2, 2]] result = s.reindex([7, 2, 2]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) with pytest.raises(KeyError, match="with any missing labels"): s.loc[[4, 5, 5]] result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) # iloc expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) def test_series_partial_set_with_name(self): # GH 11497 idx = Index([1, 2], dtype="int64", name="idx") ser = Series([0.1, 0.2], index=idx, name="s") # loc with pytest.raises(KeyError, match="with any missing labels"): ser.loc[[3, 2, 3]] with pytest.raises(KeyError, match="with any missing labels"): ser.loc[[3, 2, 3, "x"]] exp_idx = Index([2, 2, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) with pytest.raises(KeyError, match="with any missing labels"): ser.loc[[2, 2, "x", 1]] # raises as nothing in in the index msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " r"name='idx'\)\] are in the \[index\]\"") with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] with pytest.raises(KeyError, match="with any missing labels"): ser.loc[[2, 2, 3]] idx = Index([1, 2, 3], dtype="int64", name="idx") with pytest.raises(KeyError, match="with any missing labels"): Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="with any missing labels"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="with any missing labels"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] idx = Index([4, 5, 6, 7], dtype="int64", name="idx") with pytest.raises(KeyError, match="with any missing labels"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="with any missing labels"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] # iloc exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) def test_partial_set_invalid(self): # GH 4940 # allow only setting of 'valid' values orig = tm.makeTimeDataFrame() df = orig.copy() # don't allow not string inserts msg = r"value should be a 'Timestamp' or 'NaT'\. Got '.*' instead\." with pytest.raises(TypeError, match=msg): df.loc[100.0, :] = df.iloc[0] with pytest.raises(TypeError, match=msg): df.loc[100, :] = df.iloc[0] # allow object conversion here df = orig.copy() df.loc["a", :] = df.iloc[0] exp = orig.append(Series(df.iloc[0], name="a")) tm.assert_frame_equal(df, exp) tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" def test_partial_set_empty_frame(self): # partially set with an empty object # frame df = DataFrame() msg = "cannot set a frame with no defined columns" with pytest.raises(ValueError, match=msg): df.loc[1] = 1 with pytest.raises(ValueError, match=msg): df.loc[1] = Series([1], index=["foo"]) msg = "cannot set a frame with no defined index and a scalar" with pytest.raises(ValueError, match=msg): df.loc[:, 1] = 1 # these work as they don't really change # anything but the index # GH5632 expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) def f(): df = DataFrame(index=Index([], dtype="object")) df["foo"] = Series([], dtype="object") return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame() df["foo"] = Series(df.index) return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame() df["foo"] = df.index return df tm.assert_frame_equal(f(), expected) expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = [] return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = Series(np.arange(len(df)), dtype="float64") return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = range(len(df)) return df expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") tm.assert_frame_equal(f(), expected) df = DataFrame() tm.assert_index_equal(df.columns, Index([], dtype=object)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) tm.assert_frame_equal(df, df2) # no index to start expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) df = DataFrame(columns=["A", "B"]) df[0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected) df = DataFrame(columns=["A", "B"]) df.loc[:, 0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_row(self): # GH5720, GH5744 # don't create rows when empty expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["New"] = expected["New"].astype("float64") df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] y["New"] = np.nan tm.assert_frame_equal(y, expected) # tm.assert_frame_equal(y,expected) expected = DataFrame(columns=["a", "b", "c c", "d"]) expected["d"] = expected["d"].astype("int64") df = DataFrame(columns=["a", "b", "c c"]) df["d"] = 3 tm.assert_frame_equal(df, expected) tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) # reindex columns is ok df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=["A", "B", "C"]) expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["C"] = expected["C"].astype("float64") tm.assert_frame_equal(result, expected) def test_partial_set_empty_frame_set_series(self): # GH 5756 # setting with empty Series df = DataFrame(Series(dtype=object)) expected = DataFrame({0: Series(dtype=object)}) tm.assert_frame_equal(df, expected) df = DataFrame(Series(name="foo", dtype=object)) expected = DataFrame({"foo": Series(dtype=object)}) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_copy_assignment(self): # GH 5932 # copy on empty with assignment fails df = DataFrame(index=[0]) df = df.copy() df["a"] = 0 expected = DataFrame(0, index=[0], columns=["a"]) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_consistencies(self): # GH 6171 # consistency on empty frames df = DataFrame(columns=["x", "y"]) df["x"] = [1, 2] expected = DataFrame(dict(x=[1, 2], y=[np.nan, np.nan])) tm.assert_frame_equal(df, expected, check_dtype=False) df = DataFrame(columns=["x", "y"]) df["x"] = ["1", "2"] expected = DataFrame(dict(x=["1", "2"], y=[np.nan, np.nan]), dtype=object) tm.assert_frame_equal(df, expected) df = DataFrame(columns=["x", "y"]) df.loc[0, "x"] = 1 expected = DataFrame(dict(x=[1], y=[np.nan])) tm.assert_frame_equal(df, expected, check_dtype=False) @pytest.mark.parametrize( "idx,labels,expected_idx", [ ( period_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ Period("2000-01-04", freq="D"), Period("2000-01-08", freq="D"), Period("2000-01-12", freq="D"), ], ), ( date_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ Timestamp("2000-01-04", freq="D"), Timestamp("2000-01-08", freq="D"), Timestamp("2000-01-12", freq="D"), ], ), ( pd.timedelta_range(start="1 day", periods=20), ["4D", "8D", "12D"], [ pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day") ], ), ], ) def test_loc_with_list_of_strings_representing_datetimes( self, idx, labels, expected_idx, frame_or_series): # GH 11278 obj = frame_or_series(range(20), index=idx) expected_value = [3, 7, 11] expected = frame_or_series(expected_value, expected_idx) tm.assert_equal(expected, obj.loc[labels]) if frame_or_series is Series: tm.assert_series_equal(expected, obj[labels]) @pytest.mark.parametrize( "idx,labels", [ ( period_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-30"], ), ( date_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-30"], ), (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]), ], ) def test_loc_with_list_of_strings_representing_datetimes_missing_value( self, idx, labels): # GH 11278 s = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) msg = r"with any missing labels" with pytest.raises(KeyError, match=msg): s.loc[labels] with pytest.raises(KeyError, match=msg): s[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] @pytest.mark.parametrize( "idx,labels,msg", [ ( period_range(start="2000", periods=20, freq="D"), ["4D", "8D"], (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " r"are in the \[index\]"), ), ( date_range(start="2000", periods=20, freq="D"), ["4D", "8D"], (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " r"are in the \[index\]"), ), ( pd.timedelta_range(start="1 day", periods=20), ["2000-01-04", "2000-01-08"], (r"None of \[Index\(\['2000-01-04', '2000-01-08'\], " r"dtype='object'\)\] are in the \[index\]"), ), ], ) def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( self, idx, labels, msg): # GH 11278 s = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) with pytest.raises(KeyError, match=msg): s.loc[labels] with pytest.raises(KeyError, match=msg): s[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] def test_index_name_empty(self): # GH 31368 df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series expected = DataFrame({"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")) tm.assert_frame_equal(df, expected) # GH 36527 df = DataFrame() series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series expected = DataFrame({"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index")) tm.assert_frame_equal(df, expected) def test_slice_irregular_datetime_index_with_nan(self): # GH36953 index = pd.to_datetime( ["2012-01-01", "2012-01-02", "2012-01-03", None]) df = DataFrame(range(len(index)), index=index) expected = DataFrame(range(len(index[:3])), index=index[:3]) result = df["2012-01-01":"2012-01-04"] tm.assert_frame_equal(result, expected)
def test_round_trip(self): p = Period("2000Q1") new_p = tm.round_trip_pickle(p) assert new_p == p
return Series(np.random.randn(len(dates)), index=dates) # ---------------------------------------------------------------- # Scalars # ---------------------------------------------------------------- @pytest.fixture(params=[ ( Interval(left=0, right=5, inclusive="right"), IntervalDtype("int64", inclusive="right"), ), ( Interval(left=0.1, right=0.5, inclusive="right"), IntervalDtype("float64", inclusive="right"), ), (Period("2012-01", freq="M"), "period[M]"), (Period("2012-02-01", freq="D"), "period[D]"), ( Timestamp("2011-01-01", tz="US/Eastern"), DatetimeTZDtype(tz="US/Eastern"), ), (Timedelta(seconds=500), "timedelta64[ns]"), ]) def ea_scalar_and_dtype(request): return request.param # ---------------------------------------------------------------- # Operators & Operations # ---------------------------------------------------------------- _all_arithmetic_operators = [
def create_data(): """ create the pickle data """ data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], "B": [0, 1, 0, 1, 0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": date_range("1/1/2009", periods=5), "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], } scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M")) index = dict( int=Index(np.arange(10)), date=date_range("20130101", periods=10), period=period_range("2013-01-01", freq="M", periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range("00:00:00", freq="30T", periods=10), ) index["range"] = RangeIndex(10) if _loose_version >= LooseVersion("0.21"): from pandas import interval_range index["interval"] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ])), names=["first", "second"], )) series = dict( float=Series(data["A"]), int=Series(data["B"]), mixed=Series(data["E"]), ts=Series(np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)), mi=Series( np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]), ), dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), cat=Series(Categorical(["foo", "bar", "baz"])), dt=Series(date_range("20130101", periods=5)), dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")), period=Series([Period("2000Q1")] * 5), ) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict( float=DataFrame({ "A": series["float"], "B": series["float"] + 1 }), int=DataFrame({ "A": series["int"], "B": series["int"] + 1 }), mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}), mi=DataFrame( { "A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "baz"], ["one", "two", "one", "two", "three"], ])), names=["first", "second"], ), ), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]), cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}), cat_and_float=DataFrame({ "A": Categorical(["foo", "bar", "baz"]), "B": np.arange(3).astype(np.int64), }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), }, index=range(5), ), dt_mixed2_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), "C": Timestamp("20130603", tz="UTC"), }, index=range(5), ), ) cat = dict( int8=Categorical(list("abcdefg")), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000)), ) timestamp = dict( normal=Timestamp("2011-01-01"), nat=NaT, tz=Timestamp("2011-01-01", tz="US/Eastern"), ) timestamp["freq"] = Timestamp("2011-01-01", freq="D") timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") off = { "DateOffset": DateOffset(years=1), "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), "SemiMonthBegin": SemiMonthBegin(day_of_month=9), "SemiMonthEnd": SemiMonthEnd(day_of_month=24), "MonthBegin": MonthBegin(1), "MonthEnd": MonthEnd(1), "QuarterBegin": QuarterBegin(1), "QuarterEnd": QuarterEnd(1), "Day": Day(1), "YearBegin": YearBegin(1), "YearEnd": YearEnd(1), "Week": Week(1), "Week_Tues": Week(2, normalize=False, weekday=1), "WeekOfMonth": WeekOfMonth(week=3, weekday=4), "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), "Easter": Easter(), "Hour": Hour(1), "Minute": Minute(1), } return dict( series=series, frame=frame, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off, )