def test_get_year_end(self): assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.SAT).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 8, 31)) assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.SUN).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 9, 1)) assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.FRI).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 8, 30)) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") assert (offset_n.get_year_end(datetime(2012, 1, 1)) == datetime(2013, 1, 1)) assert (offset_n.get_year_end(datetime(2012, 1, 10)) == datetime(2013, 1, 1)) assert (offset_n.get_year_end(datetime(2013, 1, 1)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 2)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 3)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 10)) == datetime(2013, 12, 31)) JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") assert (JNJ.get_year_end(datetime(2006, 1, 1)) == datetime(2006, 12, 31))
def test_fy5253_nearest_onoffset(): # GH#18877 dates on the year-end but not normalized to midnight offset = FY5253(n=3, startingMonth=7, variation="nearest", weekday=2) ts = Timestamp('2032-07-28 00:12:59.035729419+0000', tz='Africa/Dakar') fast = offset.onOffset(ts) slow = (ts + offset) - offset == ts assert fast == slow
def test_fy5253_last_onoffset(): # GH#18877 dates on the year-end but not normalized to midnight offset = FY5253(n=-5, startingMonth=5, variation="last", weekday=0) ts = Timestamp("1984-05-28 06:29:43.955911354+0200", tz="Europe/San_Marino") fast = offset.is_on_offset(ts) slow = (ts + offset) - offset == ts assert fast == slow
def test_apply(self): date_seq_nem_8_sat = [datetime(2006, 9, 2), datetime(2007, 9, 1), datetime(2008, 8, 30), datetime(2009, 8, 29), datetime(2010, 8, 28), datetime(2011, 9, 3)] JNJ = [datetime(2005, 1, 2), datetime(2006, 1, 1), datetime(2006, 12, 31), datetime(2007, 12, 30), datetime(2008, 12, 28), datetime(2010, 1, 3), datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2012, 12, 30)] DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") tests = [ (makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), date_seq_nem_8_sat), (makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), date_seq_nem_8_sat), (makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), [datetime(2006, 9, 1)] + date_seq_nem_8_sat), (makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:]), (makeFY5253NearestEndMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), list(reversed(date_seq_nem_8_sat))), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), JNJ), (makeFY5253NearestEndMonth(n=-1, startingMonth=12, weekday=WeekDay.SUN), list(reversed(JNJ))), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), [datetime(2005, 1, 2), datetime(2006, 1, 1)]), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), [datetime(2006, 1, 2), datetime(2006, 12, 31)]), (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)]) ] for test in tests: offset, data = test current = data[0] for datum in data[1:]: current = current + offset assert current == datum
def test_bunched_yearends(): # GH#14774 cases with two fiscal year-ends in the same calendar-year fy = FY5253(n=1, weekday=5, startingMonth=12, variation='nearest') dt = Timestamp('2004-01-01') assert fy.rollback(dt) == Timestamp('2002-12-28') assert (-fy).apply(dt) == Timestamp('2002-12-28') assert dt - fy == Timestamp('2002-12-28') assert fy.rollforward(dt) == Timestamp('2004-01-03') assert fy.apply(dt) == Timestamp('2004-01-03') assert fy + dt == Timestamp('2004-01-03') assert dt + fy == Timestamp('2004-01-03') # Same thing, but starting from a Timestamp in the previous year. dt = Timestamp('2003-12-31') assert fy.rollback(dt) == Timestamp('2002-12-28') assert (-fy).apply(dt) == Timestamp('2002-12-28') assert dt - fy == Timestamp('2002-12-28')
def create_data(): """ create the pickle/msgpack data """ data = { 'A': [0., 1., 2., 3., np.nan], 'B': [0, 1, 0, 1, 0], 'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'], 'D': date_range('1/1/2009', periods=5), 'E': [0., 1, Timestamp('20100101'), 'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) if _loose_version >= LooseVersion('0.18'): from pandas import RangeIndex index['range'] = RangeIndex(10) if _loose_version >= LooseVersion('0.21'): from pandas import interval_range index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples(tuple( zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float=Series(data['A']), int=Series(data['B']), mixed=Series(data['E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=['one', 'two'])), dup=Series(np.arange(5).astype(np.float64), index=['A', 'B', 'C', 'D', 'A']), cat=Series(Categorical(['foo', 'bar', 'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series( date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict(float=DataFrame({ 'A': series['float'], 'B': series['float'] + 1 }), int=DataFrame({ 'A': series['int'], 'B': series['int'] + 1 }), mixed=DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}), mi=DataFrame( { 'A': np.arange(5).astype(np.float64), 'B': np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples(tuple( zip(*[['bar', 'bar', 'baz', 'baz', 'baz'], ['one', 'two', 'one', 'two', 'three']])), names=['first', 'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=['A', 'B', 'A']), cat_onecol=DataFrame({'A': Categorical(['foo', 'bar'])}), cat_and_float=DataFrame({ 'A': Categorical(['foo', 'bar', 'baz']), 'B': np.arange(3).astype(np.int64) }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { 'A': Timestamp('20130102', tz='US/Eastern'), 'B': Timestamp('20130603', tz='CET') }, index=range(5)), dt_mixed2_tzs=DataFrame( { 'A': Timestamp('20130102', tz='US/Eastern'), 'B': Timestamp('20130603', tz='CET'), 'C': Timestamp('20130603', tz='UTC') }, index=range(5))) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < LooseVersion('0.19.2'): timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = { 'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1) } return dict(series=series, frame=frame, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
class TestFY5253NearestEndMonthQuarter(Base): offset_nem_sat_aug_4 = makeFY5253NearestEndMonthQuarter( 1, startingMonth=8, weekday=WeekDay.SAT, qtr_with_extra_week=4) offset_nem_thu_aug_4 = makeFY5253NearestEndMonthQuarter( 1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") on_offset_cases = [ # From Wikipedia (offset_nem_sat_aug_4, datetime(2006, 9, 2), True), (offset_nem_sat_aug_4, datetime(2007, 9, 1), True), (offset_nem_sat_aug_4, datetime(2008, 8, 30), True), (offset_nem_sat_aug_4, datetime(2009, 8, 29), True), (offset_nem_sat_aug_4, datetime(2010, 8, 28), True), (offset_nem_sat_aug_4, datetime(2011, 9, 3), True), (offset_nem_sat_aug_4, datetime(2016, 9, 3), True), (offset_nem_sat_aug_4, datetime(2017, 9, 2), True), (offset_nem_sat_aug_4, datetime(2018, 9, 1), True), (offset_nem_sat_aug_4, datetime(2019, 8, 31), True), (offset_nem_sat_aug_4, datetime(2006, 8, 27), False), (offset_nem_sat_aug_4, datetime(2007, 8, 28), False), (offset_nem_sat_aug_4, datetime(2008, 8, 31), False), (offset_nem_sat_aug_4, datetime(2009, 8, 30), False), (offset_nem_sat_aug_4, datetime(2010, 8, 29), False), (offset_nem_sat_aug_4, datetime(2011, 8, 28), False), (offset_nem_sat_aug_4, datetime(2006, 8, 25), False), (offset_nem_sat_aug_4, datetime(2007, 8, 24), False), (offset_nem_sat_aug_4, datetime(2008, 8, 29), False), (offset_nem_sat_aug_4, datetime(2009, 8, 28), False), (offset_nem_sat_aug_4, datetime(2010, 8, 27), False), (offset_nem_sat_aug_4, datetime(2011, 8, 26), False), (offset_nem_sat_aug_4, datetime(2019, 8, 30), False), # From Micron, see: # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 (offset_nem_thu_aug_4, datetime(2012, 8, 30), True), (offset_nem_thu_aug_4, datetime(2011, 9, 1), True), # See: http://google.brand.edgar-online.com/?sym=MU&formtypeID=13 (offset_nem_thu_aug_4, datetime(2013, 5, 30), True), (offset_nem_thu_aug_4, datetime(2013, 2, 28), True), (offset_nem_thu_aug_4, datetime(2012, 11, 29), True), (offset_nem_thu_aug_4, datetime(2012, 5, 31), True), (offset_nem_thu_aug_4, datetime(2007, 3, 1), True), (offset_nem_thu_aug_4, datetime(1994, 3, 3), True), (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False) ] @pytest.mark.parametrize('case', on_offset_cases) def test_onOffset(self, case): offset, dt, expected = case assert_onOffset(offset, dt, expected) def test_offset(self): offset = makeFY5253NearestEndMonthQuarter(1, startingMonth=8, weekday=WeekDay.THU, qtr_with_extra_week=4) MU = [ datetime(2012, 5, 31), datetime(2012, 8, 30), datetime(2012, 11, 29), datetime(2013, 2, 28), datetime(2013, 5, 30) ] date = MU[0] + relativedelta(days=-1) for expected in MU: assert_offset_equal(offset, date, expected) date = date + offset assert_offset_equal(offset, datetime(2012, 5, 31), datetime(2012, 8, 30)) assert_offset_equal(offset, datetime(2012, 5, 30), datetime(2012, 5, 31)) offset2 = FY5253Quarter(weekday=5, startingMonth=12, variation="last", qtr_with_extra_week=4) assert_offset_equal(offset2, datetime(2013, 1, 15), datetime(2013, 3, 30))
def makeFY5253LastOfMonth(*args, **kwds): return FY5253(*args, variation="last", **kwds)
def makeFY5253NearestEndMonth(*args, **kwds): return FY5253(*args, variation="nearest", **kwds)
class TestFY5253NearestEndMonth(Base): def test_get_year_end(self): assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.SAT).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 8, 31)) assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.SUN).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 9, 1)) assert (makeFY5253NearestEndMonth( startingMonth=8, weekday=WeekDay.FRI).get_year_end( datetime(2013, 1, 1)) == datetime(2013, 8, 30)) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") assert (offset_n.get_year_end(datetime(2012, 1, 1)) == datetime(2013, 1, 1)) assert (offset_n.get_year_end(datetime(2012, 1, 10)) == datetime(2013, 1, 1)) assert (offset_n.get_year_end(datetime(2013, 1, 1)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 2)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 3)) == datetime(2013, 12, 31)) assert (offset_n.get_year_end(datetime(2013, 1, 10)) == datetime(2013, 12, 31)) JNJ = FY5253(n=1, startingMonth=12, weekday=6, variation="nearest") assert (JNJ.get_year_end(datetime(2006, 1, 1)) == datetime(2006, 12, 31)) offset_lom_aug_sat = makeFY5253NearestEndMonth(1, startingMonth=8, weekday=WeekDay.SAT) offset_lom_aug_thu = makeFY5253NearestEndMonth(1, startingMonth=8, weekday=WeekDay.THU) offset_n = FY5253(weekday=WeekDay.TUE, startingMonth=12, variation="nearest") on_offset_cases = [ # From Wikipedia (see: # http://en.wikipedia.org/wiki/4%E2%80%934%E2%80%935_calendar # #Saturday_nearest_the_end_of_month) # 2006-09-02 2006 September 2 # 2007-09-01 2007 September 1 # 2008-08-30 2008 August 30 (leap year) # 2009-08-29 2009 August 29 # 2010-08-28 2010 August 28 # 2011-09-03 2011 September 3 # 2012-09-01 2012 September 1 (leap year) # 2013-08-31 2013 August 31 # 2014-08-30 2014 August 30 # 2015-08-29 2015 August 29 # 2016-09-03 2016 September 3 (leap year) # 2017-09-02 2017 September 2 # 2018-09-01 2018 September 1 # 2019-08-31 2019 August 31 (offset_lom_aug_sat, datetime(2006, 9, 2), True), (offset_lom_aug_sat, datetime(2007, 9, 1), True), (offset_lom_aug_sat, datetime(2008, 8, 30), True), (offset_lom_aug_sat, datetime(2009, 8, 29), True), (offset_lom_aug_sat, datetime(2010, 8, 28), True), (offset_lom_aug_sat, datetime(2011, 9, 3), True), (offset_lom_aug_sat, datetime(2016, 9, 3), True), (offset_lom_aug_sat, datetime(2017, 9, 2), True), (offset_lom_aug_sat, datetime(2018, 9, 1), True), (offset_lom_aug_sat, datetime(2019, 8, 31), True), (offset_lom_aug_sat, datetime(2006, 8, 27), False), (offset_lom_aug_sat, datetime(2007, 8, 28), False), (offset_lom_aug_sat, datetime(2008, 8, 31), False), (offset_lom_aug_sat, datetime(2009, 8, 30), False), (offset_lom_aug_sat, datetime(2010, 8, 29), False), (offset_lom_aug_sat, datetime(2011, 8, 28), False), (offset_lom_aug_sat, datetime(2006, 8, 25), False), (offset_lom_aug_sat, datetime(2007, 8, 24), False), (offset_lom_aug_sat, datetime(2008, 8, 29), False), (offset_lom_aug_sat, datetime(2009, 8, 28), False), (offset_lom_aug_sat, datetime(2010, 8, 27), False), (offset_lom_aug_sat, datetime(2011, 8, 26), False), (offset_lom_aug_sat, datetime(2019, 8, 30), False), # From Micron, see: # http://google.brand.edgar-online.com/?sym=MU&formtypeID=7 (offset_lom_aug_thu, datetime(2012, 8, 30), True), (offset_lom_aug_thu, datetime(2011, 9, 1), True), (offset_n, datetime(2012, 12, 31), False), (offset_n, datetime(2013, 1, 1), True), (offset_n, datetime(2013, 1, 2), False) ] @pytest.mark.parametrize('case', on_offset_cases) def test_onOffset(self, case): offset, dt, expected = case assert_onOffset(offset, dt, expected) def test_apply(self): date_seq_nem_8_sat = [ datetime(2006, 9, 2), datetime(2007, 9, 1), datetime(2008, 8, 30), datetime(2009, 8, 29), datetime(2010, 8, 28), datetime(2011, 9, 3) ] JNJ = [ datetime(2005, 1, 2), datetime(2006, 1, 1), datetime(2006, 12, 31), datetime(2007, 12, 30), datetime(2008, 12, 28), datetime(2010, 1, 3), datetime(2011, 1, 2), datetime(2012, 1, 1), datetime(2012, 12, 30) ] DEC_SAT = FY5253(n=-1, startingMonth=12, weekday=5, variation="nearest") tests = [(makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), date_seq_nem_8_sat), (makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), date_seq_nem_8_sat), (makeFY5253NearestEndMonth(startingMonth=8, weekday=WeekDay.SAT), [datetime(2006, 9, 1)] + date_seq_nem_8_sat), (makeFY5253NearestEndMonth(n=1, startingMonth=8, weekday=WeekDay.SAT), [datetime(2006, 9, 3)] + date_seq_nem_8_sat[1:]), (makeFY5253NearestEndMonth(n=-1, startingMonth=8, weekday=WeekDay.SAT), list(reversed(date_seq_nem_8_sat))), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), JNJ), (makeFY5253NearestEndMonth(n=-1, startingMonth=12, weekday=WeekDay.SUN), list(reversed(JNJ))), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), [datetime(2005, 1, 2), datetime(2006, 1, 1)]), (makeFY5253NearestEndMonth(n=1, startingMonth=12, weekday=WeekDay.SUN), [datetime(2006, 1, 2), datetime(2006, 12, 31)]), (DEC_SAT, [datetime(2013, 1, 15), datetime(2012, 12, 29)])] for test in tests: offset, data = test current = data[0] for datum in data[1:]: current = current + offset assert current == datum
def create_data(): """ create the pickle/msgpack data """ data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], "B": [0, 1, 0, 1, 0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": date_range("1/1/2009", periods=5), "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], } scalars = dict(timestamp=Timestamp("20130101"), period=Period("2012", "M")) index = dict( int=Index(np.arange(10)), date=date_range("20130101", periods=10), period=period_range("2013-01-01", freq="M", periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range("00:00:00", freq="30T", periods=10), ) index["range"] = RangeIndex(10) if _loose_version >= LooseVersion("0.21"): from pandas import interval_range index["interval"] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ])), names=["first", "second"], )) series = dict( float=Series(data["A"]), int=Series(data["B"]), mixed=Series(data["E"]), ts=Series(np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)), mi=Series( np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]), ), dup=Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), cat=Series(Categorical(["foo", "bar", "baz"])), dt=Series(date_range("20130101", periods=5)), dt_tz=Series(date_range("20130101", periods=5, tz="US/Eastern")), period=Series([Period("2000Q1")] * 5), ) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = dict( float=DataFrame({ "A": series["float"], "B": series["float"] + 1 }), int=DataFrame({ "A": series["int"], "B": series["int"] + 1 }), mixed=DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}), mi=DataFrame( { "A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "baz"], ["one", "two", "one", "two", "three"], ])), names=["first", "second"], ), ), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]), cat_onecol=DataFrame({"A": Categorical(["foo", "bar"])}), cat_and_float=DataFrame({ "A": Categorical(["foo", "bar", "baz"]), "B": np.arange(3).astype(np.int64), }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), }, index=range(5), ), dt_mixed2_tzs=DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), "C": Timestamp("20130603", tz="UTC"), }, index=range(5), ), ) cat = dict( int8=Categorical(list("abcdefg")), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000)), ) timestamp = dict( normal=Timestamp("2011-01-01"), nat=NaT, tz=Timestamp("2011-01-01", tz="US/Eastern"), ) timestamp["freq"] = Timestamp("2011-01-01", freq="D") timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") off = { "DateOffset": DateOffset(years=1), "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), "SemiMonthBegin": SemiMonthBegin(day_of_month=9), "SemiMonthEnd": SemiMonthEnd(day_of_month=24), "MonthBegin": MonthBegin(1), "MonthEnd": MonthEnd(1), "QuarterBegin": QuarterBegin(1), "QuarterEnd": QuarterEnd(1), "Day": Day(1), "YearBegin": YearBegin(1), "YearEnd": YearEnd(1), "Week": Week(1), "Week_Tues": Week(2, normalize=False, weekday=1), "WeekOfMonth": WeekOfMonth(week=3, weekday=4), "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), "Easter": Easter(), "Hour": Hour(1), "Minute": Minute(1), } return dict( series=series, frame=frame, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off, )
def create_data(): """create the pickle data""" data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], "B": [0, 1, 0, 1, 0], "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], "D": date_range("1/1/2009", periods=5), "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], } scalars = { "timestamp": Timestamp("20130101"), "period": Period("2012", "M") } index = { "int": Index(np.arange(10)), "date": date_range("20130101", periods=10), "period": period_range("2013-01-01", freq="M", periods=10), "float": Index(np.arange(10, dtype=np.float64)), "uint": Index(np.arange(10, dtype=np.uint64)), "timedelta": timedelta_range("00:00:00", freq="30T", periods=10), } index["range"] = RangeIndex(10) index["interval"] = interval_range(0, periods=10) mi = { "reg2": MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"], ["one", "two", "one", "two", "one", "two", "one", "two"], ])), names=["first", "second"], ) } series = { "float": Series(data["A"]), "int": Series(data["B"]), "mixed": Series(data["E"]), "ts": Series(np.arange(10).astype(np.int64), index=date_range("20130101", periods=10)), "mi": Series( np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=["one", "two"]), ), "dup": Series(np.arange(5).astype(np.float64), index=["A", "B", "C", "D", "A"]), "cat": Series(Categorical(["foo", "bar", "baz"])), "dt": Series(date_range("20130101", periods=5)), "dt_tz": Series(date_range("20130101", periods=5, tz="US/Eastern")), "period": Series([Period("2000Q1")] * 5), } mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list("ABCDA") frame = { "float": DataFrame({ "A": series["float"], "B": series["float"] + 1 }), "int": DataFrame({ "A": series["int"], "B": series["int"] + 1 }), "mixed": DataFrame({k: data[k] for k in ["A", "B", "C", "D"]}), "mi": DataFrame( { "A": np.arange(5).astype(np.float64), "B": np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples( tuple( zip(*[ ["bar", "bar", "baz", "baz", "baz"], ["one", "two", "one", "two", "three"], ])), names=["first", "second"], ), ), "dup": DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=["A", "B", "A"]), "cat_onecol": DataFrame({"A": Categorical(["foo", "bar"])}), "cat_and_float": DataFrame({ "A": Categorical(["foo", "bar", "baz"]), "B": np.arange(3).astype(np.int64), }), "mixed_dup": mixed_dup_df, "dt_mixed_tzs": DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), }, index=range(5), ), "dt_mixed2_tzs": DataFrame( { "A": Timestamp("20130102", tz="US/Eastern"), "B": Timestamp("20130603", tz="CET"), "C": Timestamp("20130603", tz="UTC"), }, index=range(5), ), } cat = { "int8": Categorical(list("abcdefg")), "int16": Categorical(np.arange(1000)), "int32": Categorical(np.arange(10000)), } timestamp = { "normal": Timestamp("2011-01-01"), "nat": NaT, "tz": Timestamp("2011-01-01", tz="US/Eastern"), } timestamp["freq"] = Timestamp("2011-01-01", freq="D") timestamp["both"] = Timestamp("2011-01-01", tz="Asia/Tokyo", freq="M") off = { "DateOffset": DateOffset(years=1), "DateOffset_h_ns": DateOffset(hour=6, nanoseconds=5824), "BusinessDay": BusinessDay(offset=timedelta(seconds=9)), "BusinessHour": BusinessHour(normalize=True, n=6, end="15:14"), "CustomBusinessDay": CustomBusinessDay(weekmask="Mon Fri"), "SemiMonthBegin": SemiMonthBegin(day_of_month=9), "SemiMonthEnd": SemiMonthEnd(day_of_month=24), "MonthBegin": MonthBegin(1), "MonthEnd": MonthEnd(1), "QuarterBegin": QuarterBegin(1), "QuarterEnd": QuarterEnd(1), "Day": Day(1), "YearBegin": YearBegin(1), "YearEnd": YearEnd(1), "Week": Week(1), "Week_Tues": Week(2, normalize=False, weekday=1), "WeekOfMonth": WeekOfMonth(week=3, weekday=4), "LastWeekOfMonth": LastWeekOfMonth(n=1, weekday=3), "FY5253": FY5253(n=2, weekday=6, startingMonth=7, variation="last"), "Easter": Easter(), "Hour": Hour(1), "Minute": Minute(1), } return { "series": series, "frame": frame, "index": index, "scalars": scalars, "mi": mi, "sp_series": { "float": _create_sp_series(), "ts": _create_sp_tsseries() }, "sp_frame": { "float": _create_sp_frame() }, "cat": cat, "timestamp": timestamp, "offsets": off, }